Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b2b1a1d

Browse files
committed
individual workspace latency metrics
1 parent 3f90ee6 commit b2b1a1d

File tree

4 files changed

+104
-52
lines changed

4 files changed

+104
-52
lines changed

‎scaletest/coderconnect/config.go‎

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,6 @@ type Config struct {
6464

6565
Metrics*Metrics`json:"-"`
6666

67-
MetricLabelValues []string`json:"metric_label_values"`
68-
6967
// DialBarrier is used to ensure all runners have dialed the Coder Connect
7068
// endpoint before creating their workspace(s).
7169
DialBarrier*harness.Barrier`json:"-"`
@@ -82,6 +80,14 @@ func (c Config) Validate() error {
8280
returnxerrors.Errorf("workspace config: %w",err)
8381
}
8482

83+
ifc.Workspace.Request.Name!="" {
84+
returnxerrors.New("workspace name cannot be overridden")
85+
}
86+
87+
ifc.WorkspaceCount<=0 {
88+
returnxerrors.New("workspace_count must be greater than 0")
89+
}
90+
8591
ifc.DialBarrier==nil {
8692
returnxerrors.New("dial barrier must be set")
8793
}

‎scaletest/coderconnect/metrics.go‎

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package coderconnect
22

33
import (
4+
"strconv"
45
"sync/atomic"
56
"time"
67

@@ -11,37 +12,35 @@ type Metrics struct {
1112
WorkspaceUpdatesLatencySeconds prometheus.HistogramVec
1213
WorkspaceUpdatesErrorsTotal prometheus.CounterVec
1314

14-
numErrors atomic.Int64
15-
completionDuration time.Duration
15+
numErrors atomic.Int64
1616
}
1717

18-
funcNewMetrics(reg prometheus.Registerer,labelNames...string)*Metrics {
18+
funcNewMetrics(reg prometheus.Registerer)*Metrics {
1919
m:=&Metrics{
2020
WorkspaceUpdatesLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
2121
Namespace:"coderd",
2222
Subsystem:"scaletest",
2323
Name:"workspace_updates_latency_seconds",
24-
Help:"Timeuntil all expected workspacesandagents are seen viaworkspaceupdates",
25-
},labelNames),
24+
Help:"Timebetween starting a workspace buildandreceiving both the agent update andworkspaceupdate",
25+
},[]string{"username","owned_workspaces","workspace"}),
2626
WorkspaceUpdatesErrorsTotal:*prometheus.NewCounterVec(prometheus.CounterOpts{
2727
Namespace:"coderd",
2828
Subsystem:"scaletest",
2929
Name:"workspace_updates_errors_total",
3030
Help:"Total number of workspace updates errors",
31-
},append(labelNames,"action")),
31+
},[]string{"username","owned_workspaces","action"}),
3232
}
3333

3434
reg.MustRegister(m.WorkspaceUpdatesLatencySeconds)
3535
reg.MustRegister(m.WorkspaceUpdatesErrorsTotal)
3636
returnm
3737
}
3838

39-
func (m*Metrics)AddError(labelValues...string) {
40-
m.numErrors.Add(1)
41-
m.WorkspaceUpdatesErrorsTotal.WithLabelValues(labelValues...).Inc()
39+
func (m*Metrics)RecordCompletion(elapsed time.Duration,usernamestring,ownedWorkspacesint64,workspacestring) {
40+
m.WorkspaceUpdatesLatencySeconds.WithLabelValues(username,strconv.Itoa(int(ownedWorkspaces)),workspace).Observe(elapsed.Seconds())
4241
}
4342

44-
func (m*Metrics)RecordCompletion(elapsed time.Duration,labelValues...string) {
45-
m.completionDuration=elapsed
46-
m.WorkspaceUpdatesLatencySeconds.WithLabelValues(labelValues...).Observe(elapsed.Seconds())
43+
func (m*Metrics)AddError(usernamestring,ownedWorkspacesint64,actionstring) {
44+
m.numErrors.Add(1)
45+
m.WorkspaceUpdatesErrorsTotal.WithLabelValues(username,strconv.Itoa(int(ownedWorkspaces)),action).Inc()
4746
}

‎scaletest/coderconnect/run.go‎

Lines changed: 77 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import (
1515
"cdr.dev/slog"
1616
"cdr.dev/slog/sloggers/sloghuman"
1717
"github.com/coder/coder/v2/coderd/tracing"
18+
"github.com/coder/coder/v2/coderd/util/syncmap"
1819
"github.com/coder/coder/v2/codersdk"
1920
"github.com/coder/coder/v2/codersdk/workspacesdk"
2021
"github.com/coder/coder/v2/scaletest/createusers"
@@ -32,8 +33,14 @@ type Runner struct {
3233
createUserRunner*createusers.Runner
3334
workspacebuildRunners []*workspacebuild.Runner
3435

35-
// startTime records when workspace builds begin (for metrics timing)
36-
startTime time.Time
36+
// workspace name to workspace
37+
workspaces*syncmap.Map[string,*workspace]
38+
}
39+
40+
typeworkspacestruct {
41+
workspaceID uuid.UUID
42+
buildStartTime time.Time
43+
updateLatency time.Duration
3744
}
3845

3946
var (
@@ -44,8 +51,9 @@ var (
4451

4552
funcNewRunner(client*codersdk.Client,cfgConfig)*Runner {
4653
return&Runner{
47-
client:client,
48-
cfg:cfg,
54+
client:client,
55+
cfg:cfg,
56+
workspaces:syncmap.New[string,*workspace](),
4957
}
5058
}
5159

@@ -113,24 +121,32 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
113121

114122
completionCh:=make(chanerror,1)
115123
gofunc() {
116-
completionCh<-r.watchWorkspaceUpdates(watchCtx,clients,logs)
124+
completionCh<-r.watchWorkspaceUpdates(watchCtx,clients,user,logs)
117125
}()
118126

119127
reachedBarrier=true
120128
r.cfg.DialBarrier.Wait()
121129

122-
r.startTime=time.Now()
123-
124130
workspaceRunners:=make([]*workspacebuild.Runner,0,r.cfg.WorkspaceCount)
125131
fori:=ranger.cfg.WorkspaceCount {
132+
workspaceName,err:=loadtestutil.GenerateWorkspaceName(id)
133+
iferr!=nil {
134+
returnxerrors.Errorf("generate random name for workspace: %w",err)
135+
}
126136
workspaceBuildConfig:=r.cfg.Workspace
127137
workspaceBuildConfig.OrganizationID=r.cfg.User.OrganizationID
128138
workspaceBuildConfig.UserID=user.ID.String()
139+
workspaceBuildConfig.Request.Name=workspaceName
129140

130141
runner:=workspacebuild.NewRunner(client,workspaceBuildConfig)
131142
workspaceRunners=append(workspaceRunners,runner)
132143

133144
_,_=fmt.Fprintf(logs,"Creating workspace %d/%d...\n",i+1,r.cfg.WorkspaceCount)
145+
146+
// Record build start time before running the workspace build
147+
r.workspaces.Store(workspaceName,&workspace{
148+
buildStartTime:time.Now(),
149+
})
134150
err=runner.Run(ctx,fmt.Sprintf("%s-%d",id,i),logs)
135151
iferr!=nil {
136152
returnxerrors.Errorf("create workspace %d: %w",i,err)
@@ -163,7 +179,7 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
163179
u,err:=client.URL.Parse("/api/v2/tailnet")
164180
iferr!=nil {
165181
logger.Error(ctx,"failed to parse tailnet URL",slog.Error(err))
166-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"parse_url")...)
182+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"parse_url")
167183
returnnil,xerrors.Errorf("parse tailnet URL: %w",err)
168184
}
169185

@@ -183,7 +199,7 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
183199
clients,err:=dialer.Dial(ctx,nil)
184200
iferr!=nil {
185201
logger.Error(ctx,"failed to dial workspace updates",slog.Error(err))
186-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"dial")...)
202+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"dial")
187203
returnnil,xerrors.Errorf("dial workspace updates: %w",err)
188204
}
189205

@@ -192,63 +208,86 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
192208

193209
// watchWorkspaceUpdates processes workspace updates and returns error or nil
194210
// once all expected workspaces and agents are seen.
195-
func (r*Runner)watchWorkspaceUpdates(ctx context.Context,clients*tailnet.ControlProtocolClients,logs io.Writer)error {
211+
func (r*Runner)watchWorkspaceUpdates(ctx context.Context,clients*tailnet.ControlProtocolClients,user codersdk.User,logs io.Writer)error {
196212
deferclients.Closer.Close()
197213

198-
seenWorkspaces:=make(map[uuid.UUID]bool)
199-
seenAgents:=make(map[uuid.UUID]bool)
200214
expectedWorkspaces:=r.cfg.WorkspaceCount
201-
expectedAgents:=expectedWorkspaces
202-
203-
_,_=fmt.Fprintf(logs,"Waiting for %d workspaces and %d agents\n",expectedWorkspaces,expectedAgents)
215+
seenWorkspaces:=0
216+
// Workspace ID to agent update arrival time.
217+
// At the end, we reconcile to see which took longer, and mark that as the
218+
// latency.
219+
agents:=make(map[uuid.UUID]time.Time)
204220

221+
_,_=fmt.Fprintf(logs,"Waiting for %d workspaces and their agents\n",expectedWorkspaces)
205222
for {
206223
select {
207224
case<-ctx.Done():
208225
_,_=fmt.Fprintf(logs,"Context canceled while waiting for workspace updates: %v\n",ctx.Err())
209-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"context_done")...)
226+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"context_done")
210227
returnctx.Err()
211228
default:
212229
}
213230

214231
update,err:=clients.WorkspaceUpdates.Recv()
215232
iferr!=nil {
216233
_,_=fmt.Fprintf(logs,"Workspace updates stream error: %v\n",err)
217-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"recv")...)
234+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"recv")
218235
returnxerrors.Errorf("receive workspace update: %w",err)
219236
}
220237

221238
for_,ws:=rangeupdate.UpsertedWorkspaces {
222239
wsID,err:=uuid.FromBytes(ws.Id)
223240
iferr!=nil {
224241
_,_=fmt.Fprintf(logs,"Invalid workspace ID in update: %v\n",err)
225-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"bad_workspace_id")...)
242+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"bad_workspace_id")
226243
continue
227244
}
228-
if!seenWorkspaces[wsID] {
229-
seenWorkspaces[wsID]=true
230-
_,_=fmt.Fprintf(logs,"Received workspace update: %s (%d/%d)\n",wsID,len(seenWorkspaces),expectedWorkspaces)
245+
246+
iftracking,ok:=r.workspaces.Load(ws.GetName());ok {
247+
iftracking.updateLatency==0 {
248+
r.workspaces.Store(ws.GetName(),&workspace{
249+
workspaceID:wsID,
250+
buildStartTime:tracking.buildStartTime,
251+
updateLatency:time.Since(tracking.buildStartTime),
252+
})
253+
seenWorkspaces++
254+
}
255+
}elseif!ok {
256+
returnxerrors.Errorf("received update for unknown workspace %q (id: %s)",ws.GetName(),wsID)
231257
}
232258
}
233259

234260
for_,agent:=rangeupdate.UpsertedAgents {
235-
agentID,err:=uuid.FromBytes(agent.Id)
261+
wsID,err:=uuid.FromBytes(agent.WorkspaceId)
236262
iferr!=nil {
237-
_,_=fmt.Fprintf(logs,"Invalidagent ID in update: %v\n",err)
238-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"bad_agent_id")...)
263+
_,_=fmt.Fprintf(logs,"Invalidworkspace ID in agent update: %v\n",err)
264+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"bad_agent_workspace_id")
239265
continue
240266
}
241-
if!seenAgents[agentID] {
242-
seenAgents[agentID]=true
243-
_,_=fmt.Fprintf(logs,"Received agent update: %s (%d/%d)\n",agentID,len(seenAgents),expectedAgents)
267+
268+
if_,ok:=agents[wsID];!ok {
269+
agents[wsID]=time.Now()
244270
}
245271
}
246272

247-
iflen(seenWorkspaces)>=int(expectedWorkspaces)&&len(seenAgents)>=int(expectedAgents) {
248-
elapsed:=time.Since(r.startTime)
249-
_,_=fmt.Fprintf(logs,"All expected workspaces (%d) and agents (%d) received in %v\n",
250-
len(seenWorkspaces),len(seenAgents),elapsed)
251-
r.cfg.Metrics.RecordCompletion(elapsed,r.cfg.MetricLabelValues...)
273+
ifseenWorkspaces==int(expectedWorkspaces)&&len(agents)==int(expectedWorkspaces) {
274+
// For each workspace, record the latency from build start to
275+
// workspace update, or agent update, whichever is later.
276+
r.workspaces.Range(func(wsNamestring,ws*workspace)bool {
277+
ifagentTime,ok:=agents[ws.workspaceID];ok {
278+
agentLatency:=agentTime.Sub(ws.buildStartTime)
279+
ifagentLatency>ws.updateLatency {
280+
// Update in-place, so GetMetrics is accurate.
281+
ws.updateLatency=agentLatency
282+
}
283+
}else {
284+
// Unreachable, recorded for debugging
285+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"missing_agent")
286+
}
287+
r.cfg.Metrics.RecordCompletion(ws.updateLatency,user.Username,r.cfg.WorkspaceCount,wsName)
288+
returntrue
289+
})
290+
_,_=fmt.Fprintf(logs,"Updates received for all %d workspaces and agents\n",expectedWorkspaces)
252291
returnnil
253292
}
254293
}
@@ -260,9 +299,14 @@ const (
260299
)
261300

262301
func (r*Runner)GetMetrics()map[string]any {
302+
latencyMap:=make(map[string]float64)
303+
r.workspaces.Range(func(wsNamestring,ws*workspace)bool {
304+
latencyMap[wsName]=ws.updateLatency.Seconds()
305+
returntrue
306+
})
263307
returnmap[string]any{
264308
WorkspaceUpdatesErrorsTotal:r.cfg.Metrics.numErrors.Load(),
265-
WorkspaceUpdatesLatencyMetric:r.cfg.Metrics.completionDuration.Seconds(),
309+
WorkspaceUpdatesLatencyMetric:latencyMap,
266310
}
267311
}
268312

‎scaletest/coderconnect/run_test.go‎

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ func TestRun(t *testing.T) {
6565
coderdtest.AwaitTemplateVersionJobCompleted(t,client,version.ID)
6666

6767
barrier:=harness.NewBarrier(numUsers)
68-
metrics:=coderconnect.NewMetrics(prometheus.NewRegistry(),"num_workspaces","username")
68+
metrics:=coderconnect.NewMetrics(prometheus.NewRegistry())
6969

7070
th:=harness.NewTestHarness(harness.ConcurrentExecutionStrategy{}, harness.ConcurrentExecutionStrategy{})
7171
fori:=rangenumUsers {
@@ -83,9 +83,7 @@ func TestRun(t *testing.T) {
8383
WorkspaceCount:int64(userWorkspaces),
8484
DialTimeout:testutil.WaitMedium,
8585
WorkspaceUpdatesTimeout:testutil.WaitLong,
86-
NoCleanup:false,
8786
Metrics:metrics,
88-
MetricLabelValues: []string{"1","fake-username"},
8987
DialBarrier:barrier,
9088
}
9189
err:=cfg.Validate()
@@ -95,13 +93,16 @@ func TestRun(t *testing.T) {
9593

9694
ctx:=testutil.Context(t,testutil.WaitLong)
9795

98-
// Run the tests
9996
err:=th.Run(ctx)
10097
require.NoError(t,err)
10198

99+
res:=th.Results()
100+
require.Len(t,res.Runs,numUsers)
101+
require.Equal(t,0,res.TotalFail)
102+
102103
users,err:=client.Users(ctx, codersdk.UsersRequest{})
103104
require.NoError(t,err)
104-
require.Len(t,users.Users,numUsers+1)// owner + created users
105+
require.Len(t,users.Users,1+numUsers)// owner + created users
105106

106107
workspaces,err:=client.Workspaces(ctx, codersdk.WorkspaceFilter{})
107108
require.NoError(t,err)
@@ -126,6 +127,8 @@ func TestRun(t *testing.T) {
126127
require.Contains(t,th.Results().Runs,"coderconnect/"+id)
127128
metrics:=th.Results().Runs["coderconnect/"+id].Metrics
128129
require.Contains(t,metrics,coderconnect.WorkspaceUpdatesLatencyMetric)
130+
require.Len(t,metrics[coderconnect.WorkspaceUpdatesLatencyMetric],userWorkspaces)
129131
require.Contains(t,metrics,coderconnect.WorkspaceUpdatesErrorsTotal)
132+
require.EqualValues(t,0,metrics[coderconnect.WorkspaceUpdatesErrorsTotal])
130133
}
131134
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp