Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3f21341

Browse files
committed
individual workspace latency metrics
1 parent 3f90ee6, commit 3f21341

File tree

4 files changed

+106
-51
lines changed

4 files changed

+106
-51
lines changed

‎scaletest/coderconnect/config.go‎

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,6 @@ type Config struct {
6464

6565
Metrics*Metrics`json:"-"`
6666

67-
MetricLabelValues []string`json:"metric_label_values"`
68-
6967
// DialBarrier is used to ensure all runners have dialed the Coder Connect
7068
// endpoint before creating their workspace(s).
7169
DialBarrier*harness.Barrier`json:"-"`
@@ -82,6 +80,14 @@ func (c Config) Validate() error {
8280
returnxerrors.Errorf("workspace config: %w",err)
8381
}
8482

83+
ifc.Workspace.Request.Name!="" {
84+
returnxerrors.New("workspace name cannot be overridden")
85+
}
86+
87+
ifc.WorkspaceCount<=0 {
88+
returnxerrors.New("workspace_count must be greater than 0")
89+
}
90+
8591
ifc.DialBarrier==nil {
8692
returnxerrors.New("dial barrier must be set")
8793
}

‎scaletest/coderconnect/metrics.go‎

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
package coderconnect
22

33
import (
4+
"strconv"
45
"sync/atomic"
56
"time"
67

@@ -11,37 +12,35 @@ type Metrics struct {
1112
WorkspaceUpdatesLatencySeconds prometheus.HistogramVec
1213
WorkspaceUpdatesErrorsTotal prometheus.CounterVec
1314

14-
numErrors atomic.Int64
15-
completionDuration time.Duration
15+
numErrors atomic.Int64
1616
}
1717

18-
funcNewMetrics(reg prometheus.Registerer,labelNames...string)*Metrics {
18+
funcNewMetrics(reg prometheus.Registerer)*Metrics {
1919
m:=&Metrics{
2020
WorkspaceUpdatesLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
2121
Namespace:"coderd",
2222
Subsystem:"scaletest",
2323
Name:"workspace_updates_latency_seconds",
24-
Help:"Timeuntil all expected workspacesandagents are seen viaworkspaceupdates",
25-
},labelNames),
24+
Help:"Timebetween starting a workspace buildandreceiving both the agent update andworkspaceupdate",
25+
},[]string{"username","owned_workspaces","workspace"}),
2626
WorkspaceUpdatesErrorsTotal:*prometheus.NewCounterVec(prometheus.CounterOpts{
2727
Namespace:"coderd",
2828
Subsystem:"scaletest",
2929
Name:"workspace_updates_errors_total",
3030
Help:"Total number of workspace updates errors",
31-
},append(labelNames,"action")),
31+
},[]string{"username","owned_workspaces","action"}),
3232
}
3333

3434
reg.MustRegister(m.WorkspaceUpdatesLatencySeconds)
3535
reg.MustRegister(m.WorkspaceUpdatesErrorsTotal)
3636
returnm
3737
}
3838

39-
func (m*Metrics)AddError(labelValues...string) {
40-
m.numErrors.Add(1)
41-
m.WorkspaceUpdatesErrorsTotal.WithLabelValues(labelValues...).Inc()
39+
func (m*Metrics)RecordCompletion(elapsed time.Duration,usernamestring,ownedWorkspacesint64,workspacestring) {
40+
m.WorkspaceUpdatesLatencySeconds.WithLabelValues(username,strconv.Itoa(int(ownedWorkspaces)),workspace).Observe(elapsed.Seconds())
4241
}
4342

44-
func (m*Metrics)RecordCompletion(elapsed time.Duration,labelValues...string) {
45-
m.completionDuration=elapsed
46-
m.WorkspaceUpdatesLatencySeconds.WithLabelValues(labelValues...).Observe(elapsed.Seconds())
43+
func (m*Metrics)AddError(usernamestring,ownedWorkspacesint64,actionstring) {
44+
m.numErrors.Add(1)
45+
m.WorkspaceUpdatesErrorsTotal.WithLabelValues(username,strconv.Itoa(int(ownedWorkspaces)),action).Inc()
4746
}

‎scaletest/coderconnect/run.go‎

Lines changed: 79 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ import (
1515
"cdr.dev/slog"
1616
"cdr.dev/slog/sloggers/sloghuman"
1717
"github.com/coder/coder/v2/coderd/tracing"
18+
"github.com/coder/coder/v2/coderd/util/syncmap"
1819
"github.com/coder/coder/v2/codersdk"
1920
"github.com/coder/coder/v2/codersdk/workspacesdk"
2021
"github.com/coder/coder/v2/scaletest/createusers"
@@ -32,8 +33,14 @@ type Runner struct {
3233
createUserRunner*createusers.Runner
3334
workspacebuildRunners []*workspacebuild.Runner
3435

35-
// startTime records when workspace builds begin (for metrics timing)
36-
startTime time.Time
36+
// workspace name to workspace
37+
workspaces*syncmap.Map[string,*workspace]
38+
}
39+
40+
typeworkspacestruct {
41+
workspaceID uuid.UUID
42+
buildStartTime time.Time
43+
updateLatency time.Duration
3744
}
3845

3946
var (
@@ -44,8 +51,9 @@ var (
4451

4552
funcNewRunner(client*codersdk.Client,cfgConfig)*Runner {
4653
return&Runner{
47-
client:client,
48-
cfg:cfg,
54+
client:client,
55+
cfg:cfg,
56+
workspaces:syncmap.New[string,*workspace](),
4957
}
5058
}
5159

@@ -113,24 +121,32 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
113121

114122
completionCh:=make(chanerror,1)
115123
gofunc() {
116-
completionCh<-r.watchWorkspaceUpdates(watchCtx,clients,logs)
124+
completionCh<-r.watchWorkspaceUpdates(watchCtx,clients,user,logs)
117125
}()
118126

119127
reachedBarrier=true
120128
r.cfg.DialBarrier.Wait()
121129

122-
r.startTime=time.Now()
123-
124130
workspaceRunners:=make([]*workspacebuild.Runner,0,r.cfg.WorkspaceCount)
125131
fori:=ranger.cfg.WorkspaceCount {
132+
workspaceName,err:=loadtestutil.GenerateWorkspaceName(id)
133+
iferr!=nil {
134+
returnxerrors.Errorf("generate random name for workspace: %w",err)
135+
}
126136
workspaceBuildConfig:=r.cfg.Workspace
127137
workspaceBuildConfig.OrganizationID=r.cfg.User.OrganizationID
128138
workspaceBuildConfig.UserID=user.ID.String()
139+
workspaceBuildConfig.Request.Name=workspaceName
129140

130141
runner:=workspacebuild.NewRunner(client,workspaceBuildConfig)
131142
workspaceRunners=append(workspaceRunners,runner)
132143

133144
_,_=fmt.Fprintf(logs,"Creating workspace %d/%d...\n",i+1,r.cfg.WorkspaceCount)
145+
146+
// Record build start time before running the workspace build
147+
r.workspaces.Store(workspaceName,&workspace{
148+
buildStartTime:time.Now(),
149+
})
134150
err=runner.Run(ctx,fmt.Sprintf("%s-%d",id,i),logs)
135151
iferr!=nil {
136152
returnxerrors.Errorf("create workspace %d: %w",i,err)
@@ -163,7 +179,7 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
163179
u,err:=client.URL.Parse("/api/v2/tailnet")
164180
iferr!=nil {
165181
logger.Error(ctx,"failed to parse tailnet URL",slog.Error(err))
166-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"parse_url")...)
182+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"parse_url")
167183
returnnil,xerrors.Errorf("parse tailnet URL: %w",err)
168184
}
169185

@@ -183,7 +199,7 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
183199
clients,err:=dialer.Dial(ctx,nil)
184200
iferr!=nil {
185201
logger.Error(ctx,"failed to dial workspace updates",slog.Error(err))
186-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"dial")...)
202+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"dial")
187203
returnnil,xerrors.Errorf("dial workspace updates: %w",err)
188204
}
189205

@@ -192,63 +208,89 @@ func (r *Runner) dialCoderConnect(ctx context.Context, client *codersdk.Client,
192208

193209
// watchWorkspaceUpdates processes workspace updates and returns error or nil
194210
// once all expected workspaces and agents are seen.
195-
func (r*Runner)watchWorkspaceUpdates(ctx context.Context,clients*tailnet.ControlProtocolClients,logs io.Writer)error {
211+
func (r*Runner)watchWorkspaceUpdates(ctx context.Context,clients*tailnet.ControlProtocolClients,user codersdk.User,logs io.Writer)error {
196212
deferclients.Closer.Close()
197213

198-
seenWorkspaces:=make(map[uuid.UUID]bool)
199-
seenAgents:=make(map[uuid.UUID]bool)
200214
expectedWorkspaces:=r.cfg.WorkspaceCount
201-
expectedAgents:=expectedWorkspaces
215+
seenWorkspaces:=0
216+
217+
// Workspace ID to agent update arrival time.
218+
// At the end, we reconcile to see which took longer, and mark that as the
219+
// latency.
220+
agents:=make(map[uuid.UUID]time.Time)
202221

203-
_,_=fmt.Fprintf(logs,"Waiting for %d workspaces and%d agents\n",expectedWorkspaces,expectedAgents)
222+
_,_=fmt.Fprintf(logs,"Waiting for %d workspaces andtheir agents\n",expectedWorkspaces)
204223

205224
for {
206225
select {
207226
case<-ctx.Done():
208227
_,_=fmt.Fprintf(logs,"Context canceled while waiting for workspace updates: %v\n",ctx.Err())
209-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"context_done")...)
228+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"context_done")
210229
returnctx.Err()
211230
default:
212231
}
213232

214233
update,err:=clients.WorkspaceUpdates.Recv()
215234
iferr!=nil {
216235
_,_=fmt.Fprintf(logs,"Workspace updates stream error: %v\n",err)
217-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"recv")...)
236+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"recv")
218237
returnxerrors.Errorf("receive workspace update: %w",err)
219238
}
220239

221240
for_,ws:=rangeupdate.UpsertedWorkspaces {
222241
wsID,err:=uuid.FromBytes(ws.Id)
223242
iferr!=nil {
224243
_,_=fmt.Fprintf(logs,"Invalid workspace ID in update: %v\n",err)
225-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"bad_workspace_id")...)
244+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"bad_workspace_id")
226245
continue
227246
}
228-
if!seenWorkspaces[wsID] {
229-
seenWorkspaces[wsID]=true
230-
_,_=fmt.Fprintf(logs,"Received workspace update: %s (%d/%d)\n",wsID,len(seenWorkspaces),expectedWorkspaces)
247+
248+
iftracking,ok:=r.workspaces.Load(ws.GetName());ok {
249+
iftracking.updateLatency==0 {
250+
r.workspaces.Store(ws.GetName(),&workspace{
251+
workspaceID:wsID,
252+
buildStartTime:tracking.buildStartTime,
253+
updateLatency:time.Since(tracking.buildStartTime),
254+
})
255+
seenWorkspaces++
256+
}
257+
}elseif!ok {
258+
returnxerrors.Errorf("received update for unknown workspace %q (id: %s)",ws.GetName(),wsID)
231259
}
232260
}
233261

234262
for_,agent:=rangeupdate.UpsertedAgents {
235-
agentID,err:=uuid.FromBytes(agent.Id)
263+
wsID,err:=uuid.FromBytes(agent.WorkspaceId)
236264
iferr!=nil {
237-
_,_=fmt.Fprintf(logs,"Invalidagent ID in update: %v\n",err)
238-
r.cfg.Metrics.AddError(append(r.cfg.MetricLabelValues,"bad_agent_id")...)
265+
_,_=fmt.Fprintf(logs,"Invalidworkspace ID in agent update: %v\n",err)
266+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"bad_agent_workspace_id")
239267
continue
240268
}
241-
if!seenAgents[agentID] {
242-
seenAgents[agentID]=true
243-
_,_=fmt.Fprintf(logs,"Received agent update: %s (%d/%d)\n",agentID,len(seenAgents),expectedAgents)
269+
270+
if_,ok:=agents[wsID];!ok {
271+
agents[wsID]=time.Now()
244272
}
245273
}
246274

247-
iflen(seenWorkspaces)>=int(expectedWorkspaces)&&len(seenAgents)>=int(expectedAgents) {
248-
elapsed:=time.Since(r.startTime)
249-
_,_=fmt.Fprintf(logs,"All expected workspaces (%d) and agents (%d) received in %v\n",
250-
len(seenWorkspaces),len(seenAgents),elapsed)
251-
r.cfg.Metrics.RecordCompletion(elapsed,r.cfg.MetricLabelValues...)
275+
ifseenWorkspaces==int(expectedWorkspaces)&&len(agents)==int(expectedWorkspaces) {
276+
// For each workspace, record the latency from build start to
277+
// workspace update, or agent update, whichever is later.
278+
r.workspaces.Range(func(wsNamestring,ws*workspace)bool {
279+
ifagentTime,ok:=agents[ws.workspaceID];ok {
280+
agentLatency:=agentTime.Sub(ws.buildStartTime)
281+
ifagentLatency>ws.updateLatency {
282+
// Update in-place, so our final metrics reporting is
283+
// correct.
284+
ws.updateLatency=agentLatency
285+
}
286+
}else {
287+
// Unreachable, recorded for debugging
288+
r.cfg.Metrics.AddError(user.Username,r.cfg.WorkspaceCount,"missing_agent")
289+
}
290+
r.cfg.Metrics.RecordCompletion(ws.updateLatency,user.Username,r.cfg.WorkspaceCount,wsName)
291+
returntrue
292+
})
293+
_,_=fmt.Fprintf(logs,"Updates received for all %d workspaces and agents\n",expectedWorkspaces)
252294
returnnil
253295
}
254296
}
@@ -260,9 +302,14 @@ const (
260302
)
261303

262304
func (r*Runner)GetMetrics()map[string]any {
305+
latencyMap:=make(map[string]float64)
306+
r.workspaces.Range(func(wsNamestring,ws*workspace)bool {
307+
latencyMap[wsName]=ws.updateLatency.Seconds()
308+
returntrue
309+
})
263310
returnmap[string]any{
264311
WorkspaceUpdatesErrorsTotal:r.cfg.Metrics.numErrors.Load(),
265-
WorkspaceUpdatesLatencyMetric:r.cfg.Metrics.completionDuration.Seconds(),
312+
WorkspaceUpdatesLatencyMetric:latencyMap,
266313
}
267314
}
268315

‎scaletest/coderconnect/run_test.go‎

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,7 @@ func TestRun(t *testing.T) {
6565
coderdtest.AwaitTemplateVersionJobCompleted(t,client,version.ID)
6666

6767
barrier:=harness.NewBarrier(numUsers)
68-
metrics:=coderconnect.NewMetrics(prometheus.NewRegistry(),"num_workspaces","username")
68+
metrics:=coderconnect.NewMetrics(prometheus.NewRegistry())
6969

7070
th:=harness.NewTestHarness(harness.ConcurrentExecutionStrategy{}, harness.ConcurrentExecutionStrategy{})
7171
fori:=rangenumUsers {
@@ -83,9 +83,7 @@ func TestRun(t *testing.T) {
8383
WorkspaceCount:int64(userWorkspaces),
8484
DialTimeout:testutil.WaitMedium,
8585
WorkspaceUpdatesTimeout:testutil.WaitLong,
86-
NoCleanup:false,
8786
Metrics:metrics,
88-
MetricLabelValues: []string{"1","fake-username"},
8987
DialBarrier:barrier,
9088
}
9189
err:=cfg.Validate()
@@ -95,13 +93,16 @@ func TestRun(t *testing.T) {
9593

9694
ctx:=testutil.Context(t,testutil.WaitLong)
9795

98-
// Run the tests
9996
err:=th.Run(ctx)
10097
require.NoError(t,err)
10198

99+
res:=th.Results()
100+
require.Len(t,res.Runs,numUsers)
101+
require.Equal(t,0,res.TotalFail)
102+
102103
users,err:=client.Users(ctx, codersdk.UsersRequest{})
103104
require.NoError(t,err)
104-
require.Len(t,users.Users,numUsers+1)// owner + created users
105+
require.Len(t,users.Users,1+numUsers)// owner + created users
105106

106107
workspaces,err:=client.Workspaces(ctx, codersdk.WorkspaceFilter{})
107108
require.NoError(t,err)
@@ -126,6 +127,8 @@ func TestRun(t *testing.T) {
126127
require.Contains(t,th.Results().Runs,"coderconnect/"+id)
127128
metrics:=th.Results().Runs["coderconnect/"+id].Metrics
128129
require.Contains(t,metrics,coderconnect.WorkspaceUpdatesLatencyMetric)
130+
require.Len(t,metrics[coderconnect.WorkspaceUpdatesLatencyMetric],userWorkspaces)
129131
require.Contains(t,metrics,coderconnect.WorkspaceUpdatesErrorsTotal)
132+
require.EqualValues(t,0,metrics[coderconnect.WorkspaceUpdatesErrorsTotal])
130133
}
131134
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp