Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3a57c5f

Browse files
committed
feat(scaletest): add runner for thundering herd autostart
1 parentff930ad commit3a57c5f

File tree

6 files changed

+568
-18
lines changed

6 files changed

+568
-18
lines changed

‎scaletest/autostart/config.go‎

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package autostart
2+
3+
import (
4+
"sync"
5+
"time"
6+
7+
"golang.org/x/xerrors"
8+
9+
"github.com/coder/coder/v2/codersdk"
10+
"github.com/coder/coder/v2/scaletest/createusers"
11+
"github.com/coder/coder/v2/scaletest/workspacebuild"
12+
)
13+
14+
typeConfigstruct {
15+
// User is the configuration for the user to create.
16+
User createusers.Config`json:"user"`
17+
18+
// Workspace is the configuration for the workspace to create. The workspace
19+
// will be built using the new user.
20+
//
21+
// OrganizationID is ignored and set to the new user's organization ID.
22+
Workspace workspacebuild.Config`json:"workspace"`
23+
24+
// WorkspaceJobTimeout is how long to wait for any one workspace job
25+
// (start or stop) to complete.
26+
WorkspaceJobTimeout time.Duration`json:"workspace_job_timeout"`
27+
28+
// AutostartDelay is how long after all the workspaces have been stopped
29+
// to schedule them to be started again.
30+
AutostartDelay time.Duration`json:"autostart_delay"`
31+
32+
// AutostartTimeout is how long to wait for the autostart build to be
33+
// initiated after the scheduled time.
34+
AutostartTimeout time.Duration`json:"autostart_timeout"`
35+
36+
Metrics*Metrics`json:"-"`
37+
38+
// SetupBarrier is used to ensure all runners own stopped workspaces
39+
// before setting the autostart schedule on each.
40+
SetupBarrier*sync.WaitGroup`json:"-"`
41+
}
42+
43+
func (cConfig)Validate()error {
44+
iferr:=c.User.Validate();err!=nil {
45+
returnxerrors.Errorf("user config: %w",err)
46+
}
47+
c.Workspace.OrganizationID=c.User.OrganizationID
48+
// This value will be overwritten during the test.
49+
c.Workspace.UserID=codersdk.Me
50+
iferr:=c.Workspace.Validate();err!=nil {
51+
returnxerrors.Errorf("workspace config: %w",err)
52+
}
53+
54+
ifc.SetupBarrier==nil {
55+
returnxerrors.New("setup barrier must be set")
56+
}
57+
58+
ifc.WorkspaceJobTimeout<=0 {
59+
returnxerrors.New("workspace_job_timeout must be greater than 0")
60+
}
61+
62+
ifc.AutostartDelay<time.Minute*2 {
63+
returnxerrors.New("autostart_delay must be at least 2 minutes")
64+
}
65+
66+
ifc.AutostartTimeout<=0 {
67+
returnxerrors.New("autostart_timeout must be greater than 0")
68+
}
69+
70+
ifc.Metrics==nil {
71+
returnxerrors.New("metrics must be set")
72+
}
73+
74+
returnnil
75+
}

‎scaletest/autostart/metrics.go‎

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package autostart
2+
3+
import (
4+
"time"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
)
8+
9+
typeMetricsstruct {
10+
AutostartJobCreationLatencySeconds prometheus.HistogramVec
11+
AutostartJobAcquiredLatencySeconds prometheus.HistogramVec
12+
AutostartTotalLatencySeconds prometheus.HistogramVec
13+
AutostartErrorsTotal prometheus.CounterVec
14+
}
15+
16+
funcNewMetrics(reg prometheus.Registerer)*Metrics {
17+
m:=&Metrics{
18+
AutostartJobCreationLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
19+
Namespace:"coderd",
20+
Subsystem:"scaletest",
21+
Name:"autostart_job_creation_latency_seconds",
22+
Help:"Time from when the workspace is scheduled to be autostarted to when the autostart job has been created.",
23+
}, []string{"username","workspace_name"}),
24+
AutostartJobAcquiredLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
25+
Namespace:"coderd",
26+
Subsystem:"scaletest",
27+
Name:"autostart_job_acquired_latency_seconds",
28+
Help:"Time from when the workspace is scheduled to be autostarted to when the job has been acquired by a provisioner daemon.",
29+
}, []string{"username","workspace_name"}),
30+
AutostartTotalLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
31+
Namespace:"coderd",
32+
Subsystem:"scaletest",
33+
Name:"autostart_total_latency_seconds",
34+
Help:"Time from when the workspace is scheduled to be autostarted to when the autostart build has finished.",
35+
}, []string{"username","workspace_name"}),
36+
AutostartErrorsTotal:*prometheus.NewCounterVec(prometheus.CounterOpts{
37+
Namespace:"coderd",
38+
Subsystem:"scaletest",
39+
Name:"autostart_errors_total",
40+
Help:"Total number of autostart errors",
41+
}, []string{"username","action"}),
42+
}
43+
44+
reg.MustRegister(m.AutostartTotalLatencySeconds)
45+
reg.MustRegister(m.AutostartJobCreationLatencySeconds)
46+
reg.MustRegister(m.AutostartJobAcquiredLatencySeconds)
47+
reg.MustRegister(m.AutostartErrorsTotal)
48+
returnm
49+
}
50+
51+
func (m*Metrics)RecordCompletion(elapsed time.Duration,usernamestring,workspacestring) {
52+
m.AutostartTotalLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
53+
}
54+
55+
func (m*Metrics)RecordJobCreation(elapsed time.Duration,usernamestring,workspacestring) {
56+
m.AutostartJobCreationLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
57+
}
58+
59+
func (m*Metrics)RecordJobAcquired(elapsed time.Duration,usernamestring,workspacestring) {
60+
m.AutostartJobAcquiredLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
61+
}
62+
63+
func (m*Metrics)AddError(usernamestring,actionstring) {
64+
m.AutostartErrorsTotal.WithLabelValues(username,action).Inc()
65+
}

‎scaletest/autostart/run.go‎

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
package autostart
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"time"
8+
9+
"golang.org/x/xerrors"
10+
11+
"cdr.dev/slog"
12+
"cdr.dev/slog/sloggers/sloghuman"
13+
"github.com/coder/coder/v2/coderd/tracing"
14+
"github.com/coder/coder/v2/codersdk"
15+
"github.com/coder/coder/v2/scaletest/createusers"
16+
"github.com/coder/coder/v2/scaletest/harness"
17+
"github.com/coder/coder/v2/scaletest/loadtestutil"
18+
"github.com/coder/coder/v2/scaletest/workspacebuild"
19+
)
20+
21+
typeRunnerstruct {
22+
client*codersdk.Client
23+
cfgConfig
24+
25+
createUserRunner*createusers.Runner
26+
workspacebuildRunner*workspacebuild.Runner
27+
28+
autostartTotalLatency time.Duration
29+
autostartJobCreationLatency time.Duration
30+
autostartJobAcquiredLatency time.Duration
31+
}
32+
33+
funcNewRunner(client*codersdk.Client,cfgConfig)*Runner {
34+
return&Runner{
35+
client:client,
36+
cfg:cfg,
37+
}
38+
}
39+
40+
var (
41+
_ harness.Runnable=&Runner{}
42+
_ harness.Cleanable=&Runner{}
43+
_ harness.Collectable=&Runner{}
44+
)
45+
46+
func (r*Runner)Run(ctx context.Context,idstring,logs io.Writer)error {
47+
ctx,span:=tracing.StartSpan(ctx)
48+
deferspan.End()
49+
50+
reachedBarrier:=false
51+
deferfunc() {
52+
if!reachedBarrier {
53+
r.cfg.SetupBarrier.Done()
54+
}
55+
}()
56+
57+
logs=loadtestutil.NewSyncWriter(logs)
58+
logger:=slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
59+
r.client.SetLogger(logger)
60+
r.client.SetLogBodies(true)
61+
62+
r.createUserRunner=createusers.NewRunner(r.client,r.cfg.User)
63+
newUserAndToken,err:=r.createUserRunner.RunReturningUser(ctx,id,logs)
64+
iferr!=nil {
65+
r.cfg.Metrics.AddError("","create_user")
66+
returnxerrors.Errorf("create user: %w",err)
67+
}
68+
newUser:=newUserAndToken.User
69+
70+
newUserClient:=codersdk.New(r.client.URL,
71+
codersdk.WithSessionToken(newUserAndToken.SessionToken),
72+
codersdk.WithLogger(logger),
73+
codersdk.WithLogBodies())
74+
75+
logger.Info(ctx,fmt.Sprintf("user %q created",newUser.Username),slog.F("id",newUser.ID.String()))
76+
77+
workspaceBuildConfig:=r.cfg.Workspace
78+
workspaceBuildConfig.OrganizationID=r.cfg.User.OrganizationID
79+
workspaceBuildConfig.UserID=newUser.ID.String()
80+
// We'll wait for the build ourselves to avoid multiple API requests
81+
workspaceBuildConfig.NoWaitForBuild=true
82+
83+
r.workspacebuildRunner=workspacebuild.NewRunner(newUserClient,workspaceBuildConfig)
84+
workspace,err:=r.workspacebuildRunner.RunReturningWorkspace(ctx,id,logs)
85+
iferr!=nil {
86+
r.cfg.Metrics.AddError(newUser.Username,"create_workspace")
87+
returnxerrors.Errorf("create workspace: %w",err)
88+
}
89+
90+
watchCtx,cancel:=context.WithCancel(ctx)
91+
defercancel()
92+
workspaceUpdates,err:=newUserClient.WatchWorkspace(watchCtx,workspace.ID)
93+
iferr!=nil {
94+
r.cfg.Metrics.AddError(newUser.Username,"watch_workspace")
95+
returnxerrors.Errorf("watch workspace: %w",err)
96+
}
97+
98+
createWorkspaceCtx,cancel2:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout)
99+
defercancel2()
100+
101+
err=waitForWorkspaceUpdate(createWorkspaceCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
102+
returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStart&&
103+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
104+
})
105+
iferr!=nil {
106+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_initial_build")
107+
returnxerrors.Errorf("timeout waiting for initial workspace build to complete: %w",err)
108+
}
109+
110+
logger.Info(ctx,fmt.Sprintf("stopping workspace %q",workspace.Name))
111+
112+
_,err=newUserClient.CreateWorkspaceBuild(ctx,workspace.ID, codersdk.CreateWorkspaceBuildRequest{
113+
Transition:codersdk.WorkspaceTransitionStop,
114+
})
115+
iferr!=nil {
116+
r.cfg.Metrics.AddError(newUser.Username,"create_stop_build")
117+
returnxerrors.Errorf("create stop build: %w",err)
118+
}
119+
120+
stopBuildCtx,cancel3:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout)
121+
defercancel3()
122+
123+
err=waitForWorkspaceUpdate(stopBuildCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
124+
returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStop&&
125+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
126+
})
127+
iferr!=nil {
128+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_stop_build")
129+
returnxerrors.Errorf("timeout waiting for stop build to complete: %w",err)
130+
}
131+
132+
logger.Info(ctx,fmt.Sprintf("workspace %q stopped successfully",workspace.Name))
133+
134+
logger.Info(ctx,"waiting for all runners to reach barrier")
135+
reachedBarrier=true
136+
r.cfg.SetupBarrier.Done()
137+
r.cfg.SetupBarrier.Wait()
138+
logger.Info(ctx,"all runners reached barrier, proceeding with autostart schedule")
139+
140+
testStartTime:=time.Now()
141+
autostartTime:=testStartTime.Add(r.cfg.AutostartDelay).Round(time.Minute)
142+
timeUntilAutostart:=autostartTime.Sub(testStartTime)
143+
schedule:=fmt.Sprintf("CRON_TZ=UTC %d %d * * *",autostartTime.Minute(),autostartTime.Hour())
144+
145+
logger.Info(ctx,fmt.Sprintf("setting autostart schedule for workspace %q: %s",workspace.Name,schedule))
146+
147+
err=newUserClient.UpdateWorkspaceAutostart(ctx,workspace.ID, codersdk.UpdateWorkspaceAutostartRequest{
148+
Schedule:&schedule,
149+
})
150+
iferr!=nil {
151+
r.cfg.Metrics.AddError(newUser.Username,"update_workspace_autostart")
152+
returnxerrors.Errorf("update workspace autostart: %w",err)
153+
}
154+
155+
logger.Info(ctx,fmt.Sprintf("waiting for workspace %q to autostart",workspace.Name))
156+
157+
autostartInitiateCtx,cancel4:=context.WithTimeout(ctx,timeUntilAutostart+r.cfg.AutostartTimeout)
158+
defercancel4()
159+
160+
logger.Info(ctx,"listening for workspace updates to detect autostart build")
161+
162+
err=waitForWorkspaceUpdate(autostartInitiateCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
163+
ifws.LatestBuild.Transition!=codersdk.WorkspaceTransitionStart {
164+
returnfalse
165+
}
166+
167+
// The job has been created, but it might be pending
168+
ifr.autostartJobCreationLatency==0 {
169+
r.autostartJobCreationLatency=time.Since(autostartTime)
170+
r.cfg.Metrics.RecordJobCreation(r.autostartJobCreationLatency,newUser.Username,workspace.Name)
171+
}
172+
173+
ifws.LatestBuild.Job.Status==codersdk.ProvisionerJobRunning||
174+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded {
175+
// Job is no longer pending, but it might not have finished
176+
ifr.autostartJobAcquiredLatency==0 {
177+
r.autostartJobAcquiredLatency=time.Since(autostartTime)
178+
r.cfg.Metrics.RecordJobAcquired(r.autostartJobAcquiredLatency,newUser.Username,workspace.Name)
179+
}
180+
returnws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
181+
}
182+
183+
returnfalse
184+
})
185+
iferr!=nil {
186+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_autostart_build")
187+
returnxerrors.Errorf("timeout waiting for autostart build to be created: %w",err)
188+
}
189+
190+
r.autostartTotalLatency=time.Since(autostartTime)
191+
192+
logger.Info(ctx,fmt.Sprintf("autostart completed in %v",r.autostartTotalLatency))
193+
r.cfg.Metrics.RecordCompletion(r.autostartTotalLatency,newUser.Username,workspace.Name)
194+
195+
returnnil
196+
}
197+
198+
funcwaitForWorkspaceUpdate(ctx context.Context,logger slog.Logger,updates<-chan codersdk.Workspace,shouldBreakfunc(codersdk.Workspace)bool)error {
199+
for {
200+
select {
201+
case<-ctx.Done():
202+
returnctx.Err()
203+
caseupdatedWorkspace,ok:=<-updates:
204+
if!ok {
205+
returnxerrors.New("workspace updates channel closed")
206+
}
207+
logger.Debug(ctx,"received workspace update",slog.F("update",updatedWorkspace))
208+
ifshouldBreak(updatedWorkspace) {
209+
returnnil
210+
}
211+
}
212+
}
213+
}
214+
215+
func (r*Runner)Cleanup(ctx context.Context,idstring,logs io.Writer)error {
216+
ifr.workspacebuildRunner!=nil {
217+
_,_=fmt.Fprintln(logs,"Cleaning up workspace...")
218+
iferr:=r.workspacebuildRunner.Cleanup(ctx,id,logs);err!=nil {
219+
returnxerrors.Errorf("cleanup workspace: %w",err)
220+
}
221+
}
222+
223+
ifr.createUserRunner!=nil {
224+
_,_=fmt.Fprintln(logs,"Cleaning up user...")
225+
iferr:=r.createUserRunner.Cleanup(ctx,id,logs);err!=nil {
226+
returnxerrors.Errorf("cleanup user: %w",err)
227+
}
228+
}
229+
230+
returnnil
231+
}
232+
233+
// Keys of the map returned by (*Runner).GetMetrics.
const (
	// AutostartTotalLatencyMetric keys the total autostart latency.
	AutostartTotalLatencyMetric = "autostart_total_latency_seconds"
	// AutostartJobCreationLatencyMetric keys the job-creation latency.
	AutostartJobCreationLatencyMetric = "autostart_job_creation_latency_seconds"
	// AutostartJobAcquiredLatencyMetric keys the job-acquired latency.
	AutostartJobAcquiredLatencyMetric = "autostart_job_acquired_latency_seconds"
)
238+
239+
func (r*Runner)GetMetrics()map[string]any {
240+
returnmap[string]any{
241+
AutostartTotalLatencyMetric:r.autostartTotalLatency.Seconds(),
242+
AutostartJobCreationLatencyMetric:r.autostartJobCreationLatency.Seconds(),
243+
AutostartJobAcquiredLatencyMetric:r.autostartJobAcquiredLatency.Seconds(),
244+
}
245+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp