Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitf84a789

Browse files
feat(scaletest): add runner for thundering herd autostart (#19998)
Relates tocoder/internal#911This PR adds a scaletest runner that simulates a user with a workspace configured to autostart at a specific time.An instance of this autostart runner does the following:1. Creates a user.2. Creates a workspace.3. Listens on `/api/v2/workspaces/<ws-id>/watch` to wait for build completions throughout the run.4. Stops the workspace, waits for the build to finish.4. Waits for all other concurrently executing runners (per the `SetupBarrier` `WaitGroup`) to also have a stopped workspace.5. Sets the workspace autostart time to `time.Now().Add(cfg.AutoStartDelay)`6. Waits for the autostart workspace build to begin, enter the `pending` status, and then `running`.7. Waits for the autostart workspace build to end.Exposes four prometheus metrics:- `autostart_job_creation_latency_seconds` - HistogramVec. Labels = {username, workspace_name}- `autostart_job_acquired_latency_seconds` - HistogramVec. Labels = {username, workspace_name}- `autostart_total_latency_seconds` - `HistogramVec`. Labels = `{username, workspace_name}`.- `autostart_errors_total` - `CounterVec`. Labels = `{username, action}`.
1 parent41420ae commitf84a789

File tree

6 files changed

+568
-18
lines changed

6 files changed

+568
-18
lines changed

‎scaletest/autostart/config.go‎

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package autostart
2+
3+
import (
4+
"sync"
5+
"time"
6+
7+
"golang.org/x/xerrors"
8+
9+
"github.com/coder/coder/v2/codersdk"
10+
"github.com/coder/coder/v2/scaletest/createusers"
11+
"github.com/coder/coder/v2/scaletest/workspacebuild"
12+
)
13+
14+
typeConfigstruct {
15+
// User is the configuration for the user to create.
16+
User createusers.Config`json:"user"`
17+
18+
// Workspace is the configuration for the workspace to create. The workspace
19+
// will be built using the new user.
20+
//
21+
// OrganizationID is ignored and set to the new user's organization ID.
22+
Workspace workspacebuild.Config`json:"workspace"`
23+
24+
// WorkspaceJobTimeout is how long to wait for any one workspace job
25+
// (start or stop) to complete.
26+
WorkspaceJobTimeout time.Duration`json:"workspace_job_timeout"`
27+
28+
// AutostartDelay is how long after all the workspaces have been stopped
29+
// to schedule them to be started again.
30+
AutostartDelay time.Duration`json:"autostart_delay"`
31+
32+
// AutostartTimeout is how long to wait for the autostart build to be
33+
// initiated after the scheduled time.
34+
AutostartTimeout time.Duration`json:"autostart_timeout"`
35+
36+
Metrics*Metrics`json:"-"`
37+
38+
// SetupBarrier is used to ensure all runners own stopped workspaces
39+
// before setting the autostart schedule on each.
40+
SetupBarrier*sync.WaitGroup`json:"-"`
41+
}
42+
43+
func (cConfig)Validate()error {
44+
iferr:=c.User.Validate();err!=nil {
45+
returnxerrors.Errorf("user config: %w",err)
46+
}
47+
c.Workspace.OrganizationID=c.User.OrganizationID
48+
// This value will be overwritten during the test.
49+
c.Workspace.UserID=codersdk.Me
50+
iferr:=c.Workspace.Validate();err!=nil {
51+
returnxerrors.Errorf("workspace config: %w",err)
52+
}
53+
54+
ifc.SetupBarrier==nil {
55+
returnxerrors.New("setup barrier must be set")
56+
}
57+
58+
ifc.WorkspaceJobTimeout<=0 {
59+
returnxerrors.New("workspace_job_timeout must be greater than 0")
60+
}
61+
62+
ifc.AutostartDelay<time.Minute*2 {
63+
returnxerrors.New("autostart_delay must be at least 2 minutes")
64+
}
65+
66+
ifc.AutostartTimeout<=0 {
67+
returnxerrors.New("autostart_timeout must be greater than 0")
68+
}
69+
70+
ifc.Metrics==nil {
71+
returnxerrors.New("metrics must be set")
72+
}
73+
74+
returnnil
75+
}

‎scaletest/autostart/metrics.go‎

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package autostart
2+
3+
import (
4+
"time"
5+
6+
"github.com/prometheus/client_golang/prometheus"
7+
)
8+
9+
typeMetricsstruct {
10+
AutostartJobCreationLatencySeconds prometheus.HistogramVec
11+
AutostartJobAcquiredLatencySeconds prometheus.HistogramVec
12+
AutostartTotalLatencySeconds prometheus.HistogramVec
13+
AutostartErrorsTotal prometheus.CounterVec
14+
}
15+
16+
funcNewMetrics(reg prometheus.Registerer)*Metrics {
17+
m:=&Metrics{
18+
AutostartJobCreationLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
19+
Namespace:"coderd",
20+
Subsystem:"scaletest",
21+
Name:"autostart_job_creation_latency_seconds",
22+
Help:"Time from when the workspace is scheduled to be autostarted to when the autostart job has been created.",
23+
}, []string{"username","workspace_name"}),
24+
AutostartJobAcquiredLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
25+
Namespace:"coderd",
26+
Subsystem:"scaletest",
27+
Name:"autostart_job_acquired_latency_seconds",
28+
Help:"Time from when the workspace is scheduled to be autostarted to when the job has been acquired by a provisioner daemon.",
29+
}, []string{"username","workspace_name"}),
30+
AutostartTotalLatencySeconds:*prometheus.NewHistogramVec(prometheus.HistogramOpts{
31+
Namespace:"coderd",
32+
Subsystem:"scaletest",
33+
Name:"autostart_total_latency_seconds",
34+
Help:"Time from when the workspace is scheduled to be autostarted to when the autostart build has finished.",
35+
}, []string{"username","workspace_name"}),
36+
AutostartErrorsTotal:*prometheus.NewCounterVec(prometheus.CounterOpts{
37+
Namespace:"coderd",
38+
Subsystem:"scaletest",
39+
Name:"autostart_errors_total",
40+
Help:"Total number of autostart errors",
41+
}, []string{"username","action"}),
42+
}
43+
44+
reg.MustRegister(m.AutostartTotalLatencySeconds)
45+
reg.MustRegister(m.AutostartJobCreationLatencySeconds)
46+
reg.MustRegister(m.AutostartJobAcquiredLatencySeconds)
47+
reg.MustRegister(m.AutostartErrorsTotal)
48+
returnm
49+
}
50+
51+
func (m*Metrics)RecordCompletion(elapsed time.Duration,usernamestring,workspacestring) {
52+
m.AutostartTotalLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
53+
}
54+
55+
func (m*Metrics)RecordJobCreation(elapsed time.Duration,usernamestring,workspacestring) {
56+
m.AutostartJobCreationLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
57+
}
58+
59+
func (m*Metrics)RecordJobAcquired(elapsed time.Duration,usernamestring,workspacestring) {
60+
m.AutostartJobAcquiredLatencySeconds.WithLabelValues(username,workspace).Observe(elapsed.Seconds())
61+
}
62+
63+
func (m*Metrics)AddError(usernamestring,actionstring) {
64+
m.AutostartErrorsTotal.WithLabelValues(username,action).Inc()
65+
}

‎scaletest/autostart/run.go‎

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
package autostart
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"io"
7+
"time"
8+
9+
"golang.org/x/xerrors"
10+
11+
"cdr.dev/slog"
12+
"cdr.dev/slog/sloggers/sloghuman"
13+
"github.com/coder/coder/v2/coderd/tracing"
14+
"github.com/coder/coder/v2/codersdk"
15+
"github.com/coder/coder/v2/scaletest/createusers"
16+
"github.com/coder/coder/v2/scaletest/harness"
17+
"github.com/coder/coder/v2/scaletest/loadtestutil"
18+
"github.com/coder/coder/v2/scaletest/workspacebuild"
19+
)
20+
21+
typeRunnerstruct {
22+
client*codersdk.Client
23+
cfgConfig
24+
25+
createUserRunner*createusers.Runner
26+
workspacebuildRunner*workspacebuild.Runner
27+
28+
autostartTotalLatency time.Duration
29+
autostartJobCreationLatency time.Duration
30+
autostartJobAcquiredLatency time.Duration
31+
}
32+
33+
funcNewRunner(client*codersdk.Client,cfgConfig)*Runner {
34+
return&Runner{
35+
client:client,
36+
cfg:cfg,
37+
}
38+
}
39+
40+
var (
41+
_ harness.Runnable=&Runner{}
42+
_ harness.Cleanable=&Runner{}
43+
_ harness.Collectable=&Runner{}
44+
)
45+
46+
func (r*Runner)Run(ctx context.Context,idstring,logs io.Writer)error {
47+
ctx,span:=tracing.StartSpan(ctx)
48+
deferspan.End()
49+
50+
reachedBarrier:=false
51+
deferfunc() {
52+
if!reachedBarrier {
53+
r.cfg.SetupBarrier.Done()
54+
}
55+
}()
56+
57+
logs=loadtestutil.NewSyncWriter(logs)
58+
logger:=slog.Make(sloghuman.Sink(logs)).Leveled(slog.LevelDebug)
59+
r.client.SetLogger(logger)
60+
r.client.SetLogBodies(true)
61+
62+
r.createUserRunner=createusers.NewRunner(r.client,r.cfg.User)
63+
newUserAndToken,err:=r.createUserRunner.RunReturningUser(ctx,id,logs)
64+
iferr!=nil {
65+
r.cfg.Metrics.AddError("","create_user")
66+
returnxerrors.Errorf("create user: %w",err)
67+
}
68+
newUser:=newUserAndToken.User
69+
70+
newUserClient:=codersdk.New(r.client.URL,
71+
codersdk.WithSessionToken(newUserAndToken.SessionToken),
72+
codersdk.WithLogger(logger),
73+
codersdk.WithLogBodies())
74+
75+
//nolint:gocritic // short log is fine
76+
logger.Info(ctx,"user created",slog.F("username",newUser.Username),slog.F("user_id",newUser.ID.String()))
77+
78+
workspaceBuildConfig:=r.cfg.Workspace
79+
workspaceBuildConfig.OrganizationID=r.cfg.User.OrganizationID
80+
workspaceBuildConfig.UserID=newUser.ID.String()
81+
// We'll wait for the build ourselves to avoid multiple API requests
82+
workspaceBuildConfig.NoWaitForBuild=true
83+
84+
r.workspacebuildRunner=workspacebuild.NewRunner(newUserClient,workspaceBuildConfig)
85+
workspace,err:=r.workspacebuildRunner.RunReturningWorkspace(ctx,id,logs)
86+
iferr!=nil {
87+
r.cfg.Metrics.AddError(newUser.Username,"create_workspace")
88+
returnxerrors.Errorf("create workspace: %w",err)
89+
}
90+
91+
watchCtx,cancel:=context.WithCancel(ctx)
92+
defercancel()
93+
workspaceUpdates,err:=newUserClient.WatchWorkspace(watchCtx,workspace.ID)
94+
iferr!=nil {
95+
r.cfg.Metrics.AddError(newUser.Username,"watch_workspace")
96+
returnxerrors.Errorf("watch workspace: %w",err)
97+
}
98+
99+
createWorkspaceCtx,cancel2:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout)
100+
defercancel2()
101+
102+
err=waitForWorkspaceUpdate(createWorkspaceCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
103+
returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStart&&
104+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
105+
})
106+
iferr!=nil {
107+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_initial_build")
108+
returnxerrors.Errorf("timeout waiting for initial workspace build to complete: %w",err)
109+
}
110+
111+
logger.Info(ctx,"stopping workspace",slog.F("workspace_name",workspace.Name))
112+
113+
_,err=newUserClient.CreateWorkspaceBuild(ctx,workspace.ID, codersdk.CreateWorkspaceBuildRequest{
114+
Transition:codersdk.WorkspaceTransitionStop,
115+
})
116+
iferr!=nil {
117+
r.cfg.Metrics.AddError(newUser.Username,"create_stop_build")
118+
returnxerrors.Errorf("create stop build: %w",err)
119+
}
120+
121+
stopBuildCtx,cancel3:=context.WithTimeout(ctx,r.cfg.WorkspaceJobTimeout)
122+
defercancel3()
123+
124+
err=waitForWorkspaceUpdate(stopBuildCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
125+
returnws.LatestBuild.Transition==codersdk.WorkspaceTransitionStop&&
126+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
127+
})
128+
iferr!=nil {
129+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_stop_build")
130+
returnxerrors.Errorf("timeout waiting for stop build to complete: %w",err)
131+
}
132+
133+
logger.Info(ctx,"workspace stopped successfully",slog.F("workspace_name",workspace.Name))
134+
135+
logger.Info(ctx,"waiting for all runners to reach barrier")
136+
reachedBarrier=true
137+
r.cfg.SetupBarrier.Done()
138+
r.cfg.SetupBarrier.Wait()
139+
logger.Info(ctx,"all runners reached barrier, proceeding with autostart schedule")
140+
141+
testStartTime:=time.Now().UTC()
142+
autostartTime:=testStartTime.Add(r.cfg.AutostartDelay).Round(time.Minute)
143+
schedule:=fmt.Sprintf("CRON_TZ=UTC %d %d * * *",autostartTime.Minute(),autostartTime.Hour())
144+
145+
logger.Info(ctx,"setting autostart schedule for workspace",slog.F("workspace_name",workspace.Name),slog.F("schedule",schedule))
146+
147+
err=newUserClient.UpdateWorkspaceAutostart(ctx,workspace.ID, codersdk.UpdateWorkspaceAutostartRequest{
148+
Schedule:&schedule,
149+
})
150+
iferr!=nil {
151+
r.cfg.Metrics.AddError(newUser.Username,"update_workspace_autostart")
152+
returnxerrors.Errorf("update workspace autostart: %w",err)
153+
}
154+
155+
logger.Info(ctx,"waiting for workspace to autostart",slog.F("workspace_name",workspace.Name))
156+
157+
autostartInitiateCtx,cancel4:=context.WithDeadline(ctx,autostartTime.Add(r.cfg.AutostartDelay))
158+
defercancel4()
159+
160+
logger.Info(ctx,"listening for workspace updates to detect autostart build")
161+
162+
err=waitForWorkspaceUpdate(autostartInitiateCtx,logger,workspaceUpdates,func(ws codersdk.Workspace)bool {
163+
ifws.LatestBuild.Transition!=codersdk.WorkspaceTransitionStart {
164+
returnfalse
165+
}
166+
167+
// The job has been created, but it might be pending
168+
ifr.autostartJobCreationLatency==0 {
169+
r.autostartJobCreationLatency=time.Since(autostartTime)
170+
r.cfg.Metrics.RecordJobCreation(r.autostartJobCreationLatency,newUser.Username,workspace.Name)
171+
}
172+
173+
ifws.LatestBuild.Job.Status==codersdk.ProvisionerJobRunning||
174+
ws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded {
175+
// Job is no longer pending, but it might not have finished
176+
ifr.autostartJobAcquiredLatency==0 {
177+
r.autostartJobAcquiredLatency=time.Since(autostartTime)
178+
r.cfg.Metrics.RecordJobAcquired(r.autostartJobAcquiredLatency,newUser.Username,workspace.Name)
179+
}
180+
returnws.LatestBuild.Job.Status==codersdk.ProvisionerJobSucceeded
181+
}
182+
183+
returnfalse
184+
})
185+
iferr!=nil {
186+
r.cfg.Metrics.AddError(newUser.Username,"wait_for_autostart_build")
187+
returnxerrors.Errorf("timeout waiting for autostart build to be created: %w",err)
188+
}
189+
190+
r.autostartTotalLatency=time.Since(autostartTime)
191+
192+
logger.Info(ctx,"autostart workspace build complete",slog.F("duration",r.autostartTotalLatency))
193+
r.cfg.Metrics.RecordCompletion(r.autostartTotalLatency,newUser.Username,workspace.Name)
194+
195+
returnnil
196+
}
197+
198+
funcwaitForWorkspaceUpdate(ctx context.Context,logger slog.Logger,updates<-chan codersdk.Workspace,shouldBreakfunc(codersdk.Workspace)bool)error {
199+
for {
200+
select {
201+
case<-ctx.Done():
202+
returnctx.Err()
203+
caseupdatedWorkspace,ok:=<-updates:
204+
if!ok {
205+
returnxerrors.New("workspace updates channel closed")
206+
}
207+
logger.Debug(ctx,"received workspace update",slog.F("update",updatedWorkspace))
208+
ifshouldBreak(updatedWorkspace) {
209+
returnnil
210+
}
211+
}
212+
}
213+
}
214+
215+
func (r*Runner)Cleanup(ctx context.Context,idstring,logs io.Writer)error {
216+
ifr.workspacebuildRunner!=nil {
217+
_,_=fmt.Fprintln(logs,"Cleaning up workspace...")
218+
iferr:=r.workspacebuildRunner.Cleanup(ctx,id,logs);err!=nil {
219+
returnxerrors.Errorf("cleanup workspace: %w",err)
220+
}
221+
}
222+
223+
ifr.createUserRunner!=nil {
224+
_,_=fmt.Fprintln(logs,"Cleaning up user...")
225+
iferr:=r.createUserRunner.Cleanup(ctx,id,logs);err!=nil {
226+
returnxerrors.Errorf("cleanup user: %w",err)
227+
}
228+
}
229+
230+
returnnil
231+
}
232+
233+
const (
234+
AutostartTotalLatencyMetric="autostart_total_latency_seconds"
235+
AutostartJobCreationLatencyMetric="autostart_job_creation_latency_seconds"
236+
AutostartJobAcquiredLatencyMetric="autostart_job_acquired_latency_seconds"
237+
)
238+
239+
func (r*Runner)GetMetrics()map[string]any {
240+
returnmap[string]any{
241+
AutostartTotalLatencyMetric:r.autostartTotalLatency.Seconds(),
242+
AutostartJobCreationLatencyMetric:r.autostartJobCreationLatency.Seconds(),
243+
AutostartJobAcquiredLatencyMetric:r.autostartJobAcquiredLatency.Seconds(),
244+
}
245+
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp