@@ -25,7 +25,9 @@ type Runner struct {
25
25
createUserRunner * createusers.Runner
26
26
workspacebuildRunner * workspacebuild.Runner
27
27
28
- autostartDuration time.Duration
28
+ autostartTotalLatency time.Duration
29
+ autostartJobCreationLatency time.Duration
30
+ autostartJobAcquiredLatency time.Duration
29
31
// Closed when the autostart schedule has been set.
30
32
// Used in tests.
31
33
autostartSet chan struct {}
@@ -49,6 +51,13 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
49
51
ctx ,span := tracing .StartSpan (ctx )
50
52
defer span .End ()
51
53
54
+ reachedBarrier := false
55
+ defer func () {
56
+ if ! reachedBarrier {
57
+ r .cfg .SetupBarrier .Done ()
58
+ }
59
+ }()
60
+
52
61
logs = loadtestutil .NewSyncWriter (logs )
53
62
logger := slog .Make (sloghuman .Sink (logs )).Leveled (slog .LevelDebug )
54
63
r .client .SetLogger (logger )
@@ -72,6 +81,8 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
72
81
workspaceBuildConfig := r .cfg .Workspace
73
82
workspaceBuildConfig .OrganizationID = r .cfg .User .OrganizationID
74
83
workspaceBuildConfig .UserID = newUser .ID .String ()
84
+ // We'll wait for the build ourselves to avoid multiple API requests
85
+ workspaceBuildConfig .NoWaitForBuild = true
75
86
76
87
r .workspacebuildRunner = workspacebuild .NewRunner (newUserClient ,workspaceBuildConfig )
77
88
workspace ,err := r .workspacebuildRunner .RunReturningWorkspace (ctx ,id ,logs )
@@ -80,36 +91,60 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
80
91
return xerrors .Errorf ("create workspace: %w" ,err )
81
92
}
82
93
83
- logger .Info (ctx ,fmt .Sprintf ("workspace %q created" ,workspace .Name ))
94
+ watchCtx ,cancel := context .WithCancel (ctx )
95
+ defer cancel ()
96
+ workspaceUpdates ,err := newUserClient .WatchWorkspace (watchCtx ,workspace .ID )
97
+ if err != nil {
98
+ r .cfg .Metrics .AddError (newUser .Username ,"watch_workspace" )
99
+ return xerrors .Errorf ("watch workspace: %w" ,err )
100
+ }
101
+
102
+ createWorkspaceCtx ,cancel2 := context .WithTimeout (ctx ,r .cfg .WorkspaceJobTimeout )
103
+ defer cancel2 ()
104
+
105
+ err = waitForWorkspaceUpdate (createWorkspaceCtx ,logger ,workspaceUpdates ,func (ws codersdk.Workspace )bool {
106
+ return ws .LatestBuild .Transition == codersdk .WorkspaceTransitionStart &&
107
+ ws .LatestBuild .Job .Status == codersdk .ProvisionerJobSucceeded
108
+ })
109
+ if err != nil {
110
+ r .cfg .Metrics .AddError (newUser .Username ,"wait_for_initial_build" )
111
+ return xerrors .Errorf ("timeout waiting for initial workspace build to complete: %w" ,err )
112
+ }
84
113
85
114
logger .Info (ctx ,fmt .Sprintf ("stopping workspace %q" ,workspace .Name ))
86
115
87
- stopBuild ,err : =newUserClient .CreateWorkspaceBuild (ctx ,workspace .ID , codersdk.CreateWorkspaceBuildRequest {
116
+ _ ,err = newUserClient .CreateWorkspaceBuild (ctx ,workspace .ID , codersdk.CreateWorkspaceBuildRequest {
88
117
Transition :codersdk .WorkspaceTransitionStop ,
89
118
})
90
119
if err != nil {
91
120
r .cfg .Metrics .AddError (newUser .Username ,"create_stop_build" )
92
121
return xerrors .Errorf ("create stop build: %w" ,err )
93
122
}
94
123
95
- stopBuildCtx ,cancel2 := context .WithTimeout (ctx ,r .cfg .WorkspaceJobTimeout )
96
- defer cancel2 ()
124
+ stopBuildCtx ,cancel3 := context .WithTimeout (ctx ,r .cfg .WorkspaceJobTimeout )
125
+ defer cancel3 ()
97
126
98
- err = workspacebuild .WaitForBuild (stopBuildCtx ,logs ,newUserClient ,stopBuild .ID )
127
+ err = waitForWorkspaceUpdate (stopBuildCtx ,logger ,workspaceUpdates ,func (ws codersdk.Workspace )bool {
128
+ return ws .LatestBuild .Transition == codersdk .WorkspaceTransitionStop &&
129
+ ws .LatestBuild .Job .Status == codersdk .ProvisionerJobSucceeded
130
+ })
99
131
if err != nil {
100
132
r .cfg .Metrics .AddError (newUser .Username ,"wait_for_stop_build" )
101
- return xerrors .Errorf ("wait for stop build to complete: %w" ,err )
133
+ return xerrors .Errorf ("timeout waiting for stop build to complete: %w" ,err )
102
134
}
103
135
104
136
logger .Info (ctx ,fmt .Sprintf ("workspace %q stopped successfully" ,workspace .Name ))
105
137
106
138
logger .Info (ctx ,"waiting for all runners to reach barrier" )
139
+ reachedBarrier = true
107
140
r .cfg .SetupBarrier .Done ()
108
141
r .cfg .SetupBarrier .Wait ()
109
142
logger .Info (ctx ,"all runners reached barrier, proceeding with autostart schedule" )
110
143
111
- autoStartTime := r .cfg .Clock .Now ().Add (r .cfg .AutostartDelay )
112
- schedule := fmt .Sprintf ("CRON_TZ=UTC %d %d * * *" ,autoStartTime .Minute (),autoStartTime .Hour ())
144
+ testStartTime := r .cfg .Clock .Now ()
145
+ autostartTime := testStartTime .Add (r .cfg .AutostartDelay ).Round (time .Minute )
146
+ timeUntilAutostart := autostartTime .Sub (testStartTime )
147
+ schedule := fmt .Sprintf ("CRON_TZ=UTC %d %d * * *" ,autostartTime .Minute (),autostartTime .Hour ())
113
148
114
149
logger .Info (ctx ,fmt .Sprintf ("setting autostart schedule for workspace %q: %s" ,workspace .Name ,schedule ))
115
150
@@ -122,61 +157,66 @@ func (r *Runner) Run(ctx context.Context, id string, logs io.Writer) error {
122
157
}
123
158
close (r .autostartSet )
124
159
125
- logger .Info (ctx ,fmt .Sprintf ("autostart schedule set for workspace %q" ,workspace .Name ))
126
-
127
160
logger .Info (ctx ,fmt .Sprintf ("waiting for workspace %q to autostart" ,workspace .Name ))
128
161
129
- autostartInitiateCtx ,cancel2 := context .WithTimeout (ctx ,r . cfg . AutostartTimeout + r .cfg .AutostartDelay )
130
- defer cancel2 ()
162
+ autostartInitiateCtx ,cancel4 := context .WithTimeout (ctx ,timeUntilAutostart + r .cfg .AutostartTimeout )
163
+ defer cancel4 ()
131
164
132
- workspaceUpdates ,err := newUserClient .WatchWorkspace (autostartInitiateCtx ,workspace .ID )
133
- if err != nil {
134
- r .cfg .Metrics .AddError (newUser .Username ,"watch_workspace" )
135
- return xerrors .Errorf ("watch workspace: %w" ,err )
136
- }
165
+ logger .Info (ctx ,"listening for workspace updates to detect autostart build" )
137
166
138
- var autoStartBuild codersdk.WorkspaceBuild
167
+ err = waitForWorkspaceUpdate (autostartInitiateCtx ,logger ,workspaceUpdates ,func (ws codersdk.Workspace )bool {
168
+ if ws .LatestBuild .Transition != codersdk .WorkspaceTransitionStart {
169
+ return false
170
+ }
139
171
140
- logger .Info (ctx ,"listening for workspace updates to detect autostart build" )
141
- waitNewBuildLoop:
142
- for {
143
- select {
144
- case <- autostartInitiateCtx .Done ():
145
- return xerrors .Errorf ("timeout waiting for autostart build to be created: %w" ,autostartInitiateCtx .Err ())
146
- case updatedWorkspace ,ok := <- workspaceUpdates :
147
- if ! ok {
148
- r .cfg .Metrics .AddError (newUser .Username ,"workspace_updates_channel_closed" )
149
- return xerrors .Errorf ("workspace updates channel closed" )
150
- }
172
+ // The job has been created, but it might be pending
173
+ if r .autostartJobCreationLatency == 0 {
174
+ r .autostartJobCreationLatency = r .cfg .Clock .Since (autostartTime )
175
+ r .cfg .Metrics .RecordJobCreation (r .autostartJobCreationLatency ,newUser .Username ,workspace .Name )
176
+ }
151
177
152
- if updatedWorkspace .LatestBuild .ID != stopBuild .ID &&
153
- updatedWorkspace .LatestBuild .Transition == codersdk .WorkspaceTransitionStart {
154
- autoStartBuild = updatedWorkspace .LatestBuild
155
- logger .Info (ctx ,fmt .Sprintf ("autostart build created with ID %s" ,autoStartBuild .ID ))
156
- break waitNewBuildLoop
178
+ if ws .LatestBuild .Job .Status == codersdk .ProvisionerJobRunning ||
179
+ ws .LatestBuild .Job .Status == codersdk .ProvisionerJobSucceeded {
180
+ // Job is no longer pending, but it might not have finished
181
+ if r .autostartJobAcquiredLatency == 0 {
182
+ r .autostartJobAcquiredLatency = r .cfg .Clock .Since (autostartTime )
183
+ r .cfg .Metrics .RecordJobAcquired (r .autostartJobAcquiredLatency ,newUser .Username ,workspace .Name )
157
184
}
185
+ return ws .LatestBuild .Job .Status == codersdk .ProvisionerJobSucceeded
158
186
}
159
- }
160
-
161
- logger .Info (ctx ,"waiting for autostart build to complete" )
162
- buildCompleteCtx ,cancel3 := context .WithTimeout (ctx ,r .cfg .WorkspaceJobTimeout )
163
- defer cancel3 ()
164
187
165
- err = workspacebuild .WaitForBuild (buildCompleteCtx ,logs ,newUserClient ,autoStartBuild .ID )
188
+ return false
189
+ })
166
190
if err != nil {
167
191
r .cfg .Metrics .AddError (newUser .Username ,"wait_for_autostart_build" )
168
- return xerrors .Errorf ("wait for autostart build tocomplete : %w" ,err )
192
+ return xerrors .Errorf ("timeout waiting for autostart build tobe created : %w" ,err )
169
193
}
170
194
171
- r .autostartDuration = r .cfg .Clock .Since (autoStartTime )
172
- logger .Info (ctx ,fmt .Sprintf ("workspace %q autostarted successfully" ,workspace .Name ))
195
+ r .autostartTotalLatency = r .cfg .Clock .Since (autostartTime )
173
196
174
- logger .Info (ctx ,fmt .Sprintf ("autostart completed in %v" ,r .autostartDuration ))
175
- r .cfg .Metrics .RecordCompletion (r .autostartDuration ,newUser .Username ,workspace .Name )
197
+ logger .Info (ctx ,fmt .Sprintf ("autostart completed in %v" ,r .autostartTotalLatency ))
198
+ r .cfg .Metrics .RecordCompletion (r .autostartTotalLatency ,newUser .Username ,workspace .Name )
176
199
177
200
return nil
178
201
}
179
202
203
+ func waitForWorkspaceUpdate (ctx context.Context ,logger slog.Logger ,updates <- chan codersdk.Workspace ,shouldBreak func (codersdk.Workspace )bool )error {
204
+ for {
205
+ select {
206
+ case <- ctx .Done ():
207
+ return ctx .Err ()
208
+ case updatedWorkspace ,ok := <- updates :
209
+ if ! ok {
210
+ return xerrors .New ("workspace updates channel closed" )
211
+ }
212
+ logger .Debug (ctx ,"received workspace update" ,slog .F ("update" ,updatedWorkspace ))
213
+ if shouldBreak (updatedWorkspace ) {
214
+ return nil
215
+ }
216
+ }
217
+ }
218
+ }
219
+
180
220
func (r * Runner )Cleanup (ctx context.Context ,id string ,logs io.Writer )error {
181
221
if r .workspacebuildRunner != nil {
182
222
_ ,_ = fmt .Fprintln (logs ,"Cleaning up workspace..." )
@@ -196,11 +236,15 @@ func (r *Runner) Cleanup(ctx context.Context, id string, logs io.Writer) error {
196
236
}
197
237
198
238
const (
199
- AutostartLatencyMetric = "autostart_latency_seconds"
239
+ AutostartTotalLatencyMetric = "autostart_total_latency_seconds"
240
+ AutostartJobCreationLatencyMetric = "autostart_job_creation_latency_seconds"
241
+ AutostartJobAcquiredLatencyMetric = "autostart_job_acquired_latency_seconds"
200
242
)
201
243
202
244
func (r * Runner )GetMetrics ()map [string ]any {
203
245
return map [string ]any {
204
- AutostartLatencyMetric :r .autostartDuration .Seconds (),
246
+ AutostartTotalLatencyMetric :r .autostartTotalLatency .Seconds (),
247
+ AutostartJobCreationLatencyMetric :r .autostartJobCreationLatency .Seconds (),
248
+ AutostartJobAcquiredLatencyMetric :r .autostartJobAcquiredLatency .Seconds (),
205
249
}
206
250
}