@@ -122,6 +122,7 @@ func New(options Options) io.Closer {
122
122
logDir :options .LogDir ,
123
123
tempDir :options .TempDir ,
124
124
lifecycleUpdate :make (chan struct {},1 ),
125
+ lifecycleReported :make (chan codersdk.WorkspaceAgentLifecycle ,1 ),
125
126
connStatsChan :make (chan * agentsdk.Stats ,1 ),
126
127
}
127
128
a .init (ctx )
@@ -150,9 +151,10 @@ type agent struct {
150
151
sessionToken atomic.Pointer [string ]
151
152
sshServer * ssh.Server
152
153
153
- lifecycleUpdate chan struct {}
154
- lifecycleMu sync.Mutex // Protects following.
155
- lifecycleState codersdk.WorkspaceAgentLifecycle
154
+ lifecycleUpdate chan struct {}
155
+ lifecycleReported chan codersdk.WorkspaceAgentLifecycle
156
+ lifecycleMu sync.RWMutex // Protects following.
157
+ lifecycleState codersdk.WorkspaceAgentLifecycle
156
158
157
159
network * tailnet.Conn
158
160
connStatsChan chan * agentsdk.Stats
@@ -205,9 +207,9 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
205
207
}
206
208
207
209
for r := retry .New (time .Second ,15 * time .Second );r .Wait (ctx ); {
208
- a .lifecycleMu .Lock ()
210
+ a .lifecycleMu .RLock ()
209
211
state := a .lifecycleState
210
- a .lifecycleMu .Unlock ()
212
+ a .lifecycleMu .RUnlock ()
211
213
212
214
if state == lastReported {
213
215
break
@@ -220,6 +222,11 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
220
222
})
221
223
if err == nil {
222
224
lastReported = state
225
+ select {
226
+ case a .lifecycleReported <- state :
227
+ case <- a .lifecycleReported :
228
+ a .lifecycleReported <- state
229
+ }
223
230
break
224
231
}
225
232
if xerrors .Is (err ,context .Canceled )|| xerrors .Is (err ,context .DeadlineExceeded ) {
@@ -231,13 +238,20 @@ func (a *agent) reportLifecycleLoop(ctx context.Context) {
231
238
}
232
239
}
233
240
241
+ // setLifecycle sets the lifecycle state and notifies the lifecycle loop.
242
+ // The state is only updated if it's a valid state transition.
234
243
func (a * agent )setLifecycle (ctx context.Context ,state codersdk.WorkspaceAgentLifecycle ) {
235
244
a .lifecycleMu .Lock ()
236
- defer a .lifecycleMu .Unlock ()
237
-
238
- a .logger .Debug (ctx ,"set lifecycle state" ,slog .F ("state" ,state ),slog .F ("previous" ,a .lifecycleState ))
239
-
245
+ lastState := a .lifecycleState
246
+ if slices .Index (codersdk .WorkspaceAgentLifecycleOrder ,lastState )> slices .Index (codersdk .WorkspaceAgentLifecycleOrder ,state ) {
247
+ a .logger .Warn (ctx ,"attempted to set lifecycle state to a previous state" ,slog .F ("last" ,lastState ),slog .F ("state" ,state ))
248
+ a .lifecycleMu .Unlock ()
249
+ return
250
+ }
240
251
a .lifecycleState = state
252
+ a .logger .Debug (ctx ,"set lifecycle state" ,slog .F ("state" ,state ),slog .F ("last" ,lastState ))
253
+ a .lifecycleMu .Unlock ()
254
+
241
255
select {
242
256
case a .lifecycleUpdate <- struct {}{}:
243
257
default :
@@ -297,9 +311,10 @@ func (a *agent) run(ctx context.Context) error {
297
311
}
298
312
}
299
313
314
+ lifecycleState := codersdk .WorkspaceAgentLifecycleReady
300
315
scriptDone := make (chan error ,1 )
301
316
scriptStart := time .Now ()
302
- err : =a .trackConnGoroutine (func () {
317
+ err = a .trackConnGoroutine (func () {
303
318
defer close (scriptDone )
304
319
scriptDone <- a .runStartupScript (ctx ,metadata .StartupScript )
305
320
})
@@ -327,16 +342,17 @@ func (a *agent) run(ctx context.Context) error {
327
342
if errors .Is (err ,context .Canceled ) {
328
343
return
329
344
}
330
- execTime := time .Since (scriptStart )
331
- lifecycleStatus := codersdk .WorkspaceAgentLifecycleReady
332
- if err != nil {
333
- a .logger .Warn (ctx ,"startup script failed" ,slog .F ("execution_time" ,execTime ),slog .Error (err ))
334
- lifecycleStatus = codersdk .WorkspaceAgentLifecycleStartError
335
- }else {
336
- a .logger .Info (ctx ,"startup script completed" ,slog .F ("execution_time" ,execTime ))
345
+ // Only log if there was a startup script.
346
+ if metadata .StartupScript != "" {
347
+ execTime := time .Since (scriptStart )
348
+ if err != nil {
349
+ a .logger .Warn (ctx ,"startup script failed" ,slog .F ("execution_time" ,execTime ),slog .Error (err ))
350
+ lifecycleState = codersdk .WorkspaceAgentLifecycleStartError
351
+ }else {
352
+ a .logger .Info (ctx ,"startup script completed" ,slog .F ("execution_time" ,execTime ))
353
+ }
337
354
}
338
-
339
- a .setLifecycle (ctx ,lifecycleStatus )
355
+ a .setLifecycle (ctx ,lifecycleState )
340
356
}()
341
357
}
342
358
@@ -604,14 +620,22 @@ func (a *agent) runCoordinator(ctx context.Context, network *tailnet.Conn) error
604
620
}
605
621
606
622
// runStartupScript hands script to runScript under the "startup" lifecycle
// label and returns runScript's error unchanged.
func (a * agent )runStartupScript (ctx context.Context ,script string )error {
623
+ return a .runScript (ctx ,"startup" ,script )
624
+ }
625
+
626
// runShutdownScript hands script to runScript under the "shutdown" lifecycle
// label and returns runScript's error unchanged.
+ func (a * agent )runShutdownScript (ctx context.Context ,script string )error {
627
+ return a .runScript (ctx ,"shutdown" ,script )
628
+ }
629
+
630
+ func (a * agent )runScript (ctx context.Context ,lifecycle ,script string )error {
607
631
if script == "" {
608
632
return nil
609
633
}
610
634
611
- a .logger .Info (ctx ,"runningstartup script" ,slog .F ("script" ,script ))
612
- writer ,err := a .filesystem .OpenFile (filepath .Join (a .logDir ,"coder-startup -script.log" ),os .O_CREATE | os .O_RDWR ,0o600 )
635
+ a .logger .Info (ctx ,"running script" , slog . F ( "lifecycle" , lifecycle ) ,slog .F ("script" ,script ))
636
+ writer ,err := a .filesystem .OpenFile (filepath .Join (a .logDir ,fmt . Sprintf ( "coder-%s -script.log" , lifecycle ) ),os .O_CREATE | os .O_RDWR ,0o600 )
613
637
if err != nil {
614
- return xerrors .Errorf ("openstartup script log file: %w" ,err )
638
+ return xerrors .Errorf ("open%s script log file: %w" , lifecycle ,err )
615
639
}
616
640
defer func () {
617
641
_ = writer .Close ()
@@ -772,7 +796,7 @@ func (a *agent) createCommand(ctx context.Context, rawCommand string, env []stri
772
796
773
797
rawMetadata := a .metadata .Load ()
774
798
if rawMetadata == nil {
775
- return nil ,xerrors .Errorf ("no metadata was provided: %w" , err )
799
+ return nil ,xerrors .Errorf ("no metadata was provided" )
776
800
}
777
801
metadata ,valid := rawMetadata .(agentsdk.Metadata )
778
802
if ! valid {
@@ -1296,13 +1320,73 @@ func (a *agent) Close() error {
1296
1320
if a .isClosed () {
1297
1321
return nil
1298
1322
}
1323
+
1324
+ ctx := context .Background ()
1325
+ a .setLifecycle (ctx ,codersdk .WorkspaceAgentLifecycleShuttingDown )
1326
+
1327
+ lifecycleState := codersdk .WorkspaceAgentLifecycleOff
1328
+ if metadata ,ok := a .metadata .Load ().(agentsdk.Metadata );ok && metadata .ShutdownScript != "" {
1329
+ scriptDone := make (chan error ,1 )
1330
+ scriptStart := time .Now ()
1331
+ go func () {
1332
+ defer close (scriptDone )
1333
+ scriptDone <- a .runShutdownScript (ctx ,metadata .ShutdownScript )
1334
+ }()
1335
+
1336
+ var timeout <- chan time.Time
1337
+ // If timeout is zero, an older version of the coder
1338
+ // provider was used. Otherwise a timeout is always > 0.
1339
+ if metadata .ShutdownScriptTimeout > 0 {
1340
+ t := time .NewTimer (metadata .ShutdownScriptTimeout )
1341
+ defer t .Stop ()
1342
+ timeout = t .C
1343
+ }
1344
+
1345
+ var err error
1346
+ select {
1347
+ case err = <- scriptDone :
1348
+ case <- timeout :
1349
+ a .logger .Warn (ctx ,"shutdown script timed out" )
1350
+ a .setLifecycle (ctx ,codersdk .WorkspaceAgentLifecycleShutdownTimeout )
1351
+ err = <- scriptDone // The script can still complete after a timeout.
1352
+ }
1353
+ execTime := time .Since (scriptStart )
1354
+ if err != nil {
1355
+ a .logger .Warn (ctx ,"shutdown script failed" ,slog .F ("execution_time" ,execTime ),slog .Error (err ))
1356
+ lifecycleState = codersdk .WorkspaceAgentLifecycleShutdownError
1357
+ }else {
1358
+ a .logger .Info (ctx ,"shutdown script completed" ,slog .F ("execution_time" ,execTime ))
1359
+ }
1360
+ }
1361
+
1362
+ // Set final state and wait for it to be reported because context
1363
+ // cancellation will stop the report loop.
1364
+ a .setLifecycle (ctx ,lifecycleState )
1365
+
1366
+ // Wait for the lifecycle to be reported, but don't wait forever so
1367
+ // that we don't break user expectations.
1368
+ ctx ,cancel := context .WithTimeout (ctx ,5 * time .Second )
1369
+ defer cancel ()
1370
+ lifecycleWaitLoop:
1371
+ for {
1372
+ select {
1373
+ case <- ctx .Done ():
1374
+ break lifecycleWaitLoop
1375
+ case s := <- a .lifecycleReported :
1376
+ if s == lifecycleState {
1377
+ break lifecycleWaitLoop
1378
+ }
1379
+ }
1380
+ }
1381
+
1299
1382
close (a .closed )
1300
1383
a .closeCancel ()
1384
+ _ = a .sshServer .Close ()
1301
1385
if a .network != nil {
1302
1386
_ = a .network .Close ()
1303
1387
}
1304
- _ = a .sshServer .Close ()
1305
1388
a .connCloseWait .Wait ()
1389
+
1306
1390
return nil
1307
1391
}
1308
1392