Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit895df54

Browse files
authored
fix: separate signals for passive, active, and forced shutdown (#12358)
* fix: separate signals for passive, active, and forced shutdown`SIGTERM`: Passive shutdown stopping provisioner daemons from accepting newjobs but waiting for existing jobs to successfully complete.`SIGINT` (old existing behavior): Notify provisioner daemons to cancel in-flight jobs, wait 5s for jobs to be exited, then force quit.`SIGKILL`: Untouched from before, will force-quit.* Revert dramatic signal changes* Rename* Fix shutdown behavior for provisioner daemons* Add test for graceful shutdown
1 parent2c947c1 commit895df54

18 files changed

+136
-35
lines changed

‎cli/agent.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
125125
args:=append(os.Args,"--no-reap")
126126
err:=reaper.ForkReap(
127127
reaper.WithExecArgs(args...),
128-
reaper.WithCatchSignals(InterruptSignals...),
128+
reaper.WithCatchSignals(StopSignals...),
129129
)
130130
iferr!=nil {
131131
logger.Error(ctx,"agent process reaper unable to fork",slog.Error(err))
@@ -144,7 +144,7 @@ func (r *RootCmd) workspaceAgent() *clibase.Cmd {
144144
// Note that we don't want to handle these signals in the
145145
// process that runs as PID 1, that's why we do this after
146146
// the reaper forked.
147-
ctx,stopNotify:=inv.SignalNotifyContext(ctx,InterruptSignals...)
147+
ctx,stopNotify:=inv.SignalNotifyContext(ctx,StopSignals...)
148148
deferstopNotify()
149149

150150
// DumpHandler does signal handling, so we call it after the

‎cli/exp_scaletest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -890,7 +890,7 @@ func (r *RootCmd) scaletestWorkspaceTraffic() *clibase.Cmd {
890890
Handler:func(inv*clibase.Invocation) (errerror) {
891891
ctx:=inv.Context()
892892

893-
notifyCtx,stop:=signal.NotifyContext(ctx,InterruptSignals...)// Checked later.
893+
notifyCtx,stop:=signal.NotifyContext(ctx,StopSignals...)// Checked later.
894894
deferstop()
895895
ctx=notifyCtx
896896

‎cli/externalauth.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ fi
6565
Handler:func(inv*clibase.Invocation)error {
6666
ctx:=inv.Context()
6767

68-
ctx,stop:=inv.SignalNotifyContext(ctx,InterruptSignals...)
68+
ctx,stop:=inv.SignalNotifyContext(ctx,StopSignals...)
6969
deferstop()
7070

7171
client,err:=r.createAgentClient()

‎cli/gitaskpass.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func (r *RootCmd) gitAskpass() *clibase.Cmd {
2525
Handler:func(inv*clibase.Invocation)error {
2626
ctx:=inv.Context()
2727

28-
ctx,stop:=inv.SignalNotifyContext(ctx,InterruptSignals...)
28+
ctx,stop:=inv.SignalNotifyContext(ctx,StopSignals...)
2929
deferstop()
3030

3131
user,host,err:=gitauth.ParseAskpass(inv.Args[0])

‎cli/gitssh.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func (r *RootCmd) gitssh() *clibase.Cmd {
2929

3030
// Catch interrupt signals to ensure the temporary private
3131
// key file is cleaned up on most cases.
32-
ctx,stop:=inv.SignalNotifyContext(ctx,InterruptSignals...)
32+
ctx,stop:=inv.SignalNotifyContext(ctx,StopSignals...)
3333
deferstop()
3434

3535
// Early check so errors are reported immediately.

‎cli/server.go

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -337,16 +337,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
337337

338338
// Register signals early on so that graceful shutdown can't
339339
// be interrupted by additional signals. Note that we avoid
340-
// shadowing cancel() (from above) here becausenotifyStop()
340+
// shadowing cancel() (from above) here becausestopCancel()
341341
// restores default behavior for the signals. This protects
342342
// the shutdown sequence from abruptly terminating things
343343
// like: database migrations, provisioner work, workspace
344344
// cleanup in dev-mode, etc.
345345
//
346346
// To get out of a graceful shutdown, the user can send
347347
// SIGQUIT with ctrl+\ or SIGKILL with `kill -9`.
348-
notifyCtx,notifyStop:=inv.SignalNotifyContext(ctx,InterruptSignals...)
349-
defernotifyStop()
348+
stopCtx,stopCancel:=signalNotifyContext(ctx,inv,StopSignalsNoInterrupt...)
349+
deferstopCancel()
350+
interruptCtx,interruptCancel:=signalNotifyContext(ctx,inv,InterruptSignals...)
351+
deferinterruptCancel()
350352

351353
cacheDir:=vals.CacheDir.String()
352354
err=os.MkdirAll(cacheDir,0o700)
@@ -1028,13 +1030,18 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10281030
hangDetector.Start()
10291031
deferhangDetector.Close()
10301032

1033+
waitForProvisionerJobs:=false
10311034
// Currently there is no way to ask the server to shut
10321035
// itself down, so any exit signal will result in a non-zero
10331036
// exit of the server.
10341037
varexitErrerror
10351038
select {
1036-
case<-notifyCtx.Done():
1037-
exitErr=notifyCtx.Err()
1039+
case<-stopCtx.Done():
1040+
exitErr=stopCtx.Err()
1041+
waitForProvisionerJobs=true
1042+
_,_=io.WriteString(inv.Stdout,cliui.Bold("Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit"))
1043+
case<-interruptCtx.Done():
1044+
exitErr=interruptCtx.Err()
10381045
_,_=io.WriteString(inv.Stdout,cliui.Bold("Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit"))
10391046
case<-tunnelDone:
10401047
exitErr=xerrors.New("dev tunnel closed unexpectedly")
@@ -1082,7 +1089,16 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
10821089
deferwg.Done()
10831090

10841091
r.Verbosef(inv,"Shutting down provisioner daemon %d...",id)
1085-
err:=shutdownWithTimeout(provisionerDaemon.Shutdown,5*time.Second)
1092+
timeout:=5*time.Second
1093+
ifwaitForProvisionerJobs {
1094+
// It can last for a long time...
1095+
timeout=30*time.Minute
1096+
}
1097+
1098+
err:=shutdownWithTimeout(func(ctx context.Context)error {
1099+
// We only want to cancel active jobs if we aren't exiting gracefully.
1100+
returnprovisionerDaemon.Shutdown(ctx,!waitForProvisionerJobs)
1101+
},timeout)
10861102
iferr!=nil {
10871103
cliui.Errorf(inv.Stderr,"Failed to shut down provisioner daemon %d: %s\n",id,err)
10881104
return
@@ -2512,3 +2528,12 @@ func escapePostgresURLUserInfo(v string) (string, error) {
25122528

25132529
returnv,nil
25142530
}
2531+
2532+
funcsignalNotifyContext(ctx context.Context,inv*clibase.Invocation,sig...os.Signal) (context.Context, context.CancelFunc) {
2533+
// On Windows, some of our signal functions lack support.
2534+
// If we pass in no signals, we should just return the context as-is.
2535+
iflen(sig)==0 {
2536+
returncontext.WithCancel(ctx)
2537+
}
2538+
returninv.SignalNotifyContext(ctx,sig...)
2539+
}

‎cli/server_createadminuser.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func (r *RootCmd) newCreateAdminUserCommand() *clibase.Cmd {
4747
logger=logger.Leveled(slog.LevelDebug)
4848
}
4949

50-
ctx,cancel:=inv.SignalNotifyContext(ctx,InterruptSignals...)
50+
ctx,cancel:=inv.SignalNotifyContext(ctx,StopSignals...)
5151
defercancel()
5252

5353
ifnewUserDBURL=="" {

‎cli/server_test.go

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"net/url"
2222
"os"
2323
"path/filepath"
24+
"reflect"
2425
"runtime"
2526
"strconv"
2627
"strings"
@@ -1605,7 +1606,7 @@ func TestServer_Production(t *testing.T) {
16051606
}
16061607

16071608
//nolint:tparallel,paralleltest // This test cannot be run in parallel due to signal handling.
1608-
funcTestServer_Shutdown(t*testing.T) {
1609+
funcTestServer_InterruptShutdown(t*testing.T) {
16091610
t.Skip("This test issues an interrupt signal which will propagate to the test runner.")
16101611

16111612
ifruntime.GOOS=="windows" {
@@ -1638,6 +1639,46 @@ func TestServer_Shutdown(t *testing.T) {
16381639
require.NoError(t,err)
16391640
}
16401641

1642+
funcTestServer_GracefulShutdown(t*testing.T) {
1643+
t.Parallel()
1644+
ifruntime.GOOS=="windows" {
1645+
// Sending interrupt signal isn't supported on Windows!
1646+
t.SkipNow()
1647+
}
1648+
ctx,cancelFunc:=context.WithCancel(context.Background())
1649+
defercancelFunc()
1650+
1651+
root,cfg:=clitest.New(t,
1652+
"server",
1653+
"--in-memory",
1654+
"--http-address",":0",
1655+
"--access-url","http://example.com",
1656+
"--provisioner-daemons","1",
1657+
"--cache-dir",t.TempDir(),
1658+
)
1659+
varstopFunc context.CancelFunc
1660+
root=root.WithTestSignalNotifyContext(t,func(parent context.Context,signals...os.Signal) (context.Context, context.CancelFunc) {
1661+
if!reflect.DeepEqual(cli.StopSignalsNoInterrupt,signals) {
1662+
returncontext.WithCancel(ctx)
1663+
}
1664+
varctx context.Context
1665+
ctx,stopFunc=context.WithCancel(parent)
1666+
returnctx,stopFunc
1667+
})
1668+
serverErr:=make(chanerror,1)
1669+
pty:=ptytest.New(t).Attach(root)
1670+
gofunc() {
1671+
serverErr<-root.WithContext(ctx).Run()
1672+
}()
1673+
_=waitAccessURL(t,cfg)
1674+
// It's fair to assume `stopFunc` isn't nil here, because the server
1675+
// has started and access URL is propagated.
1676+
stopFunc()
1677+
pty.ExpectMatch("waiting for provisioner jobs to complete")
1678+
err:=<-serverErr
1679+
require.NoError(t,err)
1680+
}
1681+
16411682
funcBenchmarkServerHelp(b*testing.B) {
16421683
// server --help is a good proxy for measuring the
16431684
// constant overhead of each command.

‎cli/signal_unix.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,23 @@ import (
77
"syscall"
88
)
99

10-
varInterruptSignals= []os.Signal{
10+
// StopSignals is the list of signals that are used for handling
11+
// shutdown behavior.
12+
varStopSignals= []os.Signal{
1113
os.Interrupt,
1214
syscall.SIGTERM,
1315
syscall.SIGHUP,
1416
}
17+
18+
// StopSignals is the list of signals that are used for handling
19+
// graceful shutdown behavior.
20+
varStopSignalsNoInterrupt= []os.Signal{
21+
syscall.SIGTERM,
22+
syscall.SIGHUP,
23+
}
24+
25+
// InterruptSignals is the list of signals that are used for handling
26+
// immediate shutdown behavior.
27+
varInterruptSignals= []os.Signal{
28+
os.Interrupt,
29+
}

‎cli/signal_windows.go

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,12 @@ import (
66
"os"
77
)
88

9-
varInterruptSignals= []os.Signal{os.Interrupt}
9+
varStopSignals= []os.Signal{
10+
os.Interrupt,
11+
}
12+
13+
varStopSignalsNoInterrupt= []os.Signal{}
14+
15+
varInterruptSignals= []os.Signal{
16+
os.Interrupt,
17+
}

‎cli/ssh.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func (r *RootCmd) ssh() *clibase.Cmd {
7373
// session can persist for up to 72 hours, since we set a long
7474
// timeout on the Agent side of the connection. In particular,
7575
// OpenSSH sends SIGHUP to terminate a proxy command.
76-
ctx,stop:=inv.SignalNotifyContext(inv.Context(),InterruptSignals...)
76+
ctx,stop:=inv.SignalNotifyContext(inv.Context(),StopSignals...)
7777
deferstop()
7878
ctx,cancel:=context.WithCancel(ctx)
7979
defercancel()

‎cli/templatepull_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ func TestTemplatePull_ToDir(t *testing.T) {
328328

329329
require.NoError(t,inv.Run())
330330

331-
// Validatebehaviour of choosing template name in the absence of an output path argument.
331+
// Validatebehavior of choosing template name in the absence of an output path argument.
332332
destPath:=actualDest
333333
ifdestPath=="" {
334334
destPath=template.Name

‎coderd/coderdtest/coderdtest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ func (c *provisionerdCloser) Close() error {
498498
c.closed=true
499499
ctx,cancel:=context.WithTimeout(context.Background(),testutil.WaitShort)
500500
defercancel()
501-
shutdownErr:=c.d.Shutdown(ctx)
501+
shutdownErr:=c.d.Shutdown(ctx,true)
502502
closeErr:=c.d.Close()
503503
ifshutdownErr!=nil {
504504
returnshutdownErr

‎enterprise/cli/provisionerdaemons.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,10 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
8888
ctx,cancel:=context.WithCancel(inv.Context())
8989
defercancel()
9090

91-
notifyCtx,notifyStop:=inv.SignalNotifyContext(ctx,agpl.InterruptSignals...)
92-
defernotifyStop()
91+
stopCtx,stopCancel:=inv.SignalNotifyContext(ctx,agpl.StopSignalsNoInterrupt...)
92+
deferstopCancel()
93+
interruptCtx,interruptCancel:=inv.SignalNotifyContext(ctx,agpl.InterruptSignals...)
94+
deferinterruptCancel()
9395

9496
tags,err:=agpl.ParseProvisionerTags(rawTags)
9597
iferr!=nil {
@@ -212,10 +214,17 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
212214
Metrics:metrics,
213215
})
214216

217+
waitForProvisionerJobs:=false
215218
varexitErrerror
216219
select {
217-
case<-notifyCtx.Done():
218-
exitErr=notifyCtx.Err()
220+
case<-stopCtx.Done():
221+
exitErr=stopCtx.Err()
222+
_,_=fmt.Fprintln(inv.Stdout,cliui.Bold(
223+
"Stop caught, waiting for provisioner jobs to complete and gracefully exiting. Use ctrl+\\ to force quit",
224+
))
225+
waitForProvisionerJobs=true
226+
case<-interruptCtx.Done():
227+
exitErr=interruptCtx.Err()
219228
_,_=fmt.Fprintln(inv.Stdout,cliui.Bold(
220229
"Interrupt caught, gracefully exiting. Use ctrl+\\ to force quit",
221230
))
@@ -225,7 +234,7 @@ func (r *RootCmd) provisionerDaemonStart() *clibase.Cmd {
225234
cliui.Errorf(inv.Stderr,"Unexpected error, shutting down server: %s\n",exitErr)
226235
}
227236

228-
err=srv.Shutdown(ctx)
237+
err=srv.Shutdown(ctx,waitForProvisionerJobs)
229238
iferr!=nil {
230239
returnxerrors.Errorf("shutdown: %w",err)
231240
}

‎enterprise/cli/proxyserver.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ func (r *RootCmd) proxyServer() *clibase.Cmd {
142142
//
143143
// To get out of a graceful shutdown, the user can send
144144
// SIGQUIT with ctrl+\ or SIGKILL with `kill -9`.
145-
notifyCtx,notifyStop:=inv.SignalNotifyContext(ctx,cli.InterruptSignals...)
145+
notifyCtx,notifyStop:=inv.SignalNotifyContext(ctx,cli.StopSignals...)
146146
defernotifyStop()
147147

148148
// Clean up idle connections at the end, e.g.

‎enterprise/coderd/provisionerdaemons_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ func TestProvisionerDaemonServe(t *testing.T) {
441441
build:=coderdtest.AwaitWorkspaceBuildJobCompleted(t,client,workspace.LatestBuild.ID)
442442
require.Equal(t,codersdk.WorkspaceStatusRunning,build.Status)
443443

444-
err=pd.Shutdown(ctx)
444+
err=pd.Shutdown(ctx,false)
445445
require.NoError(t,err)
446446
err=terraformServer.Close()
447447
require.NoError(t,err)

‎provisionerd/provisionerd.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,15 +474,18 @@ func (p *Server) isClosed() bool {
474474
}
475475
}
476476

477-
// Shutdown triggers a graceful exit of each registered provisioner.
478-
func (p*Server)Shutdown(ctx context.Context)error {
477+
// Shutdown gracefully exists with the option to cancel the active job.
478+
// If false, it will wait for the job to complete.
479+
//
480+
//nolint:revive
481+
func (p*Server)Shutdown(ctx context.Context,cancelActiveJobbool)error {
479482
p.mutex.Lock()
480483
p.opts.Logger.Info(ctx,"attempting graceful shutdown")
481484
if!p.shuttingDownB {
482485
close(p.shuttingDownCh)
483486
p.shuttingDownB=true
484487
}
485-
ifp.activeJob!=nil {
488+
ifcancelActiveJob&&p.activeJob!=nil {
486489
p.activeJob.Cancel()
487490
}
488491
p.mutex.Unlock()

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp