- Notifications
You must be signed in to change notification settings - Fork907
fix(agent): ensure SSH server shutdown with process groups#17227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to ourterms of service andprivacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
Uh oh!
There was an error while loading.Please reload this page.
Changes fromall commits
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -582,6 +582,12 @@ func (s *Server) sessionStart(logger slog.Logger, session ssh.Session, env []str | ||
func (s *Server) startNonPTYSession(logger slog.Logger, session ssh.Session, magicTypeLabel string, cmd *exec.Cmd) error { | ||
s.metrics.sessionsTotal.WithLabelValues(magicTypeLabel, "no").Add(1) | ||
// Create a process group and send SIGHUP to child processes, | ||
// otherwise context cancellation will not propagate properly | ||
// and SSH server close may be delayed. | ||
cmd.SysProcAttr = cmdSysProcAttr() | ||
cmd.Cancel = cmdCancel(session.Context(), logger, cmd) | ||
cmd.Stdout = session | ||
cmd.Stderr = session.Stderr() | ||
// This blocks forever until stdin is received if we don't | ||
@@ -926,7 +932,12 @@ func (s *Server) CreateCommand(ctx context.Context, script string, env []string, | ||
// Serve starts the server to handle incoming connections on the provided listener. | ||
// It returns an error if no host keys are set or if there is an issue accepting connections. | ||
func (s *Server) Serve(l net.Listener) (retErr error) { | ||
// Ensure we're not mutating HostSigners as we're reading it. | ||
s.mu.RLock() | ||
noHostKeys := len(s.srv.HostSigners) == 0 | ||
s.mu.RUnlock() | ||
if noHostKeys { | ||
return xerrors.New("no host keys set") | ||
} | ||
@@ -1054,43 +1065,72 @@ func (s *Server) Close() error { | ||
} | ||
s.closing = make(chan struct{}) | ||
ctx := context.Background() | ||
s.logger.Debug(ctx, "closing server") | ||
// Stop accepting new connections. | ||
s.logger.Debug(ctx, "closing all active listeners", slog.F("count", len(s.listeners))) | ||
for l := range s.listeners { | ||
_ = l.Close() | ||
} | ||
// Close all active sessions to gracefully | ||
// terminate client connections. | ||
s.logger.Debug(ctx, "closing all active sessions", slog.F("count", len(s.sessions))) | ||
for ss := range s.sessions { | ||
// We call Close on the underlying channel here because we don't | ||
// want to send an exit status to the client (via Exit()). | ||
// Typically OpenSSH clients will return 255 as the exit status. | ||
_ = ss.Close() | ||
} | ||
s.logger.Debug(ctx, "closing all active connections", slog.F("count", len(s.conns))) | ||
for c := range s.conns { | ||
_ = c.Close() | ||
} | ||
s.logger.Debug(ctx, "closingSSH server") | ||
err := s.srv.Close() | ||
s.mu.Unlock() | ||
s.logger.Debug(ctx, "waiting for all goroutines to exit") | ||
s.wg.Wait() // Wait for all goroutines to exit. | ||
s.mu.Lock() | ||
close(s.closing) | ||
s.closing = nil | ||
s.mu.Unlock() | ||
s.logger.Debug(ctx, "closing server done") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. suggestion: log elapsed time for closing | ||
return err | ||
} | ||
// Shutdown stops accepting new connections. The current implementation | ||
// calls Close() for simplicity instead of waiting for existing | ||
// connections to close. If the context times out, Shutdown will return | ||
// but Close() may not have completed. | ||
func (s *Server) Shutdown(ctx context.Context) error { | ||
ch := make(chan error, 1) | ||
go func() { | ||
// TODO(mafredri): Implement shutdown, SIGHUP running commands, etc. | ||
// For now we just close the server. | ||
ch <- s.Close() | ||
}() | ||
var err error | ||
select { | ||
case <-ctx.Done(): | ||
err = ctx.Err() | ||
case err = <-ch: | ||
} | ||
// Re-check for context cancellation precedence. | ||
if ctx.Err() != nil { | ||
err = ctx.Err() | ||
} | ||
if err != nil { | ||
return xerrors.Errorf("close server: %w", err) | ||
} | ||
return nil | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
//go:build !windows | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Review: These are copy-pasta from agentscripts package. | ||
package agentssh | ||
import ( | ||
"context" | ||
"os/exec" | ||
"syscall" | ||
"cdr.dev/slog" | ||
) | ||
func cmdSysProcAttr() *syscall.SysProcAttr { | ||
return &syscall.SysProcAttr{ | ||
Setsid: true, | ||
} | ||
} | ||
func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error { | ||
return func() error { | ||
logger.Debug(ctx, "cmdCancel: sending SIGHUP to process and children", slog.F("pid", cmd.Process.Pid)) | ||
johnstcn marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
return syscall.Kill(-cmd.Process.Pid, syscall.SIGHUP) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package agentssh | ||
import ( | ||
"context" | ||
"os" | ||
"os/exec" | ||
"syscall" | ||
"cdr.dev/slog" | ||
) | ||
func cmdSysProcAttr() *syscall.SysProcAttr { | ||
return &syscall.SysProcAttr{} | ||
} | ||
func cmdCancel(ctx context.Context, logger slog.Logger, cmd *exec.Cmd) func() error { | ||
return func() error { | ||
logger.Debug(ctx, "cmdCancel: sending interrupt to process", slog.F("pid", cmd.Process.Pid)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. 📋🍝 suggestion: check that | ||
return cmd.Process.Signal(os.Interrupt) | ||
} | ||
} |
Uh oh!
There was an error while loading.Please reload this page.