@@ -162,8 +162,15 @@ func (rpty *screenReconnectingPTY) Attach(ctx context.Context, _ string, conn ne
162
162
163
163
go heartbeat (ctx ,rpty .timer ,rpty .timeout )
164
164
165
- ptty ,process ,err := rpty .doAttach (ctx ,height ,width ,logger )
165
+ ptty ,process ,err := rpty .doAttach (ctx ,conn , height ,width ,logger )
166
166
if err != nil {
167
+ if errors .Is (err ,context .Canceled ) {
168
+ // Likely the process was too short-lived and canceled the version command.
169
+ // TODO: Is it worth distinguishing between that and a cancel from the
170
+ // Attach() caller? Additionally, since this could also happen if
171
+ // the command was invalid, should we check the process's exit code?
172
+ return nil
173
+ }
167
174
return err
168
175
}
169
176
@@ -180,53 +187,6 @@ func (rpty *screenReconnectingPTY) Attach(ctx context.Context, _ string, conn ne
180
187
}
181
188
}()
182
189
183
- // Pipe pty -> conn.
184
- // We do not need to separately monitor for the process exiting. When it
185
- // exits, our ptty.OutputReader() will return EOF after reading all process
186
- // output.
187
- go func () {
188
- // Close the connection when the process exits. Log only for debugging
189
- // since the connection might have already closed on its own.
190
- defer func () {
191
- err := conn .Close ()
192
- if err != nil {
193
- logger .Debug (ctx ,"closed connection with error" ,slog .Error (err ))
194
- }
195
- }()
196
- buffer := make ([]byte ,1024 )
197
- for {
198
- read ,err := ptty .OutputReader ().Read (buffer )
199
- if err != nil {
200
- // When the PTY is closed, this is triggered.
201
- // Error is typically a benign EOF, so only log for debugging.
202
- if errors .Is (err ,io .EOF ) {
203
- logger .Debug (ctx ,"unable to read pty output; screen might have exited" ,slog .Error (err ))
204
- }else {
205
- logger .Warn (ctx ,"unable to read pty output; screen might have exited" ,slog .Error (err ))
206
- rpty .metrics .WithLabelValues ("screen_output_reader" ).Add (1 )
207
- }
208
- // The process might have died because the session itself died or it
209
- // might have been separately killed and the session is still up (for
210
- // example `exit` or we killed it when the connection closed). If the
211
- // session is still up we might leave the reconnecting pty in memory
212
- // around longer than it needs to be but it will eventually clean up
213
- // with the timer or context, or the next attach will respawn the screen
214
- // daemon which is fine too.
215
- break
216
- }
217
- part := buffer [:read ]
218
- _ ,err = conn .Write (part )
219
- if err != nil {
220
- // Connection might have been closed.
221
- if errors .Unwrap (err ).Error ()!= "endpoint is closed for send" {
222
- logger .Warn (ctx ,"error writing to active conn" ,slog .Error (err ))
223
- rpty .metrics .WithLabelValues ("screen_write" ).Add (1 )
224
- }
225
- break
226
- }
227
- }
228
- }()
229
-
230
190
// Pipe conn -> pty and block.
231
191
readConnLoop (ctx ,conn ,ptty ,rpty .metrics ,logger )
232
192
return nil
@@ -235,7 +195,7 @@ func (rpty *screenReconnectingPTY) Attach(ctx context.Context, _ string, conn ne
235
195
// doAttach spawns the screen client and starts the heartbeat. It exists
236
196
// separately only so we can defer the mutex unlock which is not possible in
237
197
// Attach since it blocks.
238
- func (rpty * screenReconnectingPTY )doAttach (ctx context.Context ,height ,width uint16 ,logger slog.Logger ) (pty.PTYCmd , pty.Process ,error ) {
198
+ func (rpty * screenReconnectingPTY )doAttach (ctx context.Context ,conn net. Conn , height ,width uint16 ,logger slog.Logger ) (pty.PTYCmd , pty.Process ,error ) {
239
199
// Ensure another attach does not come in and spawn a duplicate session.
240
200
rpty .mutex .Lock ()
241
201
defer rpty .mutex .Unlock ()
@@ -273,11 +233,63 @@ func (rpty *screenReconnectingPTY) doAttach(ctx context.Context, height, width u
273
233
return nil ,nil ,err
274
234
}
275
235
236
+ // This context lets us abort the version command if the process dies.
237
+ versionCtx ,versionCancel := context .WithCancel (ctx )
238
+ defer versionCancel ()
239
+
240
+ // Pipe pty -> conn and close the connection when the process exits.
241
+ // We do not need to separately monitor for the process exiting. When it
242
+ // exits, our ptty.OutputReader() will return EOF after reading all process
243
+ // output.
244
+ go func () {
245
+ defer versionCancel ()
246
+ defer func () {
247
+ err := conn .Close ()
248
+ if err != nil {
249
+ // Log only for debugging since the connection might have already closed
250
+ // on its own.
251
+ logger .Debug (ctx ,"closed connection with error" ,slog .Error (err ))
252
+ }
253
+ }()
254
+ buffer := make ([]byte ,1024 )
255
+ for {
256
+ read ,err := ptty .OutputReader ().Read (buffer )
257
+ if err != nil {
258
+ // When the PTY is closed, this is triggered.
259
+ // Error is typically a benign EOF, so only log for debugging.
260
+ if errors .Is (err ,io .EOF ) {
261
+ logger .Debug (ctx ,"unable to read pty output; screen might have exited" ,slog .Error (err ))
262
+ }else {
263
+ logger .Warn (ctx ,"unable to read pty output; screen might have exited" ,slog .Error (err ))
264
+ rpty .metrics .WithLabelValues ("screen_output_reader" ).Add (1 )
265
+ }
266
+ // The process might have died because the session itself died or it
267
+ // might have been separately killed and the session is still up (for
268
+ // example `exit` or we killed it when the connection closed). If the
269
+ // session is still up we might leave the reconnecting pty in memory
270
+ // around longer than it needs to be but it will eventually clean up
271
+ // with the timer or context, or the next attach will respawn the screen
272
+ // daemon which is fine too.
273
+ break
274
+ }
275
+ part := buffer [:read ]
276
+ _ ,err = conn .Write (part )
277
+ if err != nil {
278
+ // Connection might have been closed.
279
+ if errors .Unwrap (err ).Error ()!= "endpoint is closed for send" {
280
+ logger .Warn (ctx ,"error writing to active conn" ,slog .Error (err ))
281
+ rpty .metrics .WithLabelValues ("screen_write" ).Add (1 )
282
+ }
283
+ break
284
+ }
285
+ }
286
+ }()
287
+
276
288
// Version seems to be the only command without a side effect (other than
277
289
// making the version pop up briefly) so use it to wait for the session to
278
290
// come up. If we do not wait we could end up spawning multiple sessions with
279
291
// the same name.
280
- err = rpty .sendCommand (ctx ,"version" ,nil )
292
+ err = rpty .sendCommand (versionCtx ,"version" ,nil )
281
293
if err != nil {
282
294
closeErr := ptty .Close ()
283
295
if closeErr != nil {
@@ -298,8 +310,9 @@ func (rpty *screenReconnectingPTY) doAttach(ctx context.Context, height, width u
298
310
// command fails with an error matching anything in successErrors it will be
299
311
// considered a success state (for example "no session" when quitting and the
300
312
// session is already dead). The command will be retried until successful, the
301
- // timeout is reached, or the context ends in which case the context error is
302
- // returned together with the last error from the command.
313
+ // timeout is reached, or the context ends. A canceled context will return the
314
+ // canceled context's error as-is while a timed-out context returns together
315
+ // with the last error from the command.
303
316
func (rpty * screenReconnectingPTY )sendCommand (ctx context.Context ,command string ,successErrors []string )error {
304
317
ctx ,cancel := context .WithTimeout (ctx ,attachTimeout )
305
318
defer cancel ()
@@ -352,6 +365,9 @@ func (rpty *screenReconnectingPTY) sendCommand(ctx context.Context, command stri
352
365
for {
353
366
select {
354
367
case <- ctx .Done ():
368
+ if errors .Is (ctx .Err (),context .Canceled ) {
369
+ return ctx .Err ()
370
+ }
355
371
return errors .Join (ctx .Err (),lastErr )
356
372
case <- ticker .C :
357
373
if done := run ();done {