@@ -48,6 +48,7 @@ import java.net.URI
48
48
import java.time.Duration
49
49
import java.time.LocalDateTime
50
50
import java.time.format.DateTimeFormatter
51
+ import java.util.concurrent.TimeUnit
51
52
import java.util.concurrent.TimeoutException
52
53
import kotlin.coroutines.resume
53
54
import kotlin.coroutines.resumeWithException
@@ -227,16 +228,9 @@ class CoderRemoteConnectionHandle {
227
228
228
229
// Wait for the IDE to come up.
229
230
indicator.text= " Waiting for${workspace.ideName} backend..."
230
- var status: UnattendedHostStatus ? = null
231
231
val remoteProjectPath= accessor.makeRemotePath(ShellArgument .PlainText (workspace.projectPath))
232
232
val logsDir= accessor.getLogsDir(workspace.ideProduct.productCode, remoteProjectPath)
233
- while (lifetime.status== LifetimeStatus .Alive ) {
234
- status= ensureIDEBackend(accessor, workspace, ideDir, remoteProjectPath, logsDir, lifetime,null )
235
- if (! status?.joinLink.isNullOrBlank()) {
236
- break
237
- }
238
- delay(5000 )
239
- }
233
+ var status= ensureIDEBackend(accessor, workspace, ideDir, remoteProjectPath, logsDir, lifetime,null )
240
234
241
235
// ensureIDEBackend only returns without a join link once the lifetime has ended, so this only happens on cancellation.
242
236
val joinLink= status?.joinLink
@@ -302,6 +296,7 @@ class CoderRemoteConnectionHandle {
302
296
}
303
297
// Continue once the client is present.
304
298
handle.onClientPresenceChanged.advise(lifetime) {
299
+ logger.info(" ${workspace.ideName} client to${workspace.hostname} presence:${handle.clientPresent} " )
305
300
if (handle.clientPresent&& continuation.isActive) {
306
301
continuation.resume(true )
307
302
}
@@ -437,8 +432,8 @@ class CoderRemoteConnectionHandle {
437
432
}
438
433
439
434
/**
440
- * Ensure the backend is started.Status and/or links may be null if the
441
- *backend has not started .
435
+ * Ensure the backend is started. It will not return until a join link is
436
+ * received or the lifetime expires.
442
437
*/
443
438
private suspend fun ensureIDEBackend (
444
439
accessor : HighLevelHostAccessor ,
@@ -449,41 +444,67 @@ class CoderRemoteConnectionHandle {
449
444
lifetime : LifetimeDefinition ,
450
445
currentStatus : UnattendedHostStatus ? ,
451
446
):UnattendedHostStatus ? {
452
- val details= " ${workspace.hostname} :${ideDir.toRawString()} , project=${remoteProjectPath.toRawString()} "
453
- return try {
454
- if (currentStatus?.appPid!= null &&
455
- ! currentStatus.joinLink.isNullOrBlank()&&
456
- accessor.isPidAlive(currentStatus.appPid.toInt())
457
- ) {
458
- // If the PID is alive, assume the join link we have is still
459
- // valid. The join link seems to change even if it is the same
460
- // backend running, so if we always fetched the link the client
461
- // would relaunch over and over.
447
+ // Host, IDE directory, and project path; interpolated into the log messages below.
+ // A single `$` starts the template: `$${...}` would emit a literal `$` before the hostname.
+ val details= " ${workspace.hostname} :${ideDir.toRawString()} , project=${remoteProjectPath.toRawString()} "
448
+
449
+ // Check if the current IDE is alive.
450
+ if (currentStatus!= null ) {
451
+ val isAlive= try {
452
+ val isAlive= accessor.isPidAlive(currentStatus.appPid.toInt())
453
+ logger.info(" ${workspace.ideName} status: pid=${currentStatus.appPid} , alive=$isAlive " )
454
+ isAlive
455
+ }catch (ex: Exception ) {
456
+ logger.info(" Failed to check if${workspace.ideName} is alive on$details : pid=${currentStatus.appPid} " , ex)
457
+ false
458
+ }
459
+ if (isAlive) {
460
+ // Use the current status and join link.
462
461
return currentStatus
462
+ }else {
463
+ logger.info(" Relaunching${workspace.ideName} since it is not alive..." )
463
464
}
465
+ }else {
466
+ logger.info(" Launching${workspace.ideName} for the first time on${workspace.hostname} ..." )
467
+ }
464
468
465
- // See if there is already a backend running. Weirdly, there is
466
- // always a PID, even if there is no backend running, and
467
- // backendUnresponsive is always false, but the links are null so
468
- // hopefully that is an accurate indicator that the IDE is up.
469
- val status= accessor.getHostIdeStatus(ideDir, remoteProjectPath)
470
- if (! status.joinLink.isNullOrBlank()) {
471
- logger.info(" Found existing${workspace.ideName} backend on$details " )
472
- return status
473
- }
469
+ // If the PID is not alive, spawn a new backend. This may not be
470
+ // idempotent, so only call if we are really sure we need to.
471
+ accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
474
472
475
- // Otherwise, spawn a new backend. This does not seem to spawn a
476
- // second backend if one is already running, yet it does somehow
477
- // cause a second client to launch. So only run this if we are
478
- // really sure we have to launch a new backend.
479
- logger.info(" Starting${workspace.ideName} backend on$details " )
480
- accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
481
- // Get the newly spawned PID and join link.
482
- return accessor.getHostIdeStatus(ideDir, remoteProjectPath)
483
- }catch (ex: Exception ) {
484
- logger.info(" Failed to get${workspace.ideName} status from$details " , ex)
485
- currentStatus
473
+ // Get the newly spawned PID and join link.
474
+ var attempts= 0
475
+ val maxAttempts= 6
476
+ val wait= TimeUnit .SECONDS .toMillis(5 )
477
+ while (lifetime.status== LifetimeStatus .Alive ) {
478
+ try {
479
+ attempts++
480
+ val status= accessor.getHostIdeStatus(ideDir, remoteProjectPath)
481
+ if (! status.joinLink.isNullOrBlank()) {
482
+ logger.info(" Found join link for${workspace.ideName} ; proceeding to connect: pid=${status.appPid} " )
483
+ return status
484
+ }
485
+ // If we did not get a join link, see if the IDE is alive in
486
+ // case it died and we need to respawn.
487
+ val isAlive= status.appPid> 0 && accessor.isPidAlive(status.appPid.toInt())
488
+ logger.info(" ${workspace.ideName} status: pid=${status.appPid} , alive=$isAlive , unresponsive=${status.backendUnresponsive} , attempt=$attempts " )
489
+ // It is not clear whether the PID can be trusted because we get
490
+ // one even when there is no backend at all. For now give it
491
+ // some time and if it is still dead, only then try to respawn.
492
+ if (! isAlive&& attempts>= maxAttempts) {
493
+ logger.info(" ${workspace.ideName} is still not alive after$attempts checks, respawning backend and waiting$wait ms to try again" )
494
+ accessor.startHostIdeInBackgroundAndDetach(lifetime, ideDir, remoteProjectPath, logsDir)
495
+ attempts= 0
496
+ }else {
497
+ logger.info(" No join link found in status; waiting$wait ms to try again" )
498
+ }
499
+ }catch (ex: Exception ) {
500
+ logger.info(" Failed to get${workspace.ideName} status from$details ; waiting$wait ms to try again" , ex)
501
+ }
502
+ delay(wait)
486
503
}
504
+
505
+ // This means the lifetime is no longer alive.
506
+ logger.info(" Connection to${workspace.ideName} on$details aborted by user" )
507
+ return null
487
508
}
488
509
489
510
companion object {