@@ -249,6 +249,17 @@ static pid_t StartupPID = 0,
249249PgStatPID = 0 ,
250250SysLoggerPID = 0 ;
251251
252+ /* Startup process's status, tracked by the postmaster to decide whether to reinitialize after the startup process dies */
253+ typedef enum
254+ {
255+ STARTUP_NOT_RUNNING ,/* initial value; also set by reaper() once the startup process has exited and the exit is not being treated as a crash */
256+ STARTUP_RUNNING ,/* set right after StartupDataBase() has launched the startup process */
257+ STARTUP_SIGNALED ,/* we sent it a SIGQUIT or SIGKILL (HandleChildCrash also sets this after SIGSTOP when SendStop is set) */
258+ STARTUP_CRASHED /* startup process exited unexpectedly; PostmasterStateMachine() then exits the postmaster instead of reinitializing */
259+ }StartupStatusEnum ;
260+
261+ static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING ;
262+
252263/* Startup/shutdown state */
253264#define NoShutdown 0
254265#define SmartShutdown 1
@@ -258,7 +269,6 @@ static pid_t StartupPID = 0,
258269static int Shutdown = NoShutdown ;
259270
260271static bool FatalError = false;/* T if recovering from backend crash */
261- static bool RecoveryError = false;/* T if WAL recovery failed */
262272
263273/*
264274 * We use a simple state machine to control startup, shutdown, and
@@ -301,8 +311,6 @@ static bool RecoveryError = false;/* T if WAL recovery failed */
301311 * states, nor in PM_SHUTDOWN states (because we don't enter those states
302312 * when trying to recover from a crash). It can be true in PM_STARTUP state,
303313 * because we don't clear it until we've successfully started WAL redo.
304- * Similarly, RecoveryError means that we have crashed during recovery, and
305- * should not try to restart.
306314 */
307315typedef enum
308316{
@@ -1246,6 +1254,7 @@ PostmasterMain(int argc, char *argv[])
12461254 */
12471255StartupPID = StartupDataBase ();
12481256Assert (StartupPID != 0 );
1257+ StartupStatus = STARTUP_RUNNING ;
12491258pmState = PM_STARTUP ;
12501259
12511260/* Some workers may be scheduled to start now */
@@ -1666,7 +1675,7 @@ ServerLoop(void)
16661675
16671676/* If we have lost the archiver, try to start a new one. */
16681677if (PgArchPID == 0 && PgArchStartupAllowed ())
1669- PgArchPID = pgarch_start ();
1678+ PgArchPID = pgarch_start ();
16701679
16711680/* If we need to signal the autovacuum launcher, do so now */
16721681if (avlauncher_needs_signal )
@@ -2591,6 +2600,7 @@ reaper(SIGNAL_ARGS)
25912600if (Shutdown > NoShutdown &&
25922601(EXIT_STATUS_0 (exitstatus )|| EXIT_STATUS_1 (exitstatus )))
25932602{
2603+ StartupStatus = STARTUP_NOT_RUNNING ;
25942604pmState = PM_WAIT_BACKENDS ;
25952605/* PostmasterStateMachine logic does the rest */
25962606continue ;
@@ -2600,6 +2610,7 @@ reaper(SIGNAL_ARGS)
26002610{
26012611ereport (LOG ,
26022612(errmsg ("shutdown at recovery target" )));
2613+ StartupStatus = STARTUP_NOT_RUNNING ;
26032614Shutdown = SmartShutdown ;
26042615TerminateChildren (SIGTERM );
26052616pmState = PM_WAIT_BACKENDS ;
@@ -2624,16 +2635,18 @@ reaper(SIGNAL_ARGS)
26242635/*
26252636 * After PM_STARTUP, any unexpected exit (including FATAL exit) of
26262637 * the startup process is catastrophic, so kill other children,
2627- * and set RecoveryError so we don't try to reinitialize after
2628- * they're gone. Exception: if FatalError is already set, that
2629- * implies we previously sent the startup process a SIGQUIT, so
2638+ * and set StartupStatus so we don't try to reinitialize after
2639+ * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2640+ * then we previously sent the startup process a SIGQUIT; so
26302641 * that's probably the reason it died, and we do want to try to
26312642 * restart in that case.
26322643 */
26332644if (!EXIT_STATUS_0 (exitstatus ))
26342645{
2635- if (!FatalError )
2636- RecoveryError = true;
2646+ if (StartupStatus == STARTUP_SIGNALED )
2647+ StartupStatus = STARTUP_NOT_RUNNING ;
2648+ else
2649+ StartupStatus = STARTUP_CRASHED ;
26372650HandleChildCrash (pid ,exitstatus ,
26382651_ ("startup process" ));
26392652continue ;
@@ -2642,6 +2655,7 @@ reaper(SIGNAL_ARGS)
26422655/*
26432656 * Startup succeeded, commence normal operations
26442657 */
2658+ StartupStatus = STARTUP_NOT_RUNNING ;
26452659FatalError = false;
26462660Assert (AbortStartTime == 0 );
26472661ReachedNormalRunning = true;
@@ -2962,7 +2976,7 @@ CleanupBackgroundWorker(int pid,
29622976ReportBackgroundWorkerPID (rw );/* report child death */
29632977
29642978LogChildExit (EXIT_STATUS_0 (exitstatus ) ?DEBUG1 :LOG ,
2965- namebuf ,pid ,exitstatus );
2979+ namebuf ,pid ,exitstatus );
29662980
29672981return true;
29682982}
@@ -3190,14 +3204,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31903204
31913205/* Take care of the startup process too */
31923206if (pid == StartupPID )
3207+ {
31933208StartupPID = 0 ;
3209+ StartupStatus = STARTUP_CRASHED ;
3210+ }
31943211else if (StartupPID != 0 && take_action )
31953212{
31963213ereport (DEBUG2 ,
31973214(errmsg_internal ("sending %s to process %d" ,
31983215 (SendStop ?"SIGSTOP" :"SIGQUIT" ),
31993216 (int )StartupPID )));
32003217signal_child (StartupPID , (SendStop ?SIGSTOP :SIGQUIT ));
3218+ StartupStatus = STARTUP_SIGNALED ;
32013219}
32023220
32033221/* Take care of the bgwriter too */
@@ -3589,13 +3607,14 @@ PostmasterStateMachine(void)
35893607}
35903608
35913609/*
3592- * If recovery failed, or the user does not want an automatic restart
3593- * after backend crashes, wait for all non-syslogger children to exit, and
3594- * then exit postmaster. We don't try to reinitialize when recovery fails,
3595- * because more than likely it will just fail again and we will keep
3596- * trying forever.
3610+ * If the startup process failed, or the user does not want an automatic
3611+ * restart after backend crashes, wait for all non-syslogger children to
3612+ * exit, and then exit postmaster. We don't try to reinitialize when the
3613+ * startup process fails, because more than likely it will just fail again
3614+ * and we will keep trying forever.
35973615 */
3598- if (pmState == PM_NO_CHILDREN && (RecoveryError || !restart_after_crash ))
3616+ if (pmState == PM_NO_CHILDREN &&
3617+ (StartupStatus == STARTUP_CRASHED || !restart_after_crash ))
35993618ExitPostmaster (1 );
36003619
36013620/*
@@ -3615,6 +3634,7 @@ PostmasterStateMachine(void)
36153634
36163635StartupPID = StartupDataBase ();
36173636Assert (StartupPID != 0 );
3637+ StartupStatus = STARTUP_RUNNING ;
36183638pmState = PM_STARTUP ;
36193639/* crash recovery started, reset SIGKILL flag */
36203640AbortStartTime = 0 ;
@@ -3746,7 +3766,11 @@ TerminateChildren(int signal)
37463766{
37473767SignalChildren (signal );
37483768if (StartupPID != 0 )
3769+ {
37493770signal_child (StartupPID ,signal );
3771+ if (signal == SIGQUIT || signal == SIGKILL )
3772+ StartupStatus = STARTUP_SIGNALED ;
3773+ }
37503774if (BgWriterPID != 0 )
37513775signal_child (BgWriterPID ,signal );
37523776if (CheckpointerPID != 0 )