2828#include <time.h>
2929#include <sys/types.h>
3030#include <sys/stat.h>
31+ #include <sys/wait.h>
3132#include <unistd.h>
3233
3334#ifdef HAVE_SYS_RESOURCE_H
@@ -153,10 +154,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
153154static pgpid_t get_pgpid (bool is_status_request );
154155static char * * readfile (const char * path );
155156static void free_readfile (char * * optlines );
156- static int start_postmaster (void );
157+ static pgpid_t start_postmaster (void );
157158static void read_post_opts (void );
158159
159- static PGPing test_postmaster_connection (bool );
160+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
160161static bool postmaster_is_alive (pid_t pid );
161162
162163#if defined(HAVE_GETRLIMIT )&& defined(RLIMIT_CORE )
@@ -419,36 +420,73 @@ free_readfile(char **optlines)
419420 * start/test/stop routines
420421 */
421422
422- static int
423+ /*
424+ * Start the postmaster and return its PID.
425+ *
426+ * Currently, on Windows what we return is the PID of the shell process
427+ * that launched the postmaster (and, we trust, is waiting for it to exit).
428+ * So the PID is usable for "is the postmaster still running" checks,
429+ * but cannot be compared directly to postmaster.pid.
430+ *
431+ * On Windows, we also save aside a handle to the shell process in
432+ * "postmasterProcess", which the caller should close when done with it.
433+ */
434+ static pgpid_t
423435start_postmaster (void )
424436{
425437char cmd [MAXPGPATH ];
426438
427439#ifndef WIN32
440+ pgpid_t pm_pid ;
441+
442+ /* Flush stdio channels just before fork, to avoid double-output problems */
443+ fflush (stdout );
444+ fflush (stderr );
445+
446+ pm_pid = fork ();
447+ if (pm_pid < 0 )
448+ {
449+ /* fork failed */
450+ write_stderr (_ ("%s: could not start server: %s\n" ),
451+ progname ,strerror (errno ));
452+ exit (1 );
453+ }
454+ if (pm_pid > 0 )
455+ {
456+ /* fork succeeded, in parent */
457+ return pm_pid ;
458+ }
459+
460+ /* fork succeeded, in child */
428461
429462/*
430463 * Since there might be quotes to handle here, it is easier simply to pass
431- * everything to a shell to process them.
432- *
433- * XXX it would be better to fork and exec so that we would know the child
434- * postmaster's PID directly; then test_postmaster_connection could use
435- * the PID without having to rely on reading it back from the pidfile.
464+ * everything to a shell to process them. Use exec so that the postmaster
465+ * has the same PID as the current child process.
436466 */
437467if (log_file != NULL )
438- snprintf (cmd ,MAXPGPATH ,"\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 & " ,
468+ snprintf (cmd ,MAXPGPATH ,"exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
439469exec_path ,pgdata_opt ,post_opts ,
440470DEVNULL ,log_file );
441471else
442- snprintf (cmd ,MAXPGPATH ,"\"%s\" %s%s < \"%s\" 2>&1 & " ,
472+ snprintf (cmd ,MAXPGPATH ,"exec \"%s\" %s%s < \"%s\" 2>&1" ,
443473exec_path ,pgdata_opt ,post_opts ,DEVNULL );
444474
445- return system (cmd );
475+ (void )execl ("/bin/sh" ,"/bin/sh" ,"-c" ,cmd , (char * )NULL );
476+
477+ /* exec failed */
478+ write_stderr (_ ("%s: could not start server: %s\n" ),
479+ progname ,strerror (errno ));
480+ exit (1 );
481+
482+ return 0 ;/* keep dumb compilers quiet */
483+
446484#else /* WIN32 */
447485
448486/*
449- * On win32 we don't use system(). So we don't need to use & (which would
450- * be START /B on win32). However, we still call the shell (CMD.EXE) with
451- * it to handle redirection etc.
487+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
488+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
489+ * "exec", so we don't get to find out the postmaster's PID immediately.
452490 */
453491PROCESS_INFORMATION pi ;
454492
@@ -460,10 +498,15 @@ start_postmaster(void)
460498exec_path ,pgdata_opt ,post_opts ,DEVNULL );
461499
462500if (!CreateRestrictedProcess (cmd ,& pi , false))
463- return GetLastError ();
464- CloseHandle (pi .hProcess );
501+ {
502+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
503+ progname , (unsigned long )GetLastError ());
504+ exit (1 );
505+ }
506+ /* Don't close command process handle here; caller must do so */
507+ postmasterProcess = pi .hProcess ;
465508CloseHandle (pi .hThread );
466- return 0 ;
509+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
467510#endif /* WIN32 */
468511}
469512
@@ -472,15 +515,21 @@ start_postmaster(void)
472515/*
473516 * Find the pgport and try a connection
474517 *
518+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
519+ * it may be the PID of an ancestor shell process, so we can't check the
520+ * contents of postmaster.pid quite as carefully.
521+ *
522+ * On Windows, the static variable postmasterProcess is an implicit argument
523+ * to this routine; it contains a handle to the postmaster process or an
524+ * ancestor shell process thereof.
525+ *
475526 * Note that the checkpoint parameter enables a Windows service control
476527 * manager checkpoint, it's got nothing to do with database checkpoints!!
477528 */
478529static PGPing
479- test_postmaster_connection (bool do_checkpoint )
530+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
480531{
481532PGPing ret = PQPING_NO_RESPONSE ;
482- bool found_stale_pidfile = false;
483- pgpid_t pm_pid = 0 ;
484533char connstr [MAXPGPATH * 2 + 256 ];
485534int i ;
486535
@@ -535,29 +584,27 @@ test_postmaster_connection(bool do_checkpoint)
535584optlines [5 ]!= NULL )
536585{
537586/* File is complete enough for us, parse it */
538- long pmpid ;
587+ pgpid_t pmpid ;
539588time_t pmstart ;
540589
541590/*
542- * Make sanity checks. If it's for a standalone backend
543- * (negative PID), or the recorded start time is before
544- * pg_ctl started, then either we are looking at the wrong
545- * data directory, or this is a pre-existing pidfile that
546- * hasn't (yet?) been overwritten by our child postmaster.
547- * Allow 2 seconds slop for possible cross-process clock
548- * skew.
591+ * Make sanity checks. If it's for the wrong PID, or the
592+ * recorded start time is before pg_ctl started, then
593+ * either we are looking at the wrong data directory, or
594+ * this is a pre-existing pidfile that hasn't (yet?) been
595+ * overwritten by our child postmaster. Allow 2 seconds
596+ * slop for possible cross-process clock skew.
549597 */
550598pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
551599pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
552- if (pmpid <=0 || pmstart < start_time - 2 )
553- {
554- /*
555- * Set flag to report stale pidfile if it doesn't get
556- * overwritten before we give up waiting.
557- */
558- found_stale_pidfile = true;
559- }
560- else
600+ if (pmstart >=start_time - 2 &&
601+ #ifndef WIN32
602+ pmpid == pm_pid
603+ #else
604+ /* Windows can only reject standalone-backend PIDs */
605+ pmpid > 0
606+ #endif
607+ )
561608{
562609/*
563610 * OK, seems to be a valid pidfile from our child.
@@ -567,9 +614,6 @@ test_postmaster_connection(bool do_checkpoint)
567614char * hostaddr ;
568615char host_str [MAXPGPATH ];
569616
570- found_stale_pidfile = false;
571- pm_pid = (pgpid_t )pmpid ;
572-
573617/*
574618 * Extract port number and host string to use. Prefer
575619 * using Unix socket if available.
@@ -635,42 +679,23 @@ test_postmaster_connection(bool do_checkpoint)
635679}
636680
637681/*
638- * The postmaster should create postmaster.pid very soon after being
639- * started. If it's not there after we've waited 5 or more seconds,
640- * assume startup failed and give up waiting. (Note this covers both
641- * cases where the pidfile was never created, and where it was created
642- * and then removed during postmaster exit.) Also, if there *is* a
643- * file there but it appears stale, issue a suitable warning and give
644- * up waiting.
682+ * Check whether the child postmaster process is still alive. This
683+ * lets us exit early if the postmaster fails during startup.
684+ *
685+ * On Windows, we may be checking the postmaster's parent shell, but
686+ * that's fine for this purpose.
645687 */
646- if ( i >= 5 )
688+ #ifndef WIN32
647689{
648- struct stat statbuf ;
690+ int exitstatus ;
649691
650- if (stat (pid_file ,& statbuf )!= 0 )
651- {
652- if (errno != ENOENT )
653- write_stderr (_ ("\n%s: could not stat file \"%s\": %s\n" ),
654- progname ,pid_file ,strerror (errno ));
655- return PQPING_NO_RESPONSE ;
656- }
657-
658- if (found_stale_pidfile )
659- {
660- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
661- progname );
692+ if (waitpid ((pid_t )pm_pid ,& exitstatus ,WNOHANG )== (pid_t )pm_pid )
662693return PQPING_NO_RESPONSE ;
663- }
664694}
665-
666- /*
667- * If we've been able to identify the child postmaster's PID, check
668- * the process is still alive. This covers cases where the postmaster
669- * successfully created the pidfile but then crashed without removing
670- * it.
671- */
672- if (pm_pid > 0 && !postmaster_is_alive ((pid_t )pm_pid ))
695+ #else
696+ if (WaitForSingleObject (postmasterProcess ,0 )== WAIT_OBJECT_0 )
673697return PQPING_NO_RESPONSE ;
698+ #endif
674699
675700/* No response, or startup still in process; wait */
676701#if defined(WIN32 )
@@ -836,7 +861,7 @@ static void
836861do_start (void )
837862{
838863pgpid_t old_pid = 0 ;
839- int exitcode ;
864+ pgpid_t pm_pid ;
840865
841866if (ctl_command != RESTART_COMMAND )
842867{
@@ -876,19 +901,13 @@ do_start(void)
876901}
877902#endif
878903
879- exitcode = start_postmaster ();
880- if (exitcode != 0 )
881- {
882- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
883- progname ,exitcode );
884- exit (1 );
885- }
904+ pm_pid = start_postmaster ();
886905
887906if (do_wait )
888907{
889908print_msg (_ ("waiting for server to start..." ));
890909
891- switch (test_postmaster_connection (false))
910+ switch (test_postmaster_connection (pm_pid , false))
892911{
893912case PQPING_OK :
894913print_msg (_ (" done\n" ));
@@ -914,6 +933,12 @@ do_start(void)
914933}
915934else
916935print_msg (_ ("server starting\n" ));
936+
937+ #ifdef WIN32
938+ /* Now we don't need the handle to the shell process anymore */
939+ CloseHandle (postmasterProcess );
940+ postmasterProcess = INVALID_HANDLE_VALUE ;
941+ #endif
917942}
918943
919944
@@ -1585,7 +1610,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
15851610if (do_wait )
15861611{
15871612write_eventlog (EVENTLOG_INFORMATION_TYPE ,_ ("Waiting for server startup...\n" ));
1588- if (test_postmaster_connection (true)!= PQPING_OK )
1613+ if (test_postmaster_connection (postmasterPID , true)!= PQPING_OK )
15891614{
15901615write_eventlog (EVENTLOG_ERROR_TYPE ,_ ("Timed out waiting for server startup\n" ));
15911616pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1606,10 +1631,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
16061631{
16071632/*
16081633 * status.dwCheckPoint can be incremented by
1609- * test_postmaster_connection(true), so it might not start
1610- * from 0.
1634+ * test_postmaster_connection(), so it might not start from 0.
16111635 */
1612- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;;
1636+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
16131637
16141638kill (postmasterPID ,SIGINT );
16151639