11/*
2- * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.21 2009/03/26 22:29:13 tgl Exp $
2+ * $PostgreSQL: pgsql/contrib/pg_standby/pg_standby.c,v 1.22 2009/05/14 20:31:09 heikki Exp $
33 *
44 *
55 * pg_standby.c
2626#include <ctype.h>
2727#include <dirent.h>
2828#include <sys/stat.h>
29+ #include <fcntl.h>
2930#include <signal.h>
3031
3132#ifdef WIN32
@@ -52,7 +53,6 @@ intmaxwaittime = 0;/* how long are we prepared to wait for? */
5253int keepfiles = 0 ;/* number of WAL files to keep, 0 keep all */
5354int maxretries = 3 ;/* number of retries on restore command */
5455bool debug = false;/* are we debugging? */
55- bool triggered = false;/* have we been triggered? */
5656bool need_cleanup = false;/* do we need to remove files from
5757 * archive? */
5858
@@ -69,6 +69,30 @@ charrestoreCommand[MAXPGPATH];/* run this to restore */
6969char exclusiveCleanupFileName [MAXPGPATH ];/* the file we need to
7070 * get from archive */
7171
72+ /*
73+ * Two types of failover are supported (smart and fast failover).
74+ *
75+ * The content of the trigger file determines the type of failover. If the
76+ * trigger file contains the word "smart" (or the file is empty), smart
77+ * failover is chosen: pg_standby acts as cp or ln command itself, on
78+ * successful completion all the available WAL records will be applied
79+ * resulting in zero data loss. But, it might take a long time to finish
80+ * recovery if there's a lot of unapplied WAL.
81+ *
82+ * On the other hand, if the trigger file contains the word "fast", the
83+ * recovery is finished immediately even if unapplied WAL files remain. Any
84+ * transactions in the unapplied WAL files are lost.
85+ *
86+ * An empty trigger file performs smart failover. SIGUSR or SIGINT triggers
87+ * fast failover. A timeout causes fast failover (smart failover would have
88+ * the same effect, since if the timeout is reached there is no unapplied WAL).
89+ */
90+ #define NoFailover 0
91+ #define SmartFailover 1
92+ #define FastFailover 2
93+
94+ static int Failover = NoFailover ;
95+
7296#define RESTORE_COMMAND_COPY 0
7397#define RESTORE_COMMAND_LINK 1
7498int restoreCommandType ;
@@ -108,7 +132,6 @@ struct stat stat_buf;
108132 *
109133 *As an example, and probably the common case, we use either
110134 *cp/ln commands on *nix, or copy/move command on Windows.
111- *
112135 */
113136static void
114137CustomizableInitialize (void )
@@ -352,41 +375,96 @@ SetWALFileNameForCleanup(void)
352375/*
353376 * CheckForExternalTrigger()
354377 *
355- * Is there a trigger file?
378+ * Is there a trigger file? Sets global 'Failover' variable to indicate
379+ * what kind of a trigger file it was. A "fast" trigger file is turned
380+ * into a "smart" file as a side-effect.
356381 */
357- static bool
382+ static void
358383CheckForExternalTrigger (void )
359384{
360- int rc ;
385+ char buf [32 ];
386+ int fd ;
387+ int len ;
361388
362389/*
363390 * Look for a trigger file, if that option has been selected
364391 *
365392 * We use stat() here because triggerPath is always a file rather than
366393 * potentially being in an archive
367394 */
368- if (triggerPath && stat (triggerPath ,& stat_buf )== 0 )
395+ if (!triggerPath || stat (triggerPath ,& stat_buf )!= 0 )
396+ return ;
397+
398+ /*
399+ * An empty trigger file performs smart failover. There's a little race
400+ * condition here: if the writer of the trigger file has just created
401+ * the file, but not yet written anything to it, we'll treat that as
402+ * smart shutdown even if the other process was just about to write "fast"
403+ * to it. But that's fine: we'll restore one more WAL file, and when we're
404+ * invoked next time, we'll see the word "fast" and fail over immediately.
405+ */
406+ if (stat_buf .st_size == 0 )
369407{
370- fprintf (stderr ,"trigger file found\n" );
408+ Failover = SmartFailover ;
409+ fprintf (stderr ,"trigger file found: smart failover\n" );
410+ fflush (stderr );
411+ return ;
412+ }
413+
414+ if ((fd = open (triggerPath ,O_RDWR ,0 ))< 0 )
415+ {
416+ fprintf (stderr ,"WARNING: could not open \"%s\": %s\n" ,
417+ triggerPath ,strerror (errno ));
418+ fflush (stderr );
419+ return ;
420+ }
421+
422+ if ((len = read (fd ,buf ,sizeof (buf )))< 0 )
423+ {
424+ fprintf (stderr ,"WARNING: could not read \"%s\": %s\n" ,
425+ triggerPath ,strerror (errno ));
426+ fflush (stderr );
427+ close (fd );
428+ return ;
429+ }
430+ buf [len ]= '\0' ;
431+
432+ if (strncmp (buf ,"smart" ,5 )== 0 )
433+ {
434+ Failover = SmartFailover ;
435+ fprintf (stderr ,"trigger file found: smart failover\n" );
436+ fflush (stderr );
437+ close (fd );
438+ return ;
439+ }
440+
441+ if (strncmp (buf ,"fast" ,4 )== 0 )
442+ {
443+ Failover = FastFailover ;
444+
445+ fprintf (stderr ,"trigger file found: fast failover\n" );
371446fflush (stderr );
372447
373448/*
374- * If trigger file found, we *must* delete it. Here's why: When
375- * recovery completes, we will be asked again for the same file from
376- * the archive using pg_standby so must remove trigger file so we can
377- * reload file again and come up correctly.
449+ * Turn it into a "smart" trigger by truncating the file. Otherwise
450+ * if the server asks us again to restore a segment that was restored
451+ * restored already, we would return "not found" and upset the server.
378452 */
379- rc = unlink (triggerPath );
380- if (rc != 0 )
453+ if (ftruncate (fd ,0 )< 0 )
381454{
382- fprintf (stderr ,"\n ERROR: could not remove \"%s\": %s" ,triggerPath ,strerror (errno ));
455+ fprintf (stderr ,"WARNING: could not read \"%s\": %s\n" ,
456+ triggerPath ,strerror (errno ));
383457fflush (stderr );
384- exit (rc );
385458}
386- return true;
387- }
459+ close (fd );
388460
389- return false;
461+ return ;
462+ }
463+ close (fd );
464+
465+ fprintf (stderr ,"WARNING: invalid content in \"%s\"\n" ,triggerPath );
466+ fflush (stderr );
467+ return ;
390468}
391469
392470/*
@@ -402,7 +480,7 @@ RestoreWALFileForRecovery(void)
402480
403481if (debug )
404482{
405- fprintf (stderr ,"\nrunning restore:" );
483+ fprintf (stderr ,"running restore:" );
406484fflush (stderr );
407485}
408486
@@ -413,7 +491,7 @@ RestoreWALFileForRecovery(void)
413491{
414492if (debug )
415493{
416- fprintf (stderr ," OK" );
494+ fprintf (stderr ," OK\n " );
417495fflush (stderr );
418496}
419497return true;
@@ -425,7 +503,7 @@ RestoreWALFileForRecovery(void)
425503 * Allow caller to add additional info
426504 */
427505if (debug )
428- fprintf (stderr ,"not restored: " );
506+ fprintf (stderr ,"not restored\n " );
429507return false;
430508}
431509
@@ -552,8 +630,6 @@ main(int argc, char **argv)
552630break ;
553631case 't' :/* Trigger file */
554632triggerPath = optarg ;
555- if (CheckForExternalTrigger ())
556- exit (1 );/* Normal exit, with non-zero */
557633break ;
558634case 'w' :/* Max wait time */
559635maxwaittime = atoi (optarg );
@@ -633,20 +709,20 @@ main(int argc, char **argv)
633709
634710if (debug )
635711{
636- fprintf (stderr ,"\nTrigger file : %s" ,triggerPath ?triggerPath :"<not set>" );
637- fprintf (stderr ,"\nWaiting for WAL file: %s" ,nextWALFileName );
638- fprintf (stderr ,"\nWAL file path: %s" ,WALFilePath );
639- fprintf (stderr ,"\nRestoring to... : %s" ,xlogFilePath );
640- fprintf (stderr ,"\nSleep interval: %d second%s" ,
712+ fprintf (stderr ,"Trigger file : %s\n " ,triggerPath ?triggerPath :"<not set>" );
713+ fprintf (stderr ,"Waiting for WAL file: %s\n " ,nextWALFileName );
714+ fprintf (stderr ,"WAL file path: %s\n " ,WALFilePath );
715+ fprintf (stderr ,"Restoring to: %s\n " ,xlogFilePath );
716+ fprintf (stderr ,"Sleep interval: %d second%s\n " ,
641717sleeptime , (sleeptime > 1 ?"s" :" " ));
642- fprintf (stderr ,"\nMax wait interval: %d %s" ,
718+ fprintf (stderr ,"Max wait interval: %d %s\n " ,
643719maxwaittime , (maxwaittime > 0 ?"seconds" :"forever" ));
644- fprintf (stderr ,"\nCommand for restore: %s" ,restoreCommand );
645- fprintf (stderr ,"\nKeep archive history: " );
720+ fprintf (stderr ,"Command for restore: %s\n " ,restoreCommand );
721+ fprintf (stderr ,"Keep archive history: " );
646722if (need_cleanup )
647- fprintf (stderr ,"%s and later" ,exclusiveCleanupFileName );
723+ fprintf (stderr ,"%s and later\n " ,exclusiveCleanupFileName );
648724else
649- fprintf (stderr ,"No cleanup required" );
725+ fprintf (stderr ,"No cleanup required\n " );
650726fflush (stderr );
651727}
652728
@@ -676,56 +752,74 @@ main(int argc, char **argv)
676752/*
677753 * Main wait loop
678754 */
679- while (! CustomizableNextWALFileReady () && ! triggered )
755+ for (;; )
680756{
681- if (sleeptime <=60 )
682- pg_usleep (sleeptime * 1000000L );
683-
757+ /* Check for trigger file or signal first */
758+ CheckForExternalTrigger ();
684759if (signaled )
685760{
686- triggered = true ;
761+ Failover = FastFailover ;
687762if (debug )
688763{
689- fprintf (stderr ,"\nsignaled to exit\n" );
764+ fprintf (stderr ,"signaled to exit: fast failover \n" );
690765fflush (stderr );
691766}
692767}
693- else
768+
769+ /*
770+ * Check for fast failover immediately, before checking if the
771+ * requested WAL file is available
772+ */
773+ if (Failover == FastFailover )
774+ exit (1 );
775+
776+ if (CustomizableNextWALFileReady ())
694777{
778+ /*
779+ * Once we have restored this file successfully we can remove some
780+ * prior WAL files. If this restore fails we mustn't remove any file
781+ * because some of them will be requested again immediately after
782+ * the failed restore, or when we restart recovery.
783+ */
784+ if (RestoreWALFileForRecovery ())
785+ {
786+ if (need_cleanup )
787+ CustomizableCleanupPriorWALFiles ();
695788
696- if (debug )
789+ exit (0 );
790+ }
791+ else
697792{
698- fprintf (stderr ,"\nWAL file not present yet." );
699- if (triggerPath )
700- fprintf (stderr ," Checking for trigger file..." );
701- fflush (stderr );
793+ /* Something went wrong in copying the file */
794+ exit (1 );
702795}
796+ }
797+
798+ /* Check for smart failover if the next WAL file was not available */
799+ if (Failover == SmartFailover )
800+ exit (1 );
703801
704- waittime += sleeptime ;
802+ if (sleeptime <=60 )
803+ pg_usleep (sleeptime * 1000000L );
705804
706- if (!triggered && (CheckForExternalTrigger ()|| (waittime >=maxwaittime && maxwaittime > 0 )))
805+ waittime += sleeptime ;
806+ if (waittime >=maxwaittime && maxwaittime > 0 )
807+ {
808+ Failover = FastFailover ;
809+ if (debug )
707810{
708- triggered = true;
709- if ( debug && waittime >= maxwaittime && maxwaittime > 0 )
710- fprintf (stderr , "\nTimed out after %d seconds\n" , waittime );
811+ fprintf ( stderr , "Timed out after %d seconds: fast failover\n" ,
812+ waittime );
813+ fflush (stderr );
711814}
712815}
816+ if (debug )
817+ {
818+ fprintf (stderr ,"WAL file not present yet." );
819+ if (triggerPath )
820+ fprintf (stderr ," Checking for trigger file..." );
821+ fprintf (stderr ,"\n" );
822+ fflush (stderr );
823+ }
713824}
714-
715- /*
716- * Action on exit
717- */
718- if (triggered )
719- exit (1 );/* Normal exit, with non-zero */
720-
721- /*
722- * Once we have restored this file successfully we can remove some prior
723- * WAL files. If this restore fails we musn't remove any file because some
724- * of them will be requested again immediately after the failed restore,
725- * or when we restart recovery.
726- */
727- if (RestoreWALFileForRecovery ()&& need_cleanup )
728- CustomizableCleanupPriorWALFiles ();
729-
730- return 0 ;
731825}