@@ -188,7 +188,18 @@ static bool LocalHotStandbyActive = false;
188188 */
189189static int LocalXLogInsertAllowed = -1 ;
190190
191- /* Are we recovering using offline XLOG archives? */
191+ /*
192+ * When ArchiveRecoveryRequested is set, archive recovery was requested,
193+ * i.e. recovery.conf file was present. When InArchiveRecovery is set, we are
194+ * currently recovering using offline XLOG archives. These variables are only
195+ * valid in the startup process.
196+ *
197+ * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
198+ * currently performing crash recovery using only XLOG files in pg_xlog, but
199+ * will switch to using offline XLOG archives as soon as we reach the end of
200+ * WAL in pg_xlog.
201+ */
202+ static bool ArchiveRecoveryRequested = false;
192203static bool InArchiveRecovery = false;
193204
194205/* Was the last xlog file restored from archive, or local? */
@@ -206,10 +217,13 @@ static TimestampTz recoveryTargetTime;
206217static char * recoveryTargetName ;
207218
208219/* options taken from recovery.conf for XLOG streaming */
209- static bool StandbyMode = false;
220+ static bool StandbyModeRequested = false;
210221static char * PrimaryConnInfo = NULL ;
211222static char * TriggerFile = NULL ;
212223
224+ /* are we currently in standby mode? */
225+ bool StandbyMode = false;
226+
213227/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
214228static TransactionId recoveryStopXid ;
215229static TimestampTz recoveryStopTime ;
@@ -4236,6 +4250,43 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt)
42364250readFile = -1 ;
42374251}
42384252
4253+ /*
4254+ * If archive recovery was requested, but we were still doing crash
4255+ * recovery, switch to archive recovery and retry using the offline
4256+ * archive. We have now replayed all the valid WAL in pg_xlog, so
4257+ * we are presumably now consistent.
4258+ *
4259+ * We require that there's at least some valid WAL present in
4260+ * pg_xlog, however (!fetching_ckpt). We could recover using the WAL
4261+ * from the archive, even if pg_xlog is completely empty, but we'd
4262+ * have no idea how far we'd have to replay to reach consistency.
4263+ * So err on the safe side and give up.
4264+ */
4265+ if (!InArchiveRecovery && ArchiveRecoveryRequested && !fetching_ckpt )
4266+ {
4267+ ereport (DEBUG1 ,
4268+ (errmsg_internal ("reached end of WAL in pg_xlog, entering archive recovery" )));
4269+ InArchiveRecovery = true;
4270+ if (StandbyModeRequested )
4271+ StandbyMode = true;
4272+
4273+ /* initialize minRecoveryPoint to this record */
4274+ LWLockAcquire (ControlFileLock ,LW_EXCLUSIVE );
4275+ ControlFile -> state = DB_IN_ARCHIVE_RECOVERY ;
4276+ if (XLByteLT (ControlFile -> minRecoveryPoint ,EndRecPtr ))
4277+ ControlFile -> minRecoveryPoint = EndRecPtr ;
4278+
4279+ /* update local copy */
4280+ minRecoveryPoint = ControlFile -> minRecoveryPoint ;
4281+
4282+ UpdateControlFile ();
4283+ LWLockRelease (ControlFileLock );
4284+
4285+ CheckRecoveryConsistency ();
4286+
4287+ gotoretry ;
4288+ }
4289+
42394290/* In standby-mode, keep trying */
42404291if (StandbyMode )
42414292gotoretry ;
@@ -5631,7 +5682,7 @@ readRecoveryCommandFile(void)
56315682}
56325683else if (strcmp (item -> name ,"standby_mode" )== 0 )
56335684{
5634- if (!parse_bool (item -> value ,& StandbyMode ))
5685+ if (!parse_bool (item -> value ,& StandbyModeRequested ))
56355686ereport (ERROR ,
56365687(errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
56375688errmsg ("parameter \"%s\" requires a Boolean value" ,
@@ -5662,7 +5713,7 @@ readRecoveryCommandFile(void)
56625713/*
56635714 * Check for compulsory parameters
56645715 */
5665- if (StandbyMode )
5716+ if (StandbyModeRequested )
56665717{
56675718if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL )
56685719ereport (WARNING ,
@@ -5679,7 +5730,7 @@ readRecoveryCommandFile(void)
56795730}
56805731
56815732/* Enable fetching from archive recovery area */
5682- InArchiveRecovery = true;
5733+ ArchiveRecoveryRequested = true;
56835734
56845735/*
56855736 * If user specified recovery_target_timeline, validate it or compute the
@@ -5689,6 +5740,11 @@ readRecoveryCommandFile(void)
56895740 */
56905741if (rtliGiven )
56915742{
5743+ /*
5744+ * Temporarily set InArchiveRecovery, so that existsTimeLineHistory
5745+ * or findNewestTimeLine below will check the archive.
5746+ */
5747+ InArchiveRecovery = true;
56925748if (rtli )
56935749{
56945750/* Timeline 1 does not have a history file, all else should */
@@ -5705,6 +5761,7 @@ readRecoveryCommandFile(void)
57055761recoveryTargetTLI = findNewestTimeLine (recoveryTargetTLI );
57065762recoveryTargetIsLatest = true;
57075763}
5764+ InArchiveRecovery = false;
57085765}
57095766
57105767FreeConfigVariables (head );
@@ -6283,9 +6340,9 @@ StartupXLOG(void)
62836340archiveCleanupCommand ?archiveCleanupCommand :"" ,
62846341sizeof (XLogCtl -> archiveCleanupCommand ));
62856342
6286- if (InArchiveRecovery )
6343+ if (ArchiveRecoveryRequested )
62876344{
6288- if (StandbyMode )
6345+ if (StandbyModeRequested )
62896346ereport (LOG ,
62906347(errmsg ("entering standby mode" )));
62916348else if (recoveryTarget == RECOVERY_TARGET_XID )
@@ -6309,12 +6366,21 @@ StartupXLOG(void)
63096366 * Take ownership of the wakeup latch if we're going to sleep during
63106367 * recovery.
63116368 */
6312- if (StandbyMode )
6369+ if (StandbyModeRequested )
63136370OwnLatch (& XLogCtl -> recoveryWakeupLatch );
63146371
63156372if (read_backup_label (& checkPointLoc ,& backupEndRequired ,
63166373& backupFromStandby ))
63176374{
6375+ /*
6376+ * Archive recovery was requested, and thanks to the backup label file,
6377+ * we know how far we need to replay to reach consistency. Enter
6378+ * archive recovery directly.
6379+ */
6380+ InArchiveRecovery = true;
6381+ if (StandbyModeRequested )
6382+ StandbyMode = true;
6383+
63186384/*
63196385 * When a backup_label file is present, we want to roll forward from
63206386 * the checkpoint it identifies, rather than using pg_control.
@@ -6355,6 +6421,33 @@ StartupXLOG(void)
63556421}
63566422else
63576423{
6424+ /*
6425+ * It's possible that archive recovery was requested, but we don't
6426+ * know how far we need to replay the WAL before we reach consistency.
6427+ * This can happen for example if a base backup is taken from a running
6428+ * server using an atomic filesystem snapshot, without calling
6429+ * pg_start/stop_backup. Or if you just kill a running master server
6430+ * and put it into archive recovery by creating a recovery.conf file.
6431+ *
6432+ * Our strategy in that case is to perform crash recovery first,
6433+ * replaying all the WAL present in pg_xlog, and only enter archive
6434+ * recovery after that.
6435+ *
6436+ * But usually we already know how far we need to replay the WAL (up to
6437+ * minRecoveryPoint, up to backupEndPoint, or until we see an
6438+ * end-of-backup record), and we can enter archive recovery directly.
6439+ */
6440+ if (ArchiveRecoveryRequested &&
6441+ (!XLByteEQ (ControlFile -> minRecoveryPoint ,InvalidXLogRecPtr )||
6442+ ControlFile -> backupEndRequired ||
6443+ !XLByteEQ (ControlFile -> backupEndPoint ,InvalidXLogRecPtr )||
6444+ ControlFile -> state == DB_SHUTDOWNED ))
6445+ {
6446+ InArchiveRecovery = true;
6447+ if (StandbyModeRequested )
6448+ StandbyMode = true;
6449+ }
6450+
63586451/*
63596452 * Get the last valid checkpoint record. If the latest one according
63606453 * to pg_control is broken, try the next-to-last one.
@@ -6454,7 +6547,7 @@ StartupXLOG(void)
64546547}
64556548else if (ControlFile -> state != DB_SHUTDOWNED )
64566549InRecovery = true;
6457- else if (InArchiveRecovery )
6550+ else if (ArchiveRecoveryRequested )
64586551{
64596552/* force recovery due to presence of recovery.conf */
64606553InRecovery = true;
@@ -6487,12 +6580,6 @@ StartupXLOG(void)
64876580ControlFile -> prevCheckPoint = ControlFile -> checkPoint ;
64886581ControlFile -> checkPoint = checkPointLoc ;
64896582ControlFile -> checkPointCopy = checkPoint ;
6490- if (InArchiveRecovery )
6491- {
6492- /* initialize minRecoveryPoint if not set yet */
6493- if (XLByteLT (ControlFile -> minRecoveryPoint ,checkPoint .redo ))
6494- ControlFile -> minRecoveryPoint = checkPoint .redo ;
6495- }
64966583
64976584/*
64986585 * Set backupStartPoint if we're starting recovery from a base backup.
@@ -6571,7 +6658,7 @@ StartupXLOG(void)
65716658 * control file and we've established a recovery snapshot from a
65726659 * running-xacts WAL record.
65736660 */
6574- if (InArchiveRecovery && EnableHotStandby )
6661+ if (ArchiveRecoveryRequested && EnableHotStandby )
65756662{
65766663TransactionId * xids ;
65776664int nxids ;
@@ -6669,7 +6756,7 @@ StartupXLOG(void)
66696756 * process in addition to postmaster! Also, fsync requests are
66706757 * subsequently to be handled by the checkpointer, not locally.
66716758 */
6672- if (InArchiveRecovery && IsUnderPostmaster )
6759+ if (ArchiveRecoveryRequested && IsUnderPostmaster )
66736760{
66746761PublishStartupProcessInformation ();
66756762SetForwardFsyncRequests ();
@@ -6873,7 +6960,7 @@ StartupXLOG(void)
68736960 * We don't need the latch anymore. It's not strictly necessary to disown
68746961 * it, but let's do it for the sake of tidiness.
68756962 */
6876- if (StandbyMode )
6963+ if (StandbyModeRequested )
68776964DisownLatch (& XLogCtl -> recoveryWakeupLatch );
68786965
68796966/*
@@ -6918,7 +7005,7 @@ StartupXLOG(void)
69187005 * crashes while an online backup is in progress. We must not treat
69197006 * that as an error, or the database will refuse to start up.
69207007 */
6921- if (InArchiveRecovery || ControlFile -> backupEndRequired )
7008+ if (ArchiveRecoveryRequested || ControlFile -> backupEndRequired )
69227009{
69237010if (ControlFile -> backupEndRequired )
69247011ereport (FATAL ,
@@ -6948,8 +7035,10 @@ StartupXLOG(void)
69487035 *
69497036 * In a normal crash recovery, we can just extend the timeline we were in.
69507037 */
6951- if (InArchiveRecovery )
7038+ if (ArchiveRecoveryRequested )
69527039{
7040+ Assert (InArchiveRecovery );
7041+
69537042ThisTimeLineID = findNewestTimeLine (recoveryTargetTLI )+ 1 ;
69547043ereport (LOG ,
69557044(errmsg ("selected new timeline ID: %u" ,ThisTimeLineID )));
@@ -6966,7 +7055,7 @@ StartupXLOG(void)
69667055 * that we also have a copy of the last block of the old WAL in readBuf;
69677056 * we will use that below.)
69687057 */
6969- if (InArchiveRecovery )
7058+ if (ArchiveRecoveryRequested )
69707059exitArchiveRecovery (curFileTLI ,endLogId ,endLogSeg );
69717060
69727061/*
@@ -8799,7 +8888,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
87998888 * record, the backup was canceled and the end-of-backup record will
88008889 * never arrive.
88018890 */
8802- if (InArchiveRecovery &&
8891+ if (ArchiveRecoveryRequested &&
88038892!XLogRecPtrIsInvalid (ControlFile -> backupStartPoint )&&
88048893XLogRecPtrIsInvalid (ControlFile -> backupEndPoint ))
88058894ereport (PANIC ,
@@ -10263,7 +10352,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
1026310352 * Request a restartpoint if we've replayed too much xlog since the
1026410353 * last one.
1026510354 */
10266- if (StandbyMode && bgwriterLaunched )
10355+ if (StandbyModeRequested && bgwriterLaunched )
1026710356{
1026810357if (XLogCheckpointNeeded (readId ,readSeg ))
1026910358{