77 * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.393 2010/04/12 10:40:42 heikki Exp $
10+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.394 2010/04/13 14:17:46 heikki Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -496,6 +496,7 @@ static TimeLineID lastPageTLI = 0;
496496static XLogRecPtr minRecoveryPoint ;/* local copy of
497497 * ControlFile->minRecoveryPoint */
498498static bool updateMinRecoveryPoint = true;
499+ static bool reachedMinRecoveryPoint = false;
499500
500501static bool InRedo = false;
501502
@@ -551,6 +552,7 @@ static void ValidateXLOGDirectoryStructure(void);
551552static void CleanupBackupHistory (void );
552553static void UpdateMinRecoveryPoint (XLogRecPtr lsn ,bool force );
553554static XLogRecord * ReadRecord (XLogRecPtr * RecPtr ,int emode ,bool fetching_ckpt );
555+ static void CheckRecoveryConsistency (void );
554556static bool ValidXLOGHeader (XLogPageHeader hdr ,int emode );
555557static XLogRecord * ReadCheckpointRecord (XLogRecPtr RecPtr ,int whichChkpt );
556558static List * readTimeLineHistory (TimeLineID targetTLI );
@@ -5591,7 +5593,6 @@ StartupXLOG(void)
55915593uint32 freespace ;
55925594TransactionId oldestActiveXID ;
55935595bool bgwriterLaunched = false;
5594- bool backendsAllowed = false;
55955596
55965597/*
55975598 * Read control file and check XLOG status looks valid.
@@ -5838,6 +5839,8 @@ StartupXLOG(void)
58385839if (InRecovery )
58395840{
58405841int rmid ;
5842+ /* use volatile pointer to prevent code rearrangement */
5843+ volatile XLogCtlData * xlogctl = XLogCtl ;
58415844
58425845/*
58435846 * Update pg_control to show that we are recovering and to show the
@@ -5930,6 +5933,33 @@ StartupXLOG(void)
59305933StartupMultiXact ();
59315934
59325935ProcArrayInitRecoveryInfo (oldestActiveXID );
5936+
5937+ /*
5938+ * If we're beginning at a shutdown checkpoint, we know that
5939+ * nothing was running on the master at this point. So fake-up
5940+ * an empty running-xacts record and use that here and now.
5941+ * Recover additional standby state for prepared transactions.
5942+ */
5943+ if (wasShutdown )
5944+ {
5945+ RunningTransactionsData running ;
5946+
5947+ /*
5948+ * Construct a RunningTransactions snapshot representing a shut
5949+ * down server, with only prepared transactions still alive.
5950+ * We're never overflowed at this point because all subxids
5951+ * are listed with their parent prepared transactions.
5952+ */
5953+ running .xcnt = nxids ;
5954+ running .subxid_overflow = false;
5955+ running .nextXid = checkPoint .nextXid ;
5956+ running .oldestRunningXid = oldestActiveXID ;
5957+ running .xids = xids ;
5958+
5959+ ProcArrayApplyRecoveryInfo (& running );
5960+
5961+ StandbyRecoverPreparedTransactions (false);
5962+ }
59335963}
59345964
59355965/* Initialize resource managers */
@@ -5939,6 +5969,46 @@ StartupXLOG(void)
59395969RmgrTable [rmid ].rm_startup ();
59405970}
59415971
5972+ /*
5973+ * Initialize shared replayEndRecPtr and recoveryLastRecPtr.
5974+ *
5975+ * This is slightly confusing if we're starting from an online
5976+ * checkpoint; we've just read and replayed the checkpoint record,
5977+ * but we're going to start replay from its redo pointer, which
5978+ * precedes the location of the checkpoint record itself. So even
5979+ * though the last record we've replayed is indeed ReadRecPtr, we
5980+ * haven't replayed all the preceding records yet. That's OK for
5981+ * the current use of these variables.
5982+ */
5983+ SpinLockAcquire (& xlogctl -> info_lck );
5984+ xlogctl -> replayEndRecPtr = ReadRecPtr ;
5985+ xlogctl -> recoveryLastRecPtr = ReadRecPtr ;
5986+ SpinLockRelease (& xlogctl -> info_lck );
5987+
5988+ /*
5989+ * Let postmaster know we've started redo now, so that it can
5990+ * launch bgwriter to perform restartpoints. We don't bother
5991+ * during crash recovery as restartpoints can only be performed
5992+ * during archive recovery. And we'd like to keep crash recovery
5993+ * simple, to avoid introducing bugs that could prevent you from
5994+ * recovering after crash.
5995+ *
5996+ * After this point, we can no longer assume that we're the only
5997+ * process in addition to postmaster! Also, fsync requests are
5998+ * subsequently to be handled by the bgwriter, not locally.
5999+ */
6000+ if (InArchiveRecovery && IsUnderPostmaster )
6001+ {
6002+ SetForwardFsyncRequests ();
6003+ SendPostmasterSignal (PMSIGNAL_RECOVERY_STARTED );
6004+ bgwriterLaunched = true;
6005+ }
6006+
6007+ /*
6008+ * Allow read-only connections immediately if we're consistent already.
6009+ */
6010+ CheckRecoveryConsistency ();
6011+
59426012/*
59436013 * Find the first record that logically follows the checkpoint --- it
59446014 * might physically precede it, though.
@@ -5958,43 +6028,14 @@ StartupXLOG(void)
59586028{
59596029bool recoveryContinue = true;
59606030bool recoveryApply = true;
5961- bool reachedMinRecoveryPoint = false;
59626031ErrorContextCallback errcontext ;
59636032
5964- /* use volatile pointer to prevent code rearrangement */
5965- volatile XLogCtlData * xlogctl = XLogCtl ;
5966-
5967- /* initialize shared replayEndRecPtr and recoveryLastRecPtr */
5968- SpinLockAcquire (& xlogctl -> info_lck );
5969- xlogctl -> replayEndRecPtr = ReadRecPtr ;
5970- xlogctl -> recoveryLastRecPtr = ReadRecPtr ;
5971- SpinLockRelease (& xlogctl -> info_lck );
5972-
59736033InRedo = true;
59746034
59756035ereport (LOG ,
59766036(errmsg ("redo starts at %X/%X" ,
59776037ReadRecPtr .xlogid ,ReadRecPtr .xrecoff )));
59786038
5979- /*
5980- * Let postmaster know we've started redo now, so that it can
5981- * launch bgwriter to perform restartpoints. We don't bother
5982- * during crash recovery as restartpoints can only be performed
5983- * during archive recovery. And we'd like to keep crash recovery
5984- * simple, to avoid introducing bugs that could you from
5985- * recovering after crash.
5986- *
5987- * After this point, we can no longer assume that we're the only
5988- * process in addition to postmaster! Also, fsync requests are
5989- * subsequently to be handled by the bgwriter, not locally.
5990- */
5991- if (InArchiveRecovery && IsUnderPostmaster )
5992- {
5993- SetForwardFsyncRequests ();
5994- SendPostmasterSignal (PMSIGNAL_RECOVERY_STARTED );
5995- bgwriterLaunched = true;
5996- }
5997-
59986039/*
59996040 * main redo apply loop
60006041 */
@@ -6024,32 +6065,8 @@ StartupXLOG(void)
60246065/* Handle interrupt signals of startup process */
60256066HandleStartupProcInterrupts ();
60266067
6027- /*
6028- * Have we passed our safe starting point?
6029- */
6030- if (!reachedMinRecoveryPoint &&
6031- XLByteLE (minRecoveryPoint ,EndRecPtr )&&
6032- XLogRecPtrIsInvalid (ControlFile -> backupStartPoint ))
6033- {
6034- reachedMinRecoveryPoint = true;
6035- ereport (LOG ,
6036- (errmsg ("consistent recovery state reached at %X/%X" ,
6037- EndRecPtr .xlogid ,EndRecPtr .xrecoff )));
6038- }
6039-
6040- /*
6041- * Have we got a valid starting snapshot that will allow
6042- * queries to be run? If so, we can tell postmaster that the
6043- * database is consistent now, enabling connections.
6044- */
6045- if (standbyState == STANDBY_SNAPSHOT_READY &&
6046- !backendsAllowed &&
6047- reachedMinRecoveryPoint &&
6048- IsUnderPostmaster )
6049- {
6050- backendsAllowed = true;
6051- SendPostmasterSignal (PMSIGNAL_RECOVERY_CONSISTENT );
6052- }
6068+ /* Allow read-only connections if we're consistent now */
6069+ CheckRecoveryConsistency ();
60536070
60546071/*
60556072 * Have we reached our recovery target?
@@ -6398,6 +6415,44 @@ StartupXLOG(void)
63986415}
63996416}
64006417
6418+ /*
6419+ * CheckRecoveryConsistency -- checks if recovery has reached a consistent state: XLByteLE(minRecoveryPoint, EndRecPtr) holds and ControlFile->backupStartPoint is invalid.
6420+ * When consistency is reached, standbyState is STANDBY_SNAPSHOT_READY and IsUnderPostmaster is set, send PMSIGNAL_RECOVERY_CONSISTENT to tell postmaster
6421+ * that it can start accepting read-only connections. Takes no arguments and returns nothing; it reads and updates the file-level reachedMinRecoveryPoint flag.
6422+ */
6423+ static void
6424+ CheckRecoveryConsistency (void )
6425+ {
6426+ static bool backendsAllowed = false; /* set to true just before the postmaster signal is sent; persists across calls */
6427+
6428+ /*
6429+ * Have we passed our safe starting point? The result is latched in reachedMinRecoveryPoint, so this test and its LOG message are skipped on later calls while the flag stays set.
6430+ */
6431+ if (!reachedMinRecoveryPoint &&
6432+ XLByteLE (minRecoveryPoint ,EndRecPtr )&&
6433+ XLogRecPtrIsInvalid (ControlFile -> backupStartPoint ))
6434+ {
6435+ reachedMinRecoveryPoint = true;
6436+ ereport (LOG ,
6437+ (errmsg ("consistent recovery state reached at %X/%X" ,
6438+ EndRecPtr .xlogid ,EndRecPtr .xrecoff )));
6439+ }
6440+
6441+ /*
6442+ * Have we got a valid starting snapshot that will allow
6443+ * queries to be run? If so, we can tell postmaster that the
6444+ * database is consistent now, enabling connections. The backendsAllowed flag keeps this function from sending the signal more than once.
6445+ */
6446+ if (standbyState == STANDBY_SNAPSHOT_READY &&
6447+ !backendsAllowed &&
6448+ reachedMinRecoveryPoint &&
6449+ IsUnderPostmaster )
6450+ {
6451+ backendsAllowed = true;
6452+ SendPostmasterSignal (PMSIGNAL_RECOVERY_CONSISTENT );
6453+ }
6454+ }
6455+
64016456/*
64026457 * Is the system still in recovery?
64036458 *
@@ -7657,13 +7712,36 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
76577712if (standbyState != STANDBY_DISABLED )
76587713CheckRequiredParameterValues (checkPoint );
76597714
7715+ /*
7716+ * If we see a shutdown checkpoint, we know that nothing was
7717+ * running on the master at this point. So fake-up an empty
7718+ * running-xacts record and use that here and now. Recover
7719+ * additional standby state for prepared transactions.
7720+ */
76607721if (standbyState >=STANDBY_INITIALIZED )
76617722{
7723+ TransactionId * xids ;
7724+ int nxids ;
7725+ TransactionId oldestActiveXID ;
7726+ RunningTransactionsData running ;
7727+
7728+ oldestActiveXID = PrescanPreparedTransactions (& xids ,& nxids );
7729+
76627730/*
7663- * Remove stale transactions, if any.
7731+ * Construct a RunningTransactions snapshot representing a shut
7732+ * down server, with only prepared transactions still alive.
7733+ * We're never overflowed at this point because all subxids
7734+ * are listed with their parent prepared transactions.
76647735 */
7665- ExpireOldKnownAssignedTransactionIds (checkPoint .nextXid );
7666- StandbyReleaseOldLocks (checkPoint .nextXid );
7736+ running .xcnt = nxids ;
7737+ running .subxid_overflow = false;
7738+ running .nextXid = checkPoint .nextXid ;
7739+ running .oldestRunningXid = oldestActiveXID ;
7740+ running .xids = xids ;
7741+
7742+ ProcArrayApplyRecoveryInfo (& running );
7743+
7744+ StandbyRecoverPreparedTransactions (true);
76677745}
76687746
76697747/* ControlFile->checkPointCopy always tracks the latest ckpt XID */