Notifications: You must be signed in to change notification settings
Fork 6
Star 31

Commit 361bd16

committed

Allow Hot Standby to begin from a shutdown checkpoint.

Patch by Simon Riggs & me

1 parent ea9c103, commit 361bd16 (Copy full SHA for 361bd16)

File tree

3 files changed

+224

-62

lines changed

src
- backend/access/transam
  - twophase.c
  - xlog.c
- include/access
  - twophase.h

3 files changed

+224

-62

lines changed

`‎src/backend/access/transam/twophase.c‎`

Lines changed: 84 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`* Portions Copyright (c) 1994, Regents of the University of California`
`8`	`8`	`*`
`9`	`9`	`* IDENTIFICATION`
`10`		`- *$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.59 2010/02/26 02:00:34 momjian Exp $`
	`10`	`+ *$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.60 2010/04/13 14:17:46 heikki Exp $`
`11`	`11`	`*`
`12`	`12`	`* NOTES`
`13`	`13`	`*Each global transaction is associated with a global transaction`
`@@ -1718,6 +1718,89 @@ PrescanPreparedTransactions(TransactionId *xids_p, int nxids_p)`
`1718`	`1718`	`returnresult;`
`1719`	`1719`	`}`
`1720`	`1720`
	`1721`	`+/*`
	`1722`	`+ * StandbyRecoverPreparedTransactions`
	`1723`	`+ *`
	`1724`	`+ * Scan the pg_twophase directory and set up all the required information to`
	`1725`	`+ * allow standby queries to treat prepared transactions as still active.`
	`1726`	`+ * This is never called at the end of recovery - we use`
	`1727`	`+ * RecoverPreparedTransactions() at that point.`
	`1728`	`+ *`
	`1729`	`+ * Currently we simply call SubTransSetParent() for any subxids of prepared`
	`1730`	`+ * transactions. If overwriteOK is true, it's OK if some XIDs have already`
	`1731`	`+ * been marked in pg_subtrans.`
	`1732`	`+ */`
	`1733`	`+void`
	`1734`	`+StandbyRecoverPreparedTransactions(booloverwriteOK)`
	`1735`	`+{`
	`1736`	`+DIR*cldir;`
	`1737`	`+structdirent*clde;`
	`1738`	`+`
	`1739`	`+cldir=AllocateDir(TWOPHASE_DIR);`
	`1740`	`+while ((clde=ReadDir(cldir,TWOPHASE_DIR))!=NULL)`
	`1741`	`+{`
	`1742`	`+if (strlen(clde->d_name)==8&&`
	`1743`	`+strspn(clde->d_name,"0123456789ABCDEF")==8)`
	`1744`	`+{`
	`1745`	`+TransactionIdxid;`
	`1746`	`+char*buf;`
	`1747`	`+TwoPhaseFileHeader*hdr;`
	`1748`	`+TransactionId*subxids;`
	`1749`	`+inti;`
	`1750`	`+`
	`1751`	`+xid= (TransactionId)strtoul(clde->d_name,NULL,16);`
	`1752`	`+`
	`1753`	`+/* Already processed? */`
	`1754`	`+if (TransactionIdDidCommit(xid)\|\|TransactionIdDidAbort(xid))`
	`1755`	`+{`
	`1756`	`+ereport(WARNING,`
	`1757`	`+(errmsg("removing stale two-phase state file \"%s\"",`
	`1758`	`+clde->d_name)));`
	`1759`	`+RemoveTwoPhaseFile(xid, true);`
	`1760`	`+continue;`
	`1761`	`+}`
	`1762`	`+`
	`1763`	`+/* Read and validate file */`
	`1764`	`+buf=ReadTwoPhaseFile(xid, true);`
	`1765`	`+if (buf==NULL)`
	`1766`	`+{`
	`1767`	`+ereport(WARNING,`
	`1768`	`+ (errmsg("removing corrupt two-phase state file \"%s\"",`
	`1769`	`+clde->d_name)));`
	`1770`	`+RemoveTwoPhaseFile(xid, true);`
	`1771`	`+continue;`
	`1772`	`+}`
	`1773`	`+`
	`1774`	`+/* Deconstruct header */`
	`1775`	`+hdr= (TwoPhaseFileHeader*)buf;`
	`1776`	`+if (!TransactionIdEquals(hdr->xid,xid))`
	`1777`	`+{`
	`1778`	`+ereport(WARNING,`
	`1779`	`+ (errmsg("removing corrupt two-phase state file \"%s\"",`
	`1780`	`+clde->d_name)));`
	`1781`	`+RemoveTwoPhaseFile(xid, true);`
	`1782`	`+pfree(buf);`
	`1783`	`+continue;`
	`1784`	`+}`
	`1785`	`+`
	`1786`	`+/*`
	`1787`	`+ * Examine subtransaction XIDs ... they should all follow main`
	`1788`	`+ * XID.`
	`1789`	`+ */`
	`1790`	`+subxids= (TransactionId*)`
	`1791`	`+(buf+MAXALIGN(sizeof(TwoPhaseFileHeader)));`
	`1792`	`+for (i=0;i<hdr->nsubxacts;i++)`
	`1793`	`+{`
	`1794`	`+TransactionIdsubxid=subxids[i];`
	`1795`	`+`
	`1796`	`+Assert(TransactionIdFollows(subxid,xid));`
	`1797`	`+SubTransSetParent(xid,subxid,overwriteOK);`
	`1798`	`+}`
	`1799`	`+}`
	`1800`	`+}`
	`1801`	`+FreeDir(cldir);`
	`1802`	`+}`
	`1803`	`+`
`1721`	`1804`	`/*`
`1722`	`1805`	`* RecoverPreparedTransactions`
`1723`	`1806`	`*`

`‎src/backend/access/transam/xlog.c‎`

Lines changed: 138 additions & 60 deletions

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group`
`8`	`8`	`* Portions Copyright (c) 1994, Regents of the University of California`
`9`	`9`	`*`
`10`		`- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.393 2010/04/12 10:40:42 heikki Exp $`
	`10`	`+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.394 2010/04/13 14:17:46 heikki Exp $`
`11`	`11`	`*`
`12`	`12`	`*-------------------------------------------------------------------------`
`13`	`13`	`*/`
`@@ -496,6 +496,7 @@ static TimeLineID lastPageTLI = 0;`
`496`	`496`	`staticXLogRecPtrminRecoveryPoint;/* local copy of`
`497`	`497`	`* ControlFile->minRecoveryPoint */`
`498`	`498`	`staticboolupdateMinRecoveryPoint= true;`
	`499`	`+staticboolreachedMinRecoveryPoint= false;`
`499`	`500`
`500`	`501`	`staticboolInRedo= false;`
`501`	`502`
`@@ -551,6 +552,7 @@ static void ValidateXLOGDirectoryStructure(void);`
`551`	`552`	`staticvoidCleanupBackupHistory(void);`
`552`	`553`	`staticvoidUpdateMinRecoveryPoint(XLogRecPtrlsn,boolforce);`
`553`	`554`	`staticXLogRecordReadRecord(XLogRecPtrRecPtr,intemode,boolfetching_ckpt);`
	`555`	`+staticvoidCheckRecoveryConsistency(void);`
`554`	`556`	`staticboolValidXLOGHeader(XLogPageHeaderhdr,intemode);`
`555`	`557`	`staticXLogRecord*ReadCheckpointRecord(XLogRecPtrRecPtr,intwhichChkpt);`
`556`	`558`	`staticList*readTimeLineHistory(TimeLineIDtargetTLI);`
`@@ -5591,7 +5593,6 @@ StartupXLOG(void)`
`5591`	`5593`	`uint32freespace;`
`5592`	`5594`	`TransactionIdoldestActiveXID;`
`5593`	`5595`	`boolbgwriterLaunched= false;`
`5594`		`-boolbackendsAllowed= false;`
`5595`	`5596`
`5596`	`5597`	`/*`
`5597`	`5598`	`* Read control file and check XLOG status looks valid.`
`@@ -5838,6 +5839,8 @@ StartupXLOG(void)`
`5838`	`5839`	`if (InRecovery)`
`5839`	`5840`	`{`
`5840`	`5841`	`intrmid;`
	`5842`	`+/* use volatile pointer to prevent code rearrangement */`
	`5843`	`+volatileXLogCtlData*xlogctl=XLogCtl;`
`5841`	`5844`
`5842`	`5845`	`/*`
`5843`	`5846`	`* Update pg_control to show that we are recovering and to show the`
`@@ -5930,6 +5933,33 @@ StartupXLOG(void)`
`5930`	`5933`	`StartupMultiXact();`
`5931`	`5934`
`5932`	`5935`	`ProcArrayInitRecoveryInfo(oldestActiveXID);`
	`5936`	`+`
	`5937`	`+/*`
	`5938`	`+ * If we're beginning at a shutdown checkpoint, we know that`
	`5939`	`+ * nothing was running on the master at this point. So fake-up`
	`5940`	`+ * an empty running-xacts record and use that here and now.`
	`5941`	`+ * Recover additional standby state for prepared transactions.`
	`5942`	`+ */`
	`5943`	`+if (wasShutdown)`
	`5944`	`+{`
	`5945`	`+RunningTransactionsDatarunning;`
	`5946`	`+`
	`5947`	`+/*`
	`5948`	`+ * Construct a RunningTransactions snapshot representing a shut`
	`5949`	`+ * down server, with only prepared transactions still alive.`
	`5950`	`+ * We're never overflowed at this point because all subxids`
	`5951`	`+ * are listed with their parent prepared transactions.`
	`5952`	`+ */`
	`5953`	`+running.xcnt=nxids;`
	`5954`	`+running.subxid_overflow= false;`
	`5955`	`+running.nextXid=checkPoint.nextXid;`
	`5956`	`+running.oldestRunningXid=oldestActiveXID;`
	`5957`	`+running.xids=xids;`
	`5958`	`+`
	`5959`	`+ProcArrayApplyRecoveryInfo(&running);`
	`5960`	`+`
	`5961`	`+StandbyRecoverPreparedTransactions(false);`
	`5962`	`+}`
`5933`	`5963`	`}`
`5934`	`5964`
`5935`	`5965`	`/* Initialize resource managers */`
`@@ -5939,6 +5969,46 @@ StartupXLOG(void)`
`5939`	`5969`	`RmgrTable[rmid].rm_startup();`
`5940`	`5970`	`}`
`5941`	`5971`
	`5972`	`+/*`
	`5973`	`+ * Initialize shared replayEndRecPtr and recoveryLastRecPtr.`
	`5974`	`+ *`
	`5975`	`+ * This is slightly confusing if we're starting from an online`
	`5976`	`+ * checkpoint; we've just read and replayed the checkpoint record,`
	`5977`	`+ * but we're going to start replay from its redo pointer, which`
	`5978`	`+ * precedes the location of the checkpoint record itself. So even`
	`5979`	`+ * though the last record we've replayed is indeed ReadRecPtr, we`
	`5980`	`+ * haven't replayed all the preceding records yet. That's OK for`
	`5981`	`+ * the current use of these variables.`
	`5982`	`+ */`
	`5983`	`+SpinLockAcquire(&xlogctl->info_lck);`
	`5984`	`+xlogctl->replayEndRecPtr=ReadRecPtr;`
	`5985`	`+xlogctl->recoveryLastRecPtr=ReadRecPtr;`
	`5986`	`+SpinLockRelease(&xlogctl->info_lck);`
	`5987`	`+`
	`5988`	`+/*`
	`5989`	`+ * Let postmaster know we've started redo now, so that it can`
	`5990`	`+ * launch bgwriter to perform restartpoints. We don't bother`
	`5991`	`+ * during crash recovery as restartpoints can only be performed`
	`5992`	`+ * during archive recovery. And we'd like to keep crash recovery`
	`5993`	`+ * simple, to avoid introducing bugs that could prevent you from`
	`5994`	`+ * recovering after crash.`
	`5995`	`+ *`
	`5996`	`+ * After this point, we can no longer assume that we're the only`
	`5997`	`+ * process in addition to postmaster! Also, fsync requests are`
	`5998`	`+ * subsequently to be handled by the bgwriter, not locally.`
	`5999`	`+ */`
	`6000`	`+if (InArchiveRecovery&&IsUnderPostmaster)`
	`6001`	`+{`
	`6002`	`+SetForwardFsyncRequests();`
	`6003`	`+SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);`
	`6004`	`+bgwriterLaunched= true;`
	`6005`	`+}`
	`6006`	`+`
	`6007`	`+/*`
	`6008`	`+ * Allow read-only connections immediately if we're consistent already.`
	`6009`	`+ */`
	`6010`	`+CheckRecoveryConsistency();`
	`6011`	`+`
`5942`	`6012`	`/*`
`5943`	`6013`	`* Find the first record that logically follows the checkpoint --- it`
`5944`	`6014`	`* might physically precede it, though.`
`@@ -5958,43 +6028,14 @@ StartupXLOG(void)`
`5958`	`6028`	`{`
`5959`	`6029`	`boolrecoveryContinue= true;`
`5960`	`6030`	`boolrecoveryApply= true;`
`5961`		`-boolreachedMinRecoveryPoint= false;`
`5962`	`6031`	`ErrorContextCallbackerrcontext;`
`5963`	`6032`
`5964`		`-/* use volatile pointer to prevent code rearrangement */`
`5965`		`-volatileXLogCtlData*xlogctl=XLogCtl;`
`5966`		`-`
`5967`		`-/* initialize shared replayEndRecPtr and recoveryLastRecPtr */`
`5968`		`-SpinLockAcquire(&xlogctl->info_lck);`
`5969`		`-xlogctl->replayEndRecPtr=ReadRecPtr;`
`5970`		`-xlogctl->recoveryLastRecPtr=ReadRecPtr;`
`5971`		`-SpinLockRelease(&xlogctl->info_lck);`
`5972`		`-`
`5973`	`6033`	`InRedo= true;`
`5974`	`6034`
`5975`	`6035`	`ereport(LOG,`
`5976`	`6036`	`(errmsg("redo starts at %X/%X",`
`5977`	`6037`	`ReadRecPtr.xlogid,ReadRecPtr.xrecoff)));`
`5978`	`6038`
`5979`		`-/*`
`5980`		`- * Let postmaster know we've started redo now, so that it can`
`5981`		`- * launch bgwriter to perform restartpoints. We don't bother`
`5982`		`- * during crash recovery as restartpoints can only be performed`
`5983`		`- * during archive recovery. And we'd like to keep crash recovery`
`5984`		`- * simple, to avoid introducing bugs that could prevent you from`
`5985`		`- * recovering after crash.`
`5986`		`- *`
`5987`		`- * After this point, we can no longer assume that we're the only`
`5988`		`- * process in addition to postmaster! Also, fsync requests are`
`5989`		`- * subsequently to be handled by the bgwriter, not locally.`
`5990`		`- */`
`5991`		`-if (InArchiveRecovery&&IsUnderPostmaster)`
`5992`		`-{`
`5993`		`-SetForwardFsyncRequests();`
`5994`		`-SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);`
`5995`		`-bgwriterLaunched= true;`
`5996`		`-}`
`5997`		`-`
`5998`	`6039`	`/*`
`5999`	`6040`	`* main redo apply loop`
`6000`	`6041`	`*/`
`@@ -6024,32 +6065,8 @@ StartupXLOG(void)`
`6024`	`6065`	`/* Handle interrupt signals of startup process */`
`6025`	`6066`	`HandleStartupProcInterrupts();`
`6026`	`6067`
`6027`		`-/*`
`6028`		`- * Have we passed our safe starting point?`
`6029`		`- */`
`6030`		`-if (!reachedMinRecoveryPoint&&`
`6031`		`-XLByteLE(minRecoveryPoint,EndRecPtr)&&`
`6032`		`-XLogRecPtrIsInvalid(ControlFile->backupStartPoint))`
`6033`		`-{`
`6034`		`-reachedMinRecoveryPoint= true;`
`6035`		`-ereport(LOG,`
`6036`		`-(errmsg("consistent recovery state reached at %X/%X",`
`6037`		`-EndRecPtr.xlogid,EndRecPtr.xrecoff)));`
`6038`		`-}`
`6039`		`-`
`6040`		`-/*`
`6041`		`- * Have we got a valid starting snapshot that will allow`
`6042`		`- * queries to be run? If so, we can tell postmaster that the`
`6043`		`- * database is consistent now, enabling connections.`
`6044`		`- */`
`6045`		`-if (standbyState==STANDBY_SNAPSHOT_READY&&`
`6046`		`-!backendsAllowed&&`
`6047`		`-reachedMinRecoveryPoint&&`
`6048`		`-IsUnderPostmaster)`
`6049`		`-{`
`6050`		`-backendsAllowed= true;`
`6051`		`-SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);`
`6052`		`-}`
	`6068`	`+/* Allow read-only connections if we're consistent now */`
	`6069`	`+CheckRecoveryConsistency();`
`6053`	`6070`
`6054`	`6071`	`/*`
`6055`	`6072`	`* Have we reached our recovery target?`
`@@ -6398,6 +6415,44 @@ StartupXLOG(void)`
`6398`	`6415`	`}`
`6399`	`6416`	`}`
`6400`	`6417`
	`6418`	`+/*`
	`6419`	`+ * Checks if recovery has reached a consistent state. When consistency is`
	`6420`	`+ * reached and we have a valid starting standby snapshot, tell postmaster`
	`6421`	`+ * that it can start accepting read-only connections.`
	`6422`	`+ */`
	`6423`	`+staticvoid`
	`6424`	`+CheckRecoveryConsistency(void)`
	`6425`	`+{`
	`6426`	`+staticboolbackendsAllowed= false;`
	`6427`	`+`
	`6428`	`+/*`
	`6429`	`+ * Have we passed our safe starting point?`
	`6430`	`+ */`
	`6431`	`+if (!reachedMinRecoveryPoint&&`
	`6432`	`+XLByteLE(minRecoveryPoint,EndRecPtr)&&`
	`6433`	`+XLogRecPtrIsInvalid(ControlFile->backupStartPoint))`
	`6434`	`+{`
	`6435`	`+reachedMinRecoveryPoint= true;`
	`6436`	`+ereport(LOG,`
	`6437`	`+(errmsg("consistent recovery state reached at %X/%X",`
	`6438`	`+EndRecPtr.xlogid,EndRecPtr.xrecoff)));`
	`6439`	`+}`
	`6440`	`+`
	`6441`	`+/*`
	`6442`	`+ * Have we got a valid starting snapshot that will allow`
	`6443`	`+ * queries to be run? If so, we can tell postmaster that the`
	`6444`	`+ * database is consistent now, enabling connections.`
	`6445`	`+ */`
	`6446`	`+if (standbyState==STANDBY_SNAPSHOT_READY&&`
	`6447`	`+!backendsAllowed&&`
	`6448`	`+reachedMinRecoveryPoint&&`
	`6449`	`+IsUnderPostmaster)`
	`6450`	`+{`
	`6451`	`+backendsAllowed= true;`
	`6452`	`+SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);`
	`6453`	`+}`
	`6454`	`+}`
	`6455`	`+`
`6401`	`6456`	`/*`
`6402`	`6457`	`* Is the system still in recovery?`
`6403`	`6458`	`*`
`@@ -7657,13 +7712,36 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)`
`7657`	`7712`	`if (standbyState!=STANDBY_DISABLED)`
`7658`	`7713`	`CheckRequiredParameterValues(checkPoint);`
`7659`	`7714`
	`7715`	`+/*`
	`7716`	`+ * If we see a shutdown checkpoint, we know that nothing was`
	`7717`	`+ * running on the master at this point. So fake-up an empty`
	`7718`	`+ * running-xacts record and use that here and now. Recover`
	`7719`	`+ * additional standby state for prepared transactions.`
	`7720`	`+ */`
`7660`	`7721`	`if (standbyState >=STANDBY_INITIALIZED)`
`7661`	`7722`	`{`
	`7723`	`+TransactionId*xids;`
	`7724`	`+intnxids;`
	`7725`	`+TransactionIdoldestActiveXID;`
	`7726`	`+RunningTransactionsDatarunning;`
	`7727`	`+`
	`7728`	`+oldestActiveXID=PrescanPreparedTransactions(&xids,&nxids);`
	`7729`	`+`
`7662`	`7730`	`/*`
`7663`		`- * Remove stale transactions, if any.`
	`7731`	`+ * Construct a RunningTransactions snapshot representing a shut`
	`7732`	`+ * down server, with only prepared transactions still alive.`
	`7733`	`+ * We're never overflowed at this point because all subxids`
	`7734`	`+ * are listed with their parent prepared transactions.`
`7664`	`7735`	`*/`
`7665`		`-ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);`
`7666`		`-StandbyReleaseOldLocks(checkPoint.nextXid);`
	`7736`	`+running.xcnt=nxids;`
	`7737`	`+running.subxid_overflow= false;`
	`7738`	`+running.nextXid=checkPoint.nextXid;`
	`7739`	`+running.oldestRunningXid=oldestActiveXID;`
	`7740`	`+running.xids=xids;`
	`7741`	`+`
	`7742`	`+ProcArrayApplyRecoveryInfo(&running);`
	`7743`	`+`
	`7744`	`+StandbyRecoverPreparedTransactions(true);`
`7667`	`7745`	`}`
`7668`	`7746`
`7669`	`7747`	`/* ControlFile->checkPointCopy always tracks the latest ckpt XID */`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit 361bd16

File tree

3 files changed

3 files changed

`‎src/backend/access/transam/twophase.c‎`

`‎src/backend/access/transam/xlog.c‎`

0 commit comments