Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 361bd16

Browse files
committed
Allow Hot Standby to begin from a shutdown checkpoint.
Patch by Simon Riggs & me
1 parent ea9c103 commit 361bd16

File tree

3 files changed

+224
-62
lines changed

3 files changed

+224
-62
lines changed

‎src/backend/access/transam/twophase.c

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
*$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.59 2010/02/26 02:00:34 momjian Exp $
10+
*$PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.60 2010/04/13 14:17:46 heikki Exp $
1111
*
1212
* NOTES
1313
*Each global transaction is associated with a global transaction
@@ -1718,6 +1718,89 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
17181718
returnresult;
17191719
}
17201720

1721+
/*
1722+
* StandbyRecoverPreparedTransactions
1723+
*
1724+
* Scan the pg_twophase directory and set up all the required information to
1725+
* allow standby queries to treat prepared transactions as still active.
1726+
* This is never called at the end of recovery - we use
1727+
* RecoverPreparedTransactions() at that point.
1728+
*
1729+
* Currently we simply call SubTransSetParent() for any subxids of prepared
1730+
* transactions. If overwriteOK is true, it's OK if some XIDs have already
1731+
* been marked in pg_subtrans.
1732+
*/
1733+
void
1734+
StandbyRecoverPreparedTransactions(booloverwriteOK)
1735+
{
1736+
DIR*cldir;
1737+
structdirent*clde;
1738+
1739+
cldir=AllocateDir(TWOPHASE_DIR);
1740+
while ((clde=ReadDir(cldir,TWOPHASE_DIR))!=NULL)
1741+
{
1742+
if (strlen(clde->d_name)==8&&
1743+
strspn(clde->d_name,"0123456789ABCDEF")==8)
1744+
{
1745+
TransactionIdxid;
1746+
char*buf;
1747+
TwoPhaseFileHeader*hdr;
1748+
TransactionId*subxids;
1749+
inti;
1750+
1751+
xid= (TransactionId)strtoul(clde->d_name,NULL,16);
1752+
1753+
/* Already processed? */
1754+
if (TransactionIdDidCommit(xid)||TransactionIdDidAbort(xid))
1755+
{
1756+
ereport(WARNING,
1757+
(errmsg("removing stale two-phase state file \"%s\"",
1758+
clde->d_name)));
1759+
RemoveTwoPhaseFile(xid, true);
1760+
continue;
1761+
}
1762+
1763+
/* Read and validate file */
1764+
buf=ReadTwoPhaseFile(xid, true);
1765+
if (buf==NULL)
1766+
{
1767+
ereport(WARNING,
1768+
(errmsg("removing corrupt two-phase state file \"%s\"",
1769+
clde->d_name)));
1770+
RemoveTwoPhaseFile(xid, true);
1771+
continue;
1772+
}
1773+
1774+
/* Deconstruct header */
1775+
hdr= (TwoPhaseFileHeader*)buf;
1776+
if (!TransactionIdEquals(hdr->xid,xid))
1777+
{
1778+
ereport(WARNING,
1779+
(errmsg("removing corrupt two-phase state file \"%s\"",
1780+
clde->d_name)));
1781+
RemoveTwoPhaseFile(xid, true);
1782+
pfree(buf);
1783+
continue;
1784+
}
1785+
1786+
/*
1787+
* Examine subtransaction XIDs ... they should all follow main
1788+
* XID.
1789+
*/
1790+
subxids= (TransactionId*)
1791+
(buf+MAXALIGN(sizeof(TwoPhaseFileHeader)));
1792+
for (i=0;i<hdr->nsubxacts;i++)
1793+
{
1794+
TransactionIdsubxid=subxids[i];
1795+
1796+
Assert(TransactionIdFollows(subxid,xid));
1797+
SubTransSetParent(xid,subxid,overwriteOK);
1798+
}
1799+
}
1800+
}
1801+
FreeDir(cldir);
1802+
}
1803+
17211804
/*
17221805
* RecoverPreparedTransactions
17231806
*

‎src/backend/access/transam/xlog.c

Lines changed: 138 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.393 2010/04/12 10:40:42 heikki Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.394 2010/04/13 14:17:46 heikki Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -496,6 +496,7 @@ static TimeLineID lastPageTLI = 0;
496496
staticXLogRecPtrminRecoveryPoint;/* local copy of
497497
* ControlFile->minRecoveryPoint */
498498
staticboolupdateMinRecoveryPoint= true;
499+
staticboolreachedMinRecoveryPoint= false;
499500

500501
staticboolInRedo= false;
501502

@@ -551,6 +552,7 @@ static void ValidateXLOGDirectoryStructure(void);
551552
staticvoidCleanupBackupHistory(void);
552553
staticvoidUpdateMinRecoveryPoint(XLogRecPtrlsn,boolforce);
553554
staticXLogRecord*ReadRecord(XLogRecPtr*RecPtr,intemode,boolfetching_ckpt);
555+
staticvoidCheckRecoveryConsistency(void);
554556
staticboolValidXLOGHeader(XLogPageHeaderhdr,intemode);
555557
staticXLogRecord*ReadCheckpointRecord(XLogRecPtrRecPtr,intwhichChkpt);
556558
staticList*readTimeLineHistory(TimeLineIDtargetTLI);
@@ -5591,7 +5593,6 @@ StartupXLOG(void)
55915593
uint32freespace;
55925594
TransactionIdoldestActiveXID;
55935595
boolbgwriterLaunched= false;
5594-
boolbackendsAllowed= false;
55955596

55965597
/*
55975598
* Read control file and check XLOG status looks valid.
@@ -5838,6 +5839,8 @@ StartupXLOG(void)
58385839
if (InRecovery)
58395840
{
58405841
intrmid;
5842+
/* use volatile pointer to prevent code rearrangement */
5843+
volatileXLogCtlData*xlogctl=XLogCtl;
58415844

58425845
/*
58435846
* Update pg_control to show that we are recovering and to show the
@@ -5930,6 +5933,33 @@ StartupXLOG(void)
59305933
StartupMultiXact();
59315934

59325935
ProcArrayInitRecoveryInfo(oldestActiveXID);
5936+
5937+
/*
5938+
* If we're beginning at a shutdown checkpoint, we know that
5939+
* nothing was running on the master at this point. So fake-up
5940+
* an empty running-xacts record and use that here and now.
5941+
* Recover additional standby state for prepared transactions.
5942+
*/
5943+
if (wasShutdown)
5944+
{
5945+
RunningTransactionsDatarunning;
5946+
5947+
/*
5948+
* Construct a RunningTransactions snapshot representing a shut
5949+
* down server, with only prepared transactions still alive.
5950+
* We're never overflowed at this point because all subxids
5951+
* are listed with their parent prepared transactions.
5952+
*/
5953+
running.xcnt=nxids;
5954+
running.subxid_overflow= false;
5955+
running.nextXid=checkPoint.nextXid;
5956+
running.oldestRunningXid=oldestActiveXID;
5957+
running.xids=xids;
5958+
5959+
ProcArrayApplyRecoveryInfo(&running);
5960+
5961+
StandbyRecoverPreparedTransactions(false);
5962+
}
59335963
}
59345964

59355965
/* Initialize resource managers */
@@ -5939,6 +5969,46 @@ StartupXLOG(void)
59395969
RmgrTable[rmid].rm_startup();
59405970
}
59415971

5972+
/*
5973+
* Initialize shared replayEndRecPtr and recoveryLastRecPtr.
5974+
*
5975+
* This is slightly confusing if we're starting from an online
5976+
* checkpoint; we've just read and replayed the checkpoint record,
5977+
* but we're going to start replay from its redo pointer, which
5978+
* precedes the location of the checkpoint record itself. So even
5979+
* though the last record we've replayed is indeed ReadRecPtr, we
5980+
* haven't replayed all the preceding records yet. That's OK for
5981+
* the current use of these variables.
5982+
*/
5983+
SpinLockAcquire(&xlogctl->info_lck);
5984+
xlogctl->replayEndRecPtr=ReadRecPtr;
5985+
xlogctl->recoveryLastRecPtr=ReadRecPtr;
5986+
SpinLockRelease(&xlogctl->info_lck);
5987+
5988+
/*
5989+
* Let postmaster know we've started redo now, so that it can
5990+
* launch bgwriter to perform restartpoints. We don't bother
5991+
* during crash recovery as restartpoints can only be performed
5992+
* during archive recovery. And we'd like to keep crash recovery
5993+
* simple, to avoid introducing bugs that could prevent you from
5994+
* recovering after crash.
5995+
*
5996+
* After this point, we can no longer assume that we're the only
5997+
* process in addition to postmaster! Also, fsync requests are
5998+
* subsequently to be handled by the bgwriter, not locally.
5999+
*/
6000+
if (InArchiveRecovery&&IsUnderPostmaster)
6001+
{
6002+
SetForwardFsyncRequests();
6003+
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
6004+
bgwriterLaunched= true;
6005+
}
6006+
6007+
/*
6008+
* Allow read-only connections immediately if we're consistent already.
6009+
*/
6010+
CheckRecoveryConsistency();
6011+
59426012
/*
59436013
* Find the first record that logically follows the checkpoint --- it
59446014
* might physically precede it, though.
@@ -5958,43 +6028,14 @@ StartupXLOG(void)
59586028
{
59596029
boolrecoveryContinue= true;
59606030
boolrecoveryApply= true;
5961-
boolreachedMinRecoveryPoint= false;
59626031
ErrorContextCallbackerrcontext;
59636032

5964-
/* use volatile pointer to prevent code rearrangement */
5965-
volatileXLogCtlData*xlogctl=XLogCtl;
5966-
5967-
/* initialize shared replayEndRecPtr and recoveryLastRecPtr */
5968-
SpinLockAcquire(&xlogctl->info_lck);
5969-
xlogctl->replayEndRecPtr=ReadRecPtr;
5970-
xlogctl->recoveryLastRecPtr=ReadRecPtr;
5971-
SpinLockRelease(&xlogctl->info_lck);
5972-
59736033
InRedo= true;
59746034

59756035
ereport(LOG,
59766036
(errmsg("redo starts at %X/%X",
59776037
ReadRecPtr.xlogid,ReadRecPtr.xrecoff)));
59786038

5979-
/*
5980-
* Let postmaster know we've started redo now, so that it can
5981-
* launch bgwriter to perform restartpoints. We don't bother
5982-
* during crash recovery as restartpoints can only be performed
5983-
* during archive recovery. And we'd like to keep crash recovery
5984-
* simple, to avoid introducing bugs that could you from
5985-
* recovering after crash.
5986-
*
5987-
* After this point, we can no longer assume that we're the only
5988-
* process in addition to postmaster! Also, fsync requests are
5989-
* subsequently to be handled by the bgwriter, not locally.
5990-
*/
5991-
if (InArchiveRecovery&&IsUnderPostmaster)
5992-
{
5993-
SetForwardFsyncRequests();
5994-
SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
5995-
bgwriterLaunched= true;
5996-
}
5997-
59986039
/*
59996040
* main redo apply loop
60006041
*/
@@ -6024,32 +6065,8 @@ StartupXLOG(void)
60246065
/* Handle interrupt signals of startup process */
60256066
HandleStartupProcInterrupts();
60266067

6027-
/*
6028-
* Have we passed our safe starting point?
6029-
*/
6030-
if (!reachedMinRecoveryPoint&&
6031-
XLByteLE(minRecoveryPoint,EndRecPtr)&&
6032-
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
6033-
{
6034-
reachedMinRecoveryPoint= true;
6035-
ereport(LOG,
6036-
(errmsg("consistent recovery state reached at %X/%X",
6037-
EndRecPtr.xlogid,EndRecPtr.xrecoff)));
6038-
}
6039-
6040-
/*
6041-
* Have we got a valid starting snapshot that will allow
6042-
* queries to be run? If so, we can tell postmaster that the
6043-
* database is consistent now, enabling connections.
6044-
*/
6045-
if (standbyState==STANDBY_SNAPSHOT_READY&&
6046-
!backendsAllowed&&
6047-
reachedMinRecoveryPoint&&
6048-
IsUnderPostmaster)
6049-
{
6050-
backendsAllowed= true;
6051-
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
6052-
}
6068+
/* Allow read-only connections if we're consistent now */
6069+
CheckRecoveryConsistency();
60536070

60546071
/*
60556072
* Have we reached our recovery target?
@@ -6398,6 +6415,44 @@ StartupXLOG(void)
63986415
}
63996416
}
64006417

6418+
/*
6419+
* Checks if recovery has reached a consistent state. When consistency is
6420+
* reached and we have a valid starting standby snapshot, tell postmaster
6421+
* that it can start accepting read-only connections.
6422+
*/
6423+
staticvoid
6424+
CheckRecoveryConsistency(void)
6425+
{
6426+
staticboolbackendsAllowed= false;
6427+
6428+
/*
6429+
* Have we passed our safe starting point?
6430+
*/
6431+
if (!reachedMinRecoveryPoint&&
6432+
XLByteLE(minRecoveryPoint,EndRecPtr)&&
6433+
XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
6434+
{
6435+
reachedMinRecoveryPoint= true;
6436+
ereport(LOG,
6437+
(errmsg("consistent recovery state reached at %X/%X",
6438+
EndRecPtr.xlogid,EndRecPtr.xrecoff)));
6439+
}
6440+
6441+
/*
6442+
* Have we got a valid starting snapshot that will allow
6443+
* queries to be run? If so, we can tell postmaster that the
6444+
* database is consistent now, enabling connections.
6445+
*/
6446+
if (standbyState==STANDBY_SNAPSHOT_READY&&
6447+
!backendsAllowed&&
6448+
reachedMinRecoveryPoint&&
6449+
IsUnderPostmaster)
6450+
{
6451+
backendsAllowed= true;
6452+
SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
6453+
}
6454+
}
6455+
64016456
/*
64026457
* Is the system still in recovery?
64036458
*
@@ -7657,13 +7712,36 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
76577712
if (standbyState!=STANDBY_DISABLED)
76587713
CheckRequiredParameterValues(checkPoint);
76597714

7715+
/*
7716+
* If we see a shutdown checkpoint, we know that nothing was
7717+
* running on the master at this point. So fake-up an empty
7718+
* running-xacts record and use that here and now. Recover
7719+
* additional standby state for prepared transactions.
7720+
*/
76607721
if (standbyState >=STANDBY_INITIALIZED)
76617722
{
7723+
TransactionId*xids;
7724+
intnxids;
7725+
TransactionIdoldestActiveXID;
7726+
RunningTransactionsDatarunning;
7727+
7728+
oldestActiveXID=PrescanPreparedTransactions(&xids,&nxids);
7729+
76627730
/*
7663-
* Remove stale transactions, if any.
7731+
* Construct a RunningTransactions snapshot representing a shut
7732+
* down server, with only prepared transactions still alive.
7733+
* We're never overflowed at this point because all subxids
7734+
* are listed with their parent prepared transactions.
76647735
*/
7665-
ExpireOldKnownAssignedTransactionIds(checkPoint.nextXid);
7666-
StandbyReleaseOldLocks(checkPoint.nextXid);
7736+
running.xcnt=nxids;
7737+
running.subxid_overflow= false;
7738+
running.nextXid=checkPoint.nextXid;
7739+
running.oldestRunningXid=oldestActiveXID;
7740+
running.xids=xids;
7741+
7742+
ProcArrayApplyRecoveryInfo(&running);
7743+
7744+
StandbyRecoverPreparedTransactions(true);
76677745
}
76687746

76697747
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp