77 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.248 2006/08/17 23:04:05 tgl Exp $
10+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -312,10 +312,8 @@ static XLogRecPtr RedoRecPtr;
312312 * new log file.
313313 *
314314 * CheckpointLock: must be held to do a checkpoint (ensures only one
315- * checkpointer at a time; even though the postmaster won't launch
316- * parallel checkpoint processes, we need this because manual checkpoints
317- * could be launched simultaneously). XXX now that all checkpoints are
318- * done by the bgwriter, isn't this lock redundant?
315+ * checkpointer at a time; currently, with all checkpoints done by the
316+ * bgwriter, this is just pro forma).
319317 *
320318 *----------
321319 */
@@ -363,9 +361,13 @@ typedef struct XLogCtlData
363361{
364362/* Protected by WALInsertLock: */
365363XLogCtlInsert Insert ;
364+
366365/* Protected by info_lck: */
367366XLogwrtRqst LogwrtRqst ;
368367XLogwrtResult LogwrtResult ;
368+ uint32 ckptXidEpoch ;/* nextXID & epoch of latest checkpoint */
369+ TransactionId ckptXid ;
370+
369371/* Protected by WALWriteLock: */
370372XLogCtlWrite Write ;
371373
@@ -380,7 +382,7 @@ typedef struct XLogCtlData
380382int XLogCacheBlck ;/* highest allocated xlog buffer index */
381383TimeLineID ThisTimeLineID ;
382384
383- slock_t info_lck ;/* locks sharedLogwrtRqst/LogwrtResult */
385+ slock_t info_lck ;/* locks shared variables shown above */
384386}XLogCtlData ;
385387
386388static XLogCtlData * XLogCtl = NULL ;
@@ -4086,6 +4088,7 @@ BootStrapXLOG(void)
40864088checkPoint .redo .xrecoff = SizeOfXLogLongPHD ;
40874089checkPoint .undo = checkPoint .redo ;
40884090checkPoint .ThisTimeLineID = ThisTimeLineID ;
4091+ checkPoint .nextXidEpoch = 0 ;
40894092checkPoint .nextXid = FirstNormalTransactionId ;
40904093checkPoint .nextOid = FirstBootstrapObjectId ;
40914094checkPoint .nextMulti = FirstMultiXactId ;
@@ -4752,8 +4755,9 @@ StartupXLOG(void)
47524755checkPoint .undo .xlogid ,checkPoint .undo .xrecoff ,
47534756wasShutdown ?"TRUE" :"FALSE" )));
47544757ereport (LOG ,
4755- (errmsg ("next transaction ID: %u; next OID: %u" ,
4756- checkPoint .nextXid ,checkPoint .nextOid )));
4758+ (errmsg ("next transaction ID: %u/%u; next OID: %u" ,
4759+ checkPoint .nextXidEpoch ,checkPoint .nextXid ,
4760+ checkPoint .nextOid )));
47574761ereport (LOG ,
47584762(errmsg ("next MultiXactId: %u; next MultiXactOffset: %u" ,
47594763checkPoint .nextMulti ,checkPoint .nextMultiOffset )));
@@ -5135,6 +5139,10 @@ StartupXLOG(void)
51355139/* start the archive_timeout timer running */
51365140XLogCtl -> Write .lastSegSwitchTime = ControlFile -> time ;
51375141
5142+ /* initialize shared-memory copy of latest checkpoint XID/epoch */
5143+ XLogCtl -> ckptXidEpoch = ControlFile -> checkPointCopy .nextXidEpoch ;
5144+ XLogCtl -> ckptXid = ControlFile -> checkPointCopy .nextXid ;
5145+
51385146/* Start up the commit log and related stuff, too */
51395147StartupCLOG ();
51405148StartupSUBTRANS (oldestActiveXID );
@@ -5364,6 +5372,46 @@ GetRecentNextXid(void)
53645372return ControlFile -> checkPointCopy .nextXid ;
53655373}
53665374
5375+ /*
5376+ * GetNextXidAndEpoch - get the current nextXid value and associated epoch
5377+ *
5378+ * This is exported for use by code that would like to have 64-bit XIDs.
5379+ * We don't really support such things, but all XIDs within the system
5380+ * can be presumed "close to" the result, and thus the epoch associated
5381+ * with them can be determined.  Outputs: *xid receives the current nextXid, *epoch the epoch that goes with it.
5382+ */
5383+ void
5384+ GetNextXidAndEpoch (TransactionId * xid ,uint32 * epoch )
5385+ {
5386+ uint32 ckptXidEpoch ;
5387+ TransactionId ckptXid ;
5388+ TransactionId nextXid ;
5389+
5390+ /* Must read checkpoint info first, else have race condition: the wraparound test below relies on nextXid being fetched after ckptXid */
5391+ {
5392+ /* use volatile pointer to prevent code rearrangement; both checkpoint fields are copied while holding info_lck so they form a matched pair */
5393+ volatile XLogCtlData * xlogctl = XLogCtl ;
5394+
5395+ SpinLockAcquire (& xlogctl -> info_lck );
5396+ ckptXidEpoch = xlogctl -> ckptXidEpoch ;
5397+ ckptXid = xlogctl -> ckptXid ;
5398+ SpinLockRelease (& xlogctl -> info_lck );
5399+ }
5400+
5401+ /* Now fetch current nextXid (only after the checkpoint values have been captured) */
5402+ nextXid = ReadNewTransactionId ();
5403+
5404+ /*
5405+ * nextXid is certainly logically later than ckptXid. So if it's
5406+ * numerically less, it must have wrapped into the next epoch.
5407+ */
5408+ if (nextXid < ckptXid )
5409+ ckptXidEpoch ++ ;/* local copy only; the shared checkpoint epoch is not modified here */
5410+
5411+ * xid = nextXid ;/* hand both results back through the output pointers */
5412+ * epoch = ckptXidEpoch ;
5413+ }
5414+
53675415/*
53685416 * This must be called ONCE during postmaster or standalone-backend shutdown
53695417 */
@@ -5531,6 +5579,11 @@ CreateCheckPoint(bool shutdown, bool force)
55315579checkPoint .nextXid = ShmemVariableCache -> nextXid ;
55325580LWLockRelease (XidGenLock );
55335581
5582+ /* Increase XID epoch if we've wrapped around since last checkpoint */
5583+ checkPoint .nextXidEpoch = ControlFile -> checkPointCopy .nextXidEpoch ;
5584+ if (checkPoint .nextXid < ControlFile -> checkPointCopy .nextXid )
5585+ checkPoint .nextXidEpoch ++ ;
5586+
55345587LWLockAcquire (OidGenLock ,LW_SHARED );
55355588checkPoint .nextOid = ShmemVariableCache -> nextOid ;
55365589if (!shutdown )
@@ -5600,6 +5653,17 @@ CreateCheckPoint(bool shutdown, bool force)
56005653UpdateControlFile ();
56015654LWLockRelease (ControlFileLock );
56025655
5656+ /* Update shared-memory copy of checkpoint XID/epoch */
5657+ {
5658+ /* use volatile pointer to prevent code rearrangement */
5659+ volatile XLogCtlData * xlogctl = XLogCtl ;
5660+
5661+ SpinLockAcquire (& xlogctl -> info_lck );
5662+ xlogctl -> ckptXidEpoch = checkPoint .nextXidEpoch ;
5663+ xlogctl -> ckptXid = checkPoint .nextXid ;
5664+ SpinLockRelease (& xlogctl -> info_lck );
5665+ }
5666+
56035667/*
56045668 * We are now done with critical updates; no need for system panic if we
56055669 * have trouble while fooling with offline log segments.
@@ -5803,6 +5867,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
58035867MultiXactSetNextMXact (checkPoint .nextMulti ,
58045868checkPoint .nextMultiOffset );
58055869
5870+ /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
5871+ ControlFile -> checkPointCopy .nextXidEpoch = checkPoint .nextXidEpoch ;
5872+ ControlFile -> checkPointCopy .nextXid = checkPoint .nextXid ;
5873+
58065874/*
58075875 * TLI may change in a shutdown checkpoint, but it shouldn't decrease
58085876 */
@@ -5836,6 +5904,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
58365904}
58375905MultiXactAdvanceNextMXact (checkPoint .nextMulti ,
58385906checkPoint .nextMultiOffset );
5907+
5908+ /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
5909+ ControlFile -> checkPointCopy .nextXidEpoch = checkPoint .nextXidEpoch ;
5910+ ControlFile -> checkPointCopy .nextXid = checkPoint .nextXid ;
5911+
58395912/* TLI should not change in an on-line checkpoint */
58405913if (checkPoint .ThisTimeLineID != ThisTimeLineID )
58415914ereport (PANIC ,
@@ -5861,10 +5934,11 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
58615934CheckPoint * checkpoint = (CheckPoint * )rec ;
58625935
58635936appendStringInfo (buf ,"checkpoint: redo %X/%X; undo %X/%X; "
5864- "tli %u; xid %u; oid %u; multi %u; offset %u; %s" ,
5937+ "tli %u; xid %u/%u ; oid %u; multi %u; offset %u; %s" ,
58655938checkpoint -> redo .xlogid ,checkpoint -> redo .xrecoff ,
58665939checkpoint -> undo .xlogid ,checkpoint -> undo .xrecoff ,
5867- checkpoint -> ThisTimeLineID ,checkpoint -> nextXid ,
5940+ checkpoint -> ThisTimeLineID ,
5941+ checkpoint -> nextXidEpoch ,checkpoint -> nextXid ,
58685942checkpoint -> nextOid ,
58695943checkpoint -> nextMulti ,
58705944checkpoint -> nextMultiOffset ,