Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 39e98d9

Browse files
committed
Repair sometimes-incorrect computation of StartUpID after a crash, per
example from Rao Kumar. This is a very corner corner-case, requiring a minimum of three closely-spaced database crashes and an unlucky positioning of the second recovery's checkpoint record before you'd notice any problem. But the consequences are dire enough that it's a must-fix.
1 parent baba071, commit 39e98d9

File tree

1 file changed

+64
-52
lines changed
  • src/backend/access/transam

1 file changed

+64
-52
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 64 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.115 2003/05/10 18:01:31 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.116 2003/05/22 14:39:28 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -2568,7 +2568,18 @@ StartupXLOG(void)
25682568
ShmemVariableCache->nextOid=checkPoint.nextOid;
25692569
ShmemVariableCache->oidCount=0;
25702570

2571-
ThisStartUpID=checkPoint.ThisStartUpID;
2571+
/*
2572+
* If it was a shutdown checkpoint, then any following WAL entries were
2573+
* created under the next StartUpID; if it was a regular checkpoint then
2574+
* any following WAL entries were created under the same StartUpID.
2575+
* We must replay WAL entries using the same StartUpID they were created
2576+
* under, so temporarily adopt that SUI (see also xlog_redo()).
2577+
*/
2578+
if (wasShutdown)
2579+
ThisStartUpID=checkPoint.ThisStartUpID+1;
2580+
else
2581+
ThisStartUpID=checkPoint.ThisStartUpID;
2582+
25722583
RedoRecPtr=XLogCtl->Insert.RedoRecPtr=
25732584
XLogCtl->SavedRedoRecPtr=checkPoint.redo;
25742585

@@ -2672,55 +2683,21 @@ StartupXLOG(void)
26722683
ControlFile->logSeg=openLogSeg+1;
26732684
Insert=&XLogCtl->Insert;
26742685
Insert->PrevRecord=LastRec;
2686+
XLogCtl->xlblocks[0].xlogid=openLogId;
2687+
XLogCtl->xlblocks[0].xrecoff=
2688+
((EndOfLog.xrecoff-1) /BLCKSZ+1)*BLCKSZ;
26752689

26762690
/*
2677-
* If the next record will go to the new page then initialize for that
2678-
* one.
2691+
* Tricky point here: readBuf contains the *last* block that the
2692+
* LastRec record spans, not the one it starts in.  The last block
2693+
* is indeed the one we want to use.
26792694
*/
2680-
if ((BLCKSZ-EndOfLog.xrecoff %BLCKSZ)<SizeOfXLogRecord)
2681-
EndOfLog.xrecoff+= (BLCKSZ-EndOfLog.xrecoff %BLCKSZ);
2682-
if (EndOfLog.xrecoff %BLCKSZ==0)
2683-
{
2684-
XLogRecPtrNewPageEndPtr;
2685-
2686-
NewPageEndPtr=EndOfLog;
2687-
if (NewPageEndPtr.xrecoff >=XLogFileSize)
2688-
{
2689-
/* crossing a logid boundary */
2690-
NewPageEndPtr.xlogid+=1;
2691-
NewPageEndPtr.xrecoff=BLCKSZ;
2692-
}
2693-
else
2694-
NewPageEndPtr.xrecoff+=BLCKSZ;
2695-
XLogCtl->xlblocks[0]=NewPageEndPtr;
2696-
Insert->currpage->xlp_magic=XLOG_PAGE_MAGIC;
2697-
if (InRecovery)
2698-
Insert->currpage->xlp_sui=ThisStartUpID;
2699-
else
2700-
Insert->currpage->xlp_sui=ThisStartUpID+1;
2701-
Insert->currpage->xlp_pageaddr.xlogid=NewPageEndPtr.xlogid;
2702-
Insert->currpage->xlp_pageaddr.xrecoff=NewPageEndPtr.xrecoff-BLCKSZ;
2703-
/* rest of buffer was zeroed in XLOGShmemInit */
2704-
Insert->currpos= (char*)Insert->currpage+SizeOfXLogPHD;
2705-
}
2706-
else
2707-
{
2708-
XLogCtl->xlblocks[0].xlogid=openLogId;
2709-
XLogCtl->xlblocks[0].xrecoff=
2710-
((EndOfLog.xrecoff-1) /BLCKSZ+1)*BLCKSZ;
2711-
2712-
/*
2713-
* Tricky point here: readBuf contains the *last* block that the
2714-
* LastRec record spans, not the one it starts in.  The last block
2715-
* is indeed the one we want to use.
2716-
*/
2717-
Assert(readOff== (XLogCtl->xlblocks[0].xrecoff-BLCKSZ) %XLogSegSize);
2718-
memcpy((char*)Insert->currpage,readBuf,BLCKSZ);
2719-
Insert->currpos= (char*)Insert->currpage+
2720-
(EndOfLog.xrecoff+BLCKSZ-XLogCtl->xlblocks[0].xrecoff);
2721-
/* Make sure rest of page is zero */
2722-
memset(Insert->currpos,0,INSERT_FREESPACE(Insert));
2723-
}
2695+
Assert(readOff== (XLogCtl->xlblocks[0].xrecoff-BLCKSZ) %XLogSegSize);
2696+
memcpy((char*)Insert->currpage,readBuf,BLCKSZ);
2697+
Insert->currpos= (char*)Insert->currpage+
2698+
(EndOfLog.xrecoff+BLCKSZ-XLogCtl->xlblocks[0].xrecoff);
2699+
/* Make sure rest of page is zero */
2700+
MemSet(Insert->currpos,0,INSERT_FREESPACE(Insert));
27242701

27252702
LogwrtResult.Write=LogwrtResult.Flush=EndOfLog;
27262703

@@ -2774,9 +2751,22 @@ StartupXLOG(void)
27742751
MyXactMadeXLogEntry= false;
27752752
MyXactMadeTempRelUpdate= false;
27762753

2754+
/*
2755+
* At this point, ThisStartUpID is the largest SUI that we could
2756+
* find evidence for in the WAL entries. But check it against
2757+
* pg_control's latest checkpoint, to make sure that we can't
2758+
* accidentally re-use an already-used SUI.
2759+
*/
2760+
if (ThisStartUpID<ControlFile->checkPointCopy.ThisStartUpID)
2761+
ThisStartUpID=ControlFile->checkPointCopy.ThisStartUpID;
2762+
27772763
/*
27782764
* Perform a new checkpoint to update our recovery activity to disk.
27792765
*
2766+
* Note that we write a shutdown checkpoint. This is correct since
2767+
* the records following it will use SUI one more than what is shown
2768+
* in the checkpoint's ThisStartUpID.
2769+
*
27802770
* In case we had to use the secondary checkpoint, make sure that
27812771
* it will still be shown as the secondary checkpoint after this
27822772
* CreateCheckPoint operation; we don't want the broken primary
@@ -2790,21 +2780,39 @@ StartupXLOG(void)
27902780
*/
27912781
XLogCloseRelationCache();
27922782
}
2783+
else
2784+
{
2785+
/*
2786+
* If we are not doing recovery, then we saw a checkpoint with nothing
2787+
* after it, and we can safely use StartUpID equal to one more than
2788+
* the checkpoint's SUI. But just for paranoia's sake, check against
2789+
* pg_control too.
2790+
*/
2791+
ThisStartUpID=checkPoint.ThisStartUpID;
2792+
if (ThisStartUpID<ControlFile->checkPointCopy.ThisStartUpID)
2793+
ThisStartUpID=ControlFile->checkPointCopy.ThisStartUpID;
2794+
}
27932795

27942796
/*
27952797
* Preallocate additional log files, if wanted.
27962798
*/
27972799
PreallocXlogFiles(EndOfLog);
27982800

2801+
/*
2802+
* Advance StartUpID to one more than the highest value used previously.
2803+
*/
2804+
ThisStartUpID++;
2805+
XLogCtl->ThisStartUpID=ThisStartUpID;
2806+
2807+
/*
2808+
* Okay, we're officially UP.
2809+
*/
27992810
InRecovery= false;
28002811

28012812
ControlFile->state=DB_IN_PRODUCTION;
28022813
ControlFile->time=time(NULL);
28032814
UpdateControlFile();
28042815

2805-
ThisStartUpID++;
2806-
XLogCtl->ThisStartUpID=ThisStartUpID;
2807-
28082816
/* Start up the commit log, too */
28092817
StartupCLOG();
28102818

@@ -3000,7 +3008,7 @@ CreateCheckPoint(bool shutdown, bool force)
30003008
UpdateControlFile();
30013009
}
30023010

3003-
memset(&checkPoint,0,sizeof(checkPoint));
3011+
MemSet(&checkPoint,0,sizeof(checkPoint));
30043012
checkPoint.ThisStartUpID=ThisStartUpID;
30053013
checkPoint.time=time(NULL);
30063014

@@ -3259,6 +3267,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
32593267
ShmemVariableCache->nextXid=checkPoint.nextXid;
32603268
ShmemVariableCache->nextOid=checkPoint.nextOid;
32613269
ShmemVariableCache->oidCount=0;
3270+
/* Any later WAL records should be run with shutdown SUI plus 1 */
3271+
ThisStartUpID=checkPoint.ThisStartUpID+1;
32623272
}
32633273
elseif (info==XLOG_CHECKPOINT_ONLINE)
32643274
{
@@ -3274,6 +3284,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
32743284
ShmemVariableCache->nextOid=checkPoint.nextOid;
32753285
ShmemVariableCache->oidCount=0;
32763286
}
3287+
/* Any later WAL records should be run with the then-active SUI */
3288+
ThisStartUpID=checkPoint.ThisStartUpID;
32773289
}
32783290
}
32793291

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp