Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0a87394

Browse files
committed
Fix the torn-page hazard for PITR base backups by forcing full page writes
to occur between pg_start_backup() and pg_stop_backup(), even if the GUC setting full_page_writes is OFF. Per discussion, doing this in combination with the already-existing checkpoint during pg_start_backup() should ensure safety against partial page updates being included in the backup. We do not have to force full page writes to occur during normal PITR operation, as I had first feared.
1 parent 8e7aaeb, commit 0a87394

File tree

1 file changed

+156
-78
lines changed
  • src/backend/access/transam

1 file changed

+156
-78
lines changed

‎src/backend/access/transam/xlog.c

Lines changed: 156 additions & 78 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.235 2006/04/14 20:27:24 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.236 2006/04/17 18:55:05 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -344,6 +344,7 @@ typedef struct XLogCtlInsert
344344
XLogPageHeadercurrpage;/* points to header of block in cache */
345345
char*currpos;/* current insertion point in cache */
346346
XLogRecPtrRedoRecPtr;/* current redo point for insertions */
347+
boolforcePageWrites;/* forcing full-page writes for PITR? */
347348
}XLogCtlInsert;
348349

349350
/*
@@ -466,7 +467,7 @@ static void exitArchiveRecovery(TimeLineID endTLI,
466467
uint32endLogId,uint32endLogSeg);
467468
staticboolrecoveryStopsHere(XLogRecord*record,bool*includeThis);
468469

469-
staticboolXLogCheckBuffer(XLogRecData*rdata,
470+
staticboolXLogCheckBuffer(XLogRecData*rdata,booldoPageWrites,
470471
XLogRecPtr*lsn,BkpBlock*bkpb);
471472
staticboolAdvanceXLInsertBuffer(void);
472473
staticvoidXLogWrite(XLogwrtRqstWriteRqst,boolflexible);
@@ -544,6 +545,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
544545
unsignedi;
545546
XLogwrtRqstLogwrtRqst;
546547
boolupdrqst;
548+
booldoPageWrites;
547549
boolno_tran= (rmid==RM_XLOG_ID) ? true : false;
548550

549551
if (info&XLR_INFO_MASK)
@@ -591,6 +593,14 @@ begin:;
591593
dtbuf_bkp[i]= false;
592594
}
593595

596+
/*
597+
* Decide if we need to do full-page writes in this XLOG record: true if
598+
* full_page_writes is on or we have a PITR request for it. Since we
599+
* don't yet have the insert lock, forcePageWrites could change under us,
600+
* but we'll recheck it once we have the lock.
601+
*/
602+
doPageWrites=fullPageWrites||Insert->forcePageWrites;
603+
594604
INIT_CRC32(rdata_crc);
595605
len=0;
596606
for (rdt=rdata;;)
@@ -622,7 +632,8 @@ begin:;
622632
{
623633
/* OK, put it in this slot */
624634
dtbuf[i]=rdt->buffer;
625-
if (XLogCheckBuffer(rdt,&(dtbuf_lsn[i]),&(dtbuf_xlg[i])))
635+
if (XLogCheckBuffer(rdt,doPageWrites,
636+
&(dtbuf_lsn[i]),&(dtbuf_xlg[i])))
626637
{
627638
dtbuf_bkp[i]= true;
628639
rdt->data=NULL;
@@ -735,30 +746,51 @@ begin:;
735746
* Check to see if my RedoRecPtr is out of date. If so, may have to go
736747
* back and recompute everything. This can only happen just after a
737748
* checkpoint, so it's better to be slow in this case and fast otherwise.
749+
*
750+
* If we aren't doing full-page writes then RedoRecPtr doesn't actually
751+
* affect the contents of the XLOG record, so we'll update our local
752+
* copy but not force a recomputation.
738753
*/
739754
if (!XLByteEQ(RedoRecPtr,Insert->RedoRecPtr))
740755
{
741756
Assert(XLByteLT(RedoRecPtr,Insert->RedoRecPtr));
742757
RedoRecPtr=Insert->RedoRecPtr;
743758

744-
for (i=0;i<XLR_MAX_BKP_BLOCKS;i++)
759+
if (doPageWrites)
745760
{
746-
if (dtbuf[i]==InvalidBuffer)
747-
continue;
748-
if (dtbuf_bkp[i]== false&&
749-
XLByteLE(dtbuf_lsn[i],RedoRecPtr))
761+
for (i=0;i<XLR_MAX_BKP_BLOCKS;i++)
750762
{
751-
/*
752-
* Oops, this buffer now needs to be backed up, but we didn't
753-
* think so above.Start over.
754-
*/
755-
LWLockRelease(WALInsertLock);
756-
END_CRIT_SECTION();
757-
gotobegin;
763+
if (dtbuf[i]==InvalidBuffer)
764+
continue;
765+
if (dtbuf_bkp[i]== false&&
766+
XLByteLE(dtbuf_lsn[i],RedoRecPtr))
767+
{
768+
/*
769+
* Oops, this buffer now needs to be backed up, but we
770+
* didn't think so above. Start over.
771+
*/
772+
LWLockRelease(WALInsertLock);
773+
END_CRIT_SECTION();
774+
gotobegin;
775+
}
758776
}
759777
}
760778
}
761779

780+
/*
781+
* Also check to see if forcePageWrites was just turned on; if we
782+
* weren't already doing full-page writes then go back and recompute.
783+
* (If it was just turned off, we could recompute the record without
784+
* full pages, but we choose not to bother.)
785+
*/
786+
if (Insert->forcePageWrites&& !doPageWrites)
787+
{
788+
/* Oops, must redo it with full-page data */
789+
LWLockRelease(WALInsertLock);
790+
END_CRIT_SECTION();
791+
gotobegin;
792+
}
793+
762794
/*
763795
* Make additional rdata chain entries for the backup blocks, so that we
764796
* don't need to special-case them in the write loop. Note that we have
@@ -966,7 +998,7 @@ begin:;
966998
* save the buffer's LSN at *lsn.
967999
*/
9681000
staticbool
969-
XLogCheckBuffer(XLogRecData*rdata,
1001+
XLogCheckBuffer(XLogRecData*rdata,booldoPageWrites,
9701002
XLogRecPtr*lsn,BkpBlock*bkpb)
9711003
{
9721004
PageHeaderpage;
@@ -980,7 +1012,7 @@ XLogCheckBuffer(XLogRecData *rdata,
9801012
*/
9811013
*lsn=page->pd_lsn;
9821014

983-
if (fullPageWrites&&
1015+
if (doPageWrites&&
9841016
XLByteLE(page->pd_lsn,RedoRecPtr))
9851017
{
9861018
/*
@@ -5651,76 +5683,120 @@ pg_start_backup(PG_FUNCTION_ARGS)
56515683
PointerGetDatum(backupid)));
56525684

56535685
/*
5654-
* Force a CHECKPOINT.This is not strictly necessary, but it seems like
5655-
* a good idea to minimize the amount of past WAL needed to use the
5656-
* backup.Also, this guarantees that two successive backup runs will
5657-
* have different checkpoint positions and hence different history file
5658-
* names, even if nothing happened in between.
5686+
* Mark backup active in shared memory. We must do full-page WAL writes
5687+
* during an on-line backup even if not doing so at other times, because
5688+
* it's quite possible for the backup dump to obtain a "torn" (partially
5689+
* written) copy of a database page if it reads the page concurrently
5690+
* with our write to the same page. This can be fixed as long as the
5691+
* first write to the page in the WAL sequence is a full-page write.
5692+
* Hence, we turn on forcePageWrites and then force a CHECKPOINT, to
5693+
* ensure there are no dirty pages in shared memory that might get
5694+
* dumped while the backup is in progress without having a corresponding
5695+
* WAL record. (Once the backup is complete, we need not force full-page
5696+
* writes anymore, since we expect that any pages not modified during
5697+
* the backup interval must have been correctly captured by the backup.)
5698+
*
5699+
* We must hold WALInsertLock to change the value of forcePageWrites,
5700+
* to ensure adequate interlocking against XLogInsert().
56595701
*/
5660-
RequestCheckpoint(true, false);
5702+
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
5703+
if (XLogCtl->Insert.forcePageWrites)
5704+
{
5705+
LWLockRelease(WALInsertLock);
5706+
ereport(ERROR,
5707+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5708+
errmsg("a backup is already in progress"),
5709+
errhint("Run pg_stop_backup() and try again.")));
5710+
}
5711+
XLogCtl->Insert.forcePageWrites= true;
5712+
LWLockRelease(WALInsertLock);
56615713

5662-
/*
5663-
* Now we need to fetch the checkpoint record location, and also its REDO
5664-
* pointer. The oldest point in WAL that would be needed to restore
5665-
* starting from the checkpoint is precisely the REDO pointer.
5666-
*/
5667-
LWLockAcquire(ControlFileLock,LW_EXCLUSIVE);
5668-
checkpointloc=ControlFile->checkPoint;
5669-
startpoint=ControlFile->checkPointCopy.redo;
5670-
LWLockRelease(ControlFileLock);
5714+
/* Use a TRY block to ensure we release forcePageWrites if fail below */
5715+
PG_TRY();
5716+
{
5717+
/*
5718+
* Force a CHECKPOINT. Aside from being necessary to prevent torn
5719+
* page problems, this guarantees that two successive backup runs will
5720+
* have different checkpoint positions and hence different history
5721+
* file names, even if nothing happened in between.
5722+
*/
5723+
RequestCheckpoint(true, false);
56715724

5672-
XLByteToSeg(startpoint,_logId,_logSeg);
5673-
XLogFileName(xlogfilename,ThisTimeLineID,_logId,_logSeg);
5725+
/*
5726+
* Now we need to fetch the checkpoint record location, and also its
5727+
* REDO pointer. The oldest point in WAL that would be needed to
5728+
* restore starting from the checkpoint is precisely the REDO pointer.
5729+
*/
5730+
LWLockAcquire(ControlFileLock,LW_EXCLUSIVE);
5731+
checkpointloc=ControlFile->checkPoint;
5732+
startpoint=ControlFile->checkPointCopy.redo;
5733+
LWLockRelease(ControlFileLock);
56745734

5675-
/*
5676-
* We deliberately use strftime/localtime not the src/timezone functions,
5677-
* so that backup labels will consistently be recorded in the same
5678-
* timezone regardless of TimeZone setting. This matches elog.c's
5679-
* practice.
5680-
*/
5681-
stamp_time=time(NULL);
5682-
strftime(strfbuf,sizeof(strfbuf),
5683-
"%Y-%m-%d %H:%M:%S %Z",
5684-
localtime(&stamp_time));
5735+
XLByteToSeg(startpoint,_logId,_logSeg);
5736+
XLogFileName(xlogfilename,ThisTimeLineID,_logId,_logSeg);
56855737

5686-
/*
5687-
* Check for existing backup label --- implies a backup is already running
5688-
*/
5689-
if (stat(BACKUP_LABEL_FILE,&stat_buf)!=0)
5690-
{
5691-
if (errno!=ENOENT)
5738+
/*
5739+
* We deliberately use strftime/localtime not the src/timezone
5740+
* functions, so that backup labels will consistently be recorded in
5741+
* the same timezone regardless of TimeZone setting. This matches
5742+
* elog.c's practice.
5743+
*/
5744+
stamp_time=time(NULL);
5745+
strftime(strfbuf,sizeof(strfbuf),
5746+
"%Y-%m-%d %H:%M:%S %Z",
5747+
localtime(&stamp_time));
5748+
5749+
/*
5750+
* Check for existing backup label --- implies a backup is already
5751+
* running. (XXX given that we checked forcePageWrites above, maybe
5752+
* it would be OK to just unlink any such label file?)
5753+
*/
5754+
if (stat(BACKUP_LABEL_FILE,&stat_buf)!=0)
5755+
{
5756+
if (errno!=ENOENT)
5757+
ereport(ERROR,
5758+
(errcode_for_file_access(),
5759+
errmsg("could not stat file \"%s\": %m",
5760+
BACKUP_LABEL_FILE)));
5761+
}
5762+
else
5763+
ereport(ERROR,
5764+
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5765+
errmsg("a backup is already in progress"),
5766+
errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
5767+
BACKUP_LABEL_FILE)));
5768+
5769+
/*
5770+
* Okay, write the file
5771+
*/
5772+
fp=AllocateFile(BACKUP_LABEL_FILE,"w");
5773+
if (!fp)
56925774
ereport(ERROR,
56935775
(errcode_for_file_access(),
5694-
errmsg("could not stat file \"%s\": %m",
5776+
errmsg("could not create file \"%s\": %m",
5777+
BACKUP_LABEL_FILE)));
5778+
fprintf(fp,"START WAL LOCATION: %X/%X (file %s)\n",
5779+
startpoint.xlogid,startpoint.xrecoff,xlogfilename);
5780+
fprintf(fp,"CHECKPOINT LOCATION: %X/%X\n",
5781+
checkpointloc.xlogid,checkpointloc.xrecoff);
5782+
fprintf(fp,"START TIME: %s\n",strfbuf);
5783+
fprintf(fp,"LABEL: %s\n",backupidstr);
5784+
if (fflush(fp)||ferror(fp)||FreeFile(fp))
5785+
ereport(ERROR,
5786+
(errcode_for_file_access(),
5787+
errmsg("could not write file \"%s\": %m",
56955788
BACKUP_LABEL_FILE)));
56965789
}
5697-
else
5698-
ereport(ERROR,
5699-
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5700-
errmsg("a backup is already in progress"),
5701-
errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.",
5702-
BACKUP_LABEL_FILE)));
5790+
PG_CATCH();
5791+
{
5792+
/* Turn off forcePageWrites on failure */
5793+
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
5794+
XLogCtl->Insert.forcePageWrites= false;
5795+
LWLockRelease(WALInsertLock);
57035796

5704-
/*
5705-
* Okay, write the file
5706-
*/
5707-
fp=AllocateFile(BACKUP_LABEL_FILE,"w");
5708-
if (!fp)
5709-
ereport(ERROR,
5710-
(errcode_for_file_access(),
5711-
errmsg("could not create file \"%s\": %m",
5712-
BACKUP_LABEL_FILE)));
5713-
fprintf(fp,"START WAL LOCATION: %X/%X (file %s)\n",
5714-
startpoint.xlogid,startpoint.xrecoff,xlogfilename);
5715-
fprintf(fp,"CHECKPOINT LOCATION: %X/%X\n",
5716-
checkpointloc.xlogid,checkpointloc.xrecoff);
5717-
fprintf(fp,"START TIME: %s\n",strfbuf);
5718-
fprintf(fp,"LABEL: %s\n",backupidstr);
5719-
if (fflush(fp)||ferror(fp)||FreeFile(fp))
5720-
ereport(ERROR,
5721-
(errcode_for_file_access(),
5722-
errmsg("could not write file \"%s\": %m",
5723-
BACKUP_LABEL_FILE)));
5797+
PG_RE_THROW();
5798+
}
5799+
PG_END_TRY();
57245800

57255801
/*
57265802
* We're done. As a convenience, return the starting WAL offset.
@@ -5766,10 +5842,12 @@ pg_stop_backup(PG_FUNCTION_ARGS)
57665842

57675843
/*
57685844
* Get the current end-of-WAL position; it will be unsafe to use this dump
5769-
* to restore to a point in advance of this time.
5845+
* to restore to a point in advance of this time. We can also clear
5846+
* forcePageWrites here.
57705847
*/
57715848
LWLockAcquire(WALInsertLock,LW_EXCLUSIVE);
57725849
INSERT_RECPTR(stoppoint,Insert,Insert->curridx);
5850+
XLogCtl->Insert.forcePageWrites= false;
57735851
LWLockRelease(WALInsertLock);
57745852

57755853
XLByteToSeg(stoppoint,_logId,_logSeg);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp