Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit412ad7a

Browse files
committed
Fix possible recovery trouble if TRUNCATE overlaps a checkpoint.
If TRUNCATE causes some buffers to be invalidated and thus the checkpoint does not flush them, TRUNCATE must also ensure that the corresponding files are truncated on disk. Otherwise, a replay from the checkpoint might find that the buffers exist but have the wrong contents, which may cause replay to fail. Report by Teja Mupparti. Patch by Kyotaro Horiguchi, per a design suggestion from Heikki Linnakangas, with some changes to the comments by me. Review of this and a prior patch that approached the issue differently by Heikki Linnakangas, Andres Freund, Álvaro Herrera, Masahiko Sawada, and Tom Lane. Discussion: http://postgr.es/m/BYAPR06MB6373BF50B469CA393C614257ABF00@BYAPR06MB6373.namprd06.prod.outlook.com
1 parent86459b3 commit412ad7a

File tree

11 files changed

+120
-28
lines changed

11 files changed

+120
-28
lines changed

‎src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3088,8 +3088,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30883088
* crash/basebackup, even though the state of the data directory would
30893089
* require it.
30903090
*/
3091-
Assert(!MyProc->delayChkpt);
3092-
MyProc->delayChkpt= true;
3091+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)==0);
3092+
MyProc->delayChkpt|=DELAY_CHKPT_START;
30933093

30943094
/* WAL log truncation */
30953095
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3115,7 +3115,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
31153115
/* Then offsets */
31163116
PerformOffsetsTruncation(oldestMulti,newOldestMulti);
31173117

3118-
MyProc->delayChkpt= false;
3118+
MyProc->delayChkpt&= ~DELAY_CHKPT_START;
31193119

31203120
END_CRIT_SECTION();
31213121
LWLockRelease(MultiXactTruncationLock);

‎src/backend/access/transam/twophase.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
475475
}
476476
proc->xid=xid;
477477
Assert(proc->xmin==InvalidTransactionId);
478-
proc->delayChkpt=false;
478+
proc->delayChkpt=0;
479479
proc->statusFlags=0;
480480
proc->pid=0;
481481
proc->databaseId=databaseid;
@@ -1164,7 +1164,8 @@ EndPrepare(GlobalTransaction gxact)
11641164

11651165
START_CRIT_SECTION();
11661166

1167-
MyProc->delayChkpt= true;
1167+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)==0);
1168+
MyProc->delayChkpt |=DELAY_CHKPT_START;
11681169

11691170
XLogBeginInsert();
11701171
for (record=records.head;record!=NULL;record=record->next)
@@ -1207,7 +1208,7 @@ EndPrepare(GlobalTransaction gxact)
12071208
* checkpoint starting after this will certainly see the gxact as a
12081209
* candidate for fsyncing.
12091210
*/
1210-
MyProc->delayChkpt= false;
1211+
MyProc->delayChkpt&= ~DELAY_CHKPT_START;
12111212

12121213
/*
12131214
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2266,7 +2267,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
22662267
START_CRIT_SECTION();
22672268

22682269
/* See notes in RecordTransactionCommit */
2269-
MyProc->delayChkpt= true;
2270+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)==0);
2271+
MyProc->delayChkpt |=DELAY_CHKPT_START;
22702272

22712273
/*
22722274
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2314,7 +2316,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
23142316
TransactionIdCommitTree(xid,nchildren,children);
23152317

23162318
/* Checkpoint can proceed now */
2317-
MyProc->delayChkpt= false;
2319+
MyProc->delayChkpt&= ~DELAY_CHKPT_START;
23182320

23192321
END_CRIT_SECTION();
23202322

‎src/backend/access/transam/xact.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,8 +1387,9 @@ RecordTransactionCommit(void)
13871387
* This makes checkpoint's determination of which xacts are delayChkpt
13881388
* a bit fuzzy, but it doesn't matter.
13891389
*/
1390+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)==0);
13901391
START_CRIT_SECTION();
1391-
MyProc->delayChkpt= true;
1392+
MyProc->delayChkpt|=DELAY_CHKPT_START;
13921393

13931394
SetCurrentTransactionStopTimestamp();
13941395

@@ -1489,7 +1490,7 @@ RecordTransactionCommit(void)
14891490
*/
14901491
if (markXidCommitted)
14911492
{
1492-
MyProc->delayChkpt= false;
1493+
MyProc->delayChkpt&= ~DELAY_CHKPT_START;
14931494
END_CRIT_SECTION();
14941495
}
14951496

‎src/backend/access/transam/xlog.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6517,18 +6517,30 @@ CreateCheckPoint(int flags)
65176517
* and we will correctly flush the update below. So we cannot miss any
65186518
* xacts we need to wait for.
65196519
*/
6520-
vxids=GetVirtualXIDsDelayingChkpt(&nvxids);
6520+
vxids=GetVirtualXIDsDelayingChkpt(&nvxids,DELAY_CHKPT_START);
65216521
if (nvxids>0)
65226522
{
65236523
do
65246524
{
65256525
pg_usleep(10000L);/* wait for 10 msec */
6526-
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids));
6526+
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids,
6527+
DELAY_CHKPT_START));
65276528
}
65286529
pfree(vxids);
65296530

65306531
CheckPointGuts(checkPoint.redo,flags);
65316532

6533+
vxids=GetVirtualXIDsDelayingChkpt(&nvxids,DELAY_CHKPT_COMPLETE);
6534+
if (nvxids>0)
6535+
{
6536+
do
6537+
{
6538+
pg_usleep(10000L);/* wait for 10 msec */
6539+
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids,
6540+
DELAY_CHKPT_COMPLETE));
6541+
}
6542+
pfree(vxids);
6543+
65326544
/*
65336545
* Take a snapshot of running transactions and write this to WAL. This
65346546
* allows us to reconstruct the state of running transactions during

‎src/backend/access/transam/xloginsert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
10111011
/*
10121012
* Ensure no checkpoint can change our view of RedoRecPtr.
10131013
*/
1014-
Assert(MyProc->delayChkpt);
1014+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)!=0);
10151015

10161016
/*
10171017
* Update RedoRecPtr so that we can make the right decision

‎src/backend/catalog/storage.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,22 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
325325

326326
RelationPreTruncate(rel);
327327

328+
/*
329+
* Make sure that a concurrent checkpoint can't complete while truncation
330+
* is in progress.
331+
*
332+
* The truncation operation might drop buffers that the checkpoint
333+
* otherwise would have flushed. If it does, then it's essential that
334+
* the files actually get truncated on disk before the checkpoint record
335+
* is written. Otherwise, if replay begins from that checkpoint, the
336+
* to-be-truncated blocks might still exist on disk but have older
337+
* contents than expected, which can cause replay to fail. It's OK for
338+
* the blocks to not exist on disk at all, but not for them to have the
339+
* wrong contents.
340+
*/
341+
Assert((MyProc->delayChkpt&DELAY_CHKPT_COMPLETE)==0);
342+
MyProc->delayChkpt |=DELAY_CHKPT_COMPLETE;
343+
328344
/*
329345
* We WAL-log the truncation before actually truncating, which means
330346
* trouble if the truncation fails. If we then crash, the WAL replay
@@ -363,13 +379,24 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
363379
XLogFlush(lsn);
364380
}
365381

366-
/* Do the real work to truncate relation forks */
382+
/*
383+
* This will first remove any buffers from the buffer pool that should no
384+
* longer exist after truncation is complete, and then truncate the
385+
* corresponding files on disk.
386+
*/
367387
smgrtruncate(RelationGetSmgr(rel),forks,nforks,blocks);
368388

389+
/* We've done all the critical work, so checkpoints are OK now. */
390+
MyProc->delayChkpt &= ~DELAY_CHKPT_COMPLETE;
391+
369392
/*
370393
* Update upper-level FSM pages to account for the truncation. This is
371394
* important because the just-truncated pages were likely marked as
372395
* all-free, and would be preferentially selected.
396+
*
397+
* NB: There's no point in delaying checkpoints until this is done.
398+
* Because the FSM is not WAL-logged, we have to be prepared for the
399+
* possibility of corruption after a crash anyway.
373400
*/
374401
if (need_fsm_vacuum)
375402
FreeSpaceMapVacuumRange(rel,nblocks,InvalidBlockNumber);

‎src/backend/storage/buffer/bufmgr.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3911,7 +3911,9 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
39113911
* essential that CreateCheckPoint waits for virtual transactions
39123912
* rather than full transactionids.
39133913
*/
3914-
MyProc->delayChkpt=delayChkpt= true;
3914+
Assert((MyProc->delayChkpt&DELAY_CHKPT_START)==0);
3915+
MyProc->delayChkpt |=DELAY_CHKPT_START;
3916+
delayChkpt= true;
39153917
lsn=XLogSaveBufferForHint(buffer,buffer_std);
39163918
}
39173919

@@ -3944,7 +3946,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
39443946
UnlockBufHdr(bufHdr,buf_state);
39453947

39463948
if (delayChkpt)
3947-
MyProc->delayChkpt= false;
3949+
MyProc->delayChkpt&= ~DELAY_CHKPT_START;
39483950

39493951
if (dirtied)
39503952
{

‎src/backend/storage/ipc/procarray.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,10 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
698698

699699
proc->lxid=InvalidLocalTransactionId;
700700
proc->xmin=InvalidTransactionId;
701-
proc->delayChkpt= false;/* be sure this is cleared in abort */
701+
702+
/* be sure this is cleared in abort */
703+
proc->delayChkpt=0;
704+
702705
proc->recoveryConflictPending= false;
703706

704707
/* must be cleared with xid/xmin: */
@@ -737,7 +740,10 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid)
737740
proc->xid=InvalidTransactionId;
738741
proc->lxid=InvalidLocalTransactionId;
739742
proc->xmin=InvalidTransactionId;
740-
proc->delayChkpt= false;/* be sure this is cleared in abort */
743+
744+
/* be sure this is cleared in abort */
745+
proc->delayChkpt=0;
746+
741747
proc->recoveryConflictPending= false;
742748

743749
/* must be cleared with xid/xmin: */
@@ -3053,7 +3059,8 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
30533059
* delaying checkpoint because they have critical actions in progress.
30543060
*
30553061
* Constructs an array of VXIDs of transactions that are currently in commit
3056-
* critical sections, as shown by having delayChkpt set in their PGPROC.
3062+
* critical sections, as shown by having specified delayChkpt bits set in their
3063+
* PGPROC.
30573064
*
30583065
* Returns a palloc'd array that should be freed by the caller.
30593066
* *nvxids is the number of valid entries.
@@ -3067,13 +3074,15 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
30673074
* for clearing of delayChkpt to propagate is unimportant for correctness.
30683075
*/
30693076
VirtualTransactionId*
3070-
GetVirtualXIDsDelayingChkpt(int*nvxids)
3077+
GetVirtualXIDsDelayingChkpt(int*nvxids,inttype)
30713078
{
30723079
VirtualTransactionId*vxids;
30733080
ProcArrayStruct*arrayP=procArray;
30743081
intcount=0;
30753082
intindex;
30763083

3084+
Assert(type!=0);
3085+
30773086
/* allocate what's certainly enough result space */
30783087
vxids= (VirtualTransactionId*)
30793088
palloc(sizeof(VirtualTransactionId)*arrayP->maxProcs);
@@ -3085,7 +3094,7 @@ GetVirtualXIDsDelayingChkpt(int *nvxids)
30853094
intpgprocno=arrayP->pgprocnos[index];
30863095
PGPROC*proc=&allProcs[pgprocno];
30873096

3088-
if (proc->delayChkpt)
3097+
if ((proc->delayChkpt&type)!=0)
30893098
{
30903099
VirtualTransactionIdvxid;
30913100

@@ -3111,12 +3120,14 @@ GetVirtualXIDsDelayingChkpt(int *nvxids)
31113120
* those numbers should be small enough for it not to be a problem.
31123121
*/
31133122
bool
3114-
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids)
3123+
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids,inttype)
31153124
{
31163125
boolresult= false;
31173126
ProcArrayStruct*arrayP=procArray;
31183127
intindex;
31193128

3129+
Assert(type!=0);
3130+
31203131
LWLockAcquire(ProcArrayLock,LW_SHARED);
31213132

31223133
for (index=0;index<arrayP->numProcs;index++)
@@ -3127,7 +3138,8 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
31273138

31283139
GET_VXID_FROM_PGPROC(vxid,*proc);
31293140

3130-
if (proc->delayChkpt&&VirtualTransactionIdIsValid(vxid))
3141+
if ((proc->delayChkpt&type)!=0&&
3142+
VirtualTransactionIdIsValid(vxid))
31313143
{
31323144
inti;
31333145

‎src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ InitProcess(void)
393393
MyProc->roleId=InvalidOid;
394394
MyProc->tempNamespaceId=InvalidOid;
395395
MyProc->isBackgroundWorker=IsBackgroundWorker;
396-
MyProc->delayChkpt=false;
396+
MyProc->delayChkpt=0;
397397
MyProc->statusFlags=0;
398398
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
399399
if (IsAutoVacuumWorkerProcess())
@@ -578,7 +578,7 @@ InitAuxiliaryProcess(void)
578578
MyProc->roleId=InvalidOid;
579579
MyProc->tempNamespaceId=InvalidOid;
580580
MyProc->isBackgroundWorker=IsBackgroundWorker;
581-
MyProc->delayChkpt=false;
581+
MyProc->delayChkpt=0;
582582
MyProc->statusFlags=0;
583583
MyProc->lwWaiting= false;
584584
MyProc->lwWaitMode=0;

‎src/include/storage/proc.h

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,41 @@ struct XidCache
8686
*/
8787
#defineINVALID_PGPROCNOPG_INT32_MAX
8888

89+
/*
90+
* Flags for PGPROC.delayChkpt
91+
*
92+
* These flags can be used to delay the start or completion of a checkpoint
93+
* for short periods. A flag is in effect if the corresponding bit is set in
94+
* the PGPROC of any backend.
95+
*
96+
* For our purposes here, a checkpoint has three phases: (1) determine the
97+
* location to which the redo pointer will be moved, (2) write all the
98+
* data durably to disk, and (3) WAL-log the checkpoint.
99+
*
100+
* Setting DELAY_CHKPT_START prevents the system from moving from phase 1
101+
* to phase 2. This is useful when we are performing a WAL-logged modification
102+
* of data that will be flushed to disk in phase 2. By setting this flag
103+
* before writing WAL and clearing it after we've both written WAL and
104+
* performed the corresponding modification, we ensure that if the WAL record
105+
* is inserted prior to the new redo point, the corresponding data changes will
106+
* also be flushed to disk before the checkpoint can complete. (In the
107+
* extremely common case where the data being modified is in shared buffers
108+
* and we acquire an exclusive content lock on the relevant buffers before
109+
* writing WAL, this mechanism is not needed, because phase 2 will block
110+
* until we release the content lock and then flush the modified data to
111+
* disk.)
112+
*
113+
* Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
114+
* to phase 3. This is useful if we are performing a WAL-logged operation that
115+
* might invalidate buffers, such as relation truncation. In this case, we need
116+
* to ensure that any buffers which were invalidated and thus not flushed by
117+
* the checkpoint are actually destroyed on disk. Replay can cope with a file
118+
* or block that doesn't exist, but not with a block that has the wrong
119+
* contents.
120+
*/
121+
#defineDELAY_CHKPT_START(1<<0)
122+
#defineDELAY_CHKPT_COMPLETE(1<<1)
123+
89124
typedefenum
90125
{
91126
PROC_WAIT_STATUS_OK,
@@ -191,7 +226,7 @@ struct PGPROC
191226
pg_atomic_uint64waitStart;/* time at which wait for lock acquisition
192227
* started */
193228

194-
booldelayChkpt;/*true if this proc delays checkpoint start */
229+
intdelayChkpt;/*for DELAY_CHKPT_* flags */
195230

196231
uint8statusFlags;/* this backend's status flags, see PROC_*
197232
* above. mirrored in

‎src/include/storage/procarray.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,9 @@ extern TransactionId GetOldestActiveTransactionId(void);
5959
externTransactionIdGetOldestSafeDecodingTransactionId(boolcatalogOnly);
6060
externvoidGetReplicationHorizons(TransactionId*slot_xmin,TransactionId*catalog_xmin);
6161

62-
externVirtualTransactionId*GetVirtualXIDsDelayingChkpt(int*nvxids);
63-
externboolHaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids);
62+
externVirtualTransactionId*GetVirtualXIDsDelayingChkpt(int*nvxids,inttype);
63+
externboolHaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,
64+
intnvxids,inttype);
6465

6566
externPGPROC*BackendPidGetProc(intpid);
6667
externPGPROC*BackendPidGetProcWithLock(intpid);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp