Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit3821d66

Browse files
committed
Fix possible recovery trouble if TRUNCATE overlaps a checkpoint.
If TRUNCATE causes some buffers to be invalidated and thus the checkpoint does not flush them, TRUNCATE must also ensure that the corresponding files are truncated on disk. Otherwise, a replay from the checkpoint might find that the buffers exist but have the wrong contents, which may cause replay to fail. Report by Teja Mupparti. Patch by Kyotaro Horiguchi, per a design suggestion from Heikki Linnakangas, with some changes to the comments by me. Review of this and a prior patch that approached the issue differently by Heikki Linnakangas, Andres Freund, Álvaro Herrera, Masahiko Sawada, and Tom Lane. Discussion: http://postgr.es/m/BYAPR06MB6373BF50B469CA393C614257ABF00@BYAPR06MB6373.namprd06.prod.outlook.com
1 parent61a007f commit3821d66

File tree

11 files changed

+117
-29
lines changed

11 files changed

+117
-29
lines changed

‎src/backend/access/transam/multixact.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,8 +3069,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30693069
* crash/basebackup, even though the state of the data directory would
30703070
* require it.
30713071
*/
3072-
Assert(!MyPgXact->delayChkpt);
3073-
MyPgXact->delayChkpt= true;
3072+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)==0);
3073+
MyPgXact->delayChkpt|=DELAY_CHKPT_START;
30743074

30753075
/* WAL log truncation */
30763076
WriteMTruncateXlogRec(newOldestMultiDB,
@@ -3096,7 +3096,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
30963096
/* Then offsets */
30973097
PerformOffsetsTruncation(oldestMulti,newOldestMulti);
30983098

3099-
MyPgXact->delayChkpt= false;
3099+
MyPgXact->delayChkpt&= ~DELAY_CHKPT_START;
31003100

31013101
END_CRIT_SECTION();
31023102
LWLockRelease(MultiXactTruncationLock);

‎src/backend/access/transam/twophase.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
477477
}
478478
pgxact->xid=xid;
479479
pgxact->xmin=InvalidTransactionId;
480-
pgxact->delayChkpt=false;
480+
pgxact->delayChkpt=0;
481481
pgxact->vacuumFlags=0;
482482
proc->pid=0;
483483
proc->databaseId=databaseid;
@@ -1187,7 +1187,8 @@ EndPrepare(GlobalTransaction gxact)
11871187

11881188
START_CRIT_SECTION();
11891189

1190-
MyPgXact->delayChkpt= true;
1190+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)==0);
1191+
MyPgXact->delayChkpt |=DELAY_CHKPT_START;
11911192

11921193
XLogBeginInsert();
11931194
for (record=records.head;record!=NULL;record=record->next)
@@ -1230,7 +1231,7 @@ EndPrepare(GlobalTransaction gxact)
12301231
* checkpoint starting after this will certainly see the gxact as a
12311232
* candidate for fsyncing.
12321233
*/
1233-
MyPgXact->delayChkpt= false;
1234+
MyPgXact->delayChkpt&= ~DELAY_CHKPT_START;
12341235

12351236
/*
12361237
* Remember that we have this GlobalTransaction entry locked for us. If
@@ -2337,7 +2338,8 @@ RecordTransactionCommitPrepared(TransactionId xid,
23372338
START_CRIT_SECTION();
23382339

23392340
/* See notes in RecordTransactionCommit */
2340-
MyPgXact->delayChkpt= true;
2341+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)==0);
2342+
MyPgXact->delayChkpt |=DELAY_CHKPT_START;
23412343

23422344
/*
23432345
* Emit the XLOG commit record. Note that we mark 2PC commits as
@@ -2385,7 +2387,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
23852387
TransactionIdCommitTree(xid,nchildren,children);
23862388

23872389
/* Checkpoint can proceed now */
2388-
MyPgXact->delayChkpt= false;
2390+
MyPgXact->delayChkpt&= ~DELAY_CHKPT_START;
23892391

23902392
END_CRIT_SECTION();
23912393

‎src/backend/access/transam/xact.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1306,8 +1306,9 @@ RecordTransactionCommit(void)
13061306
* This makes checkpoint's determination of which xacts are delayChkpt
13071307
* a bit fuzzy, but it doesn't matter.
13081308
*/
1309+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)==0);
13091310
START_CRIT_SECTION();
1310-
MyPgXact->delayChkpt= true;
1311+
MyPgXact->delayChkpt|=DELAY_CHKPT_START;
13111312

13121313
SetCurrentTransactionStopTimestamp();
13131314

@@ -1408,7 +1409,7 @@ RecordTransactionCommit(void)
14081409
*/
14091410
if (markXidCommitted)
14101411
{
1411-
MyPgXact->delayChkpt= false;
1412+
MyPgXact->delayChkpt&= ~DELAY_CHKPT_START;
14121413
END_CRIT_SECTION();
14131414
}
14141415

‎src/backend/access/transam/xlog.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8920,18 +8920,30 @@ CreateCheckPoint(int flags)
89208920
* and we will correctly flush the update below. So we cannot miss any
89218921
* xacts we need to wait for.
89228922
*/
8923-
vxids=GetVirtualXIDsDelayingChkpt(&nvxids);
8923+
vxids=GetVirtualXIDsDelayingChkpt(&nvxids,DELAY_CHKPT_START);
89248924
if (nvxids>0)
89258925
{
89268926
do
89278927
{
89288928
pg_usleep(10000L);/* wait for 10 msec */
8929-
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids));
8929+
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids,
8930+
DELAY_CHKPT_START));
89308931
}
89318932
pfree(vxids);
89328933

89338934
CheckPointGuts(checkPoint.redo,flags);
89348935

8936+
vxids=GetVirtualXIDsDelayingChkpt(&nvxids,DELAY_CHKPT_COMPLETE);
8937+
if (nvxids>0)
8938+
{
8939+
do
8940+
{
8941+
pg_usleep(10000L);/* wait for 10 msec */
8942+
}while (HaveVirtualXIDsDelayingChkpt(vxids,nvxids,
8943+
DELAY_CHKPT_COMPLETE));
8944+
}
8945+
pfree(vxids);
8946+
89358947
/*
89368948
* Take a snapshot of running transactions and write this to WAL. This
89378949
* allows us to reconstruct the state of running transactions during

‎src/backend/access/transam/xloginsert.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -899,7 +899,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
899899
/*
900900
* Ensure no checkpoint can change our view of RedoRecPtr.
901901
*/
902-
Assert(MyPgXact->delayChkpt);
902+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)!=0);
903903

904904
/*
905905
* Update RedoRecPtr so that we can make the right decision

‎src/backend/catalog/storage.c

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include"catalog/storage.h"
3030
#include"catalog/storage_xlog.h"
3131
#include"storage/freespace.h"
32+
#include"storage/proc.h"
3233
#include"storage/smgr.h"
3334
#include"utils/memutils.h"
3435
#include"utils/rel.h"
@@ -252,6 +253,22 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
252253
if (vm)
253254
visibilitymap_truncate(rel,nblocks);
254255

256+
/*
257+
* Make sure that a concurrent checkpoint can't complete while truncation
258+
* is in progress.
259+
*
260+
* The truncation operation might drop buffers that the checkpoint
261+
* otherwise would have flushed. If it does, then it's essential that
262+
* the files actually get truncated on disk before the checkpoint record
263+
* is written. Otherwise, if replay begins from that checkpoint, the
264+
* to-be-truncated blocks might still exist on disk but have older
265+
* contents than expected, which can cause replay to fail. It's OK for
266+
* the blocks to not exist on disk at all, but not for them to have the
267+
* wrong contents.
268+
*/
269+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_COMPLETE)==0);
270+
MyPgXact->delayChkpt |=DELAY_CHKPT_COMPLETE;
271+
255272
/*
256273
* We WAL-log the truncation before actually truncating, which means
257274
* trouble if the truncation fails. If we then crash, the WAL replay
@@ -290,8 +307,15 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
290307
XLogFlush(lsn);
291308
}
292309

293-
/* Do the real work */
310+
/*
311+
* This will first remove any buffers from the buffer pool that should no
312+
* longer exist after truncation is complete, and then truncate the
313+
* corresponding files on disk.
314+
*/
294315
smgrtruncate(rel->rd_smgr,MAIN_FORKNUM,nblocks);
316+
317+
/* We've done all the critical work, so checkpoints are OK now. */
318+
MyPgXact->delayChkpt &= ~DELAY_CHKPT_COMPLETE;
295319
}
296320

297321
/*

‎src/backend/storage/buffer/bufmgr.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3514,7 +3514,9 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
35143514
* essential that CreateCheckpoint waits for virtual transactions
35153515
* rather than full transactionids.
35163516
*/
3517-
MyPgXact->delayChkpt=delayChkpt= true;
3517+
Assert((MyPgXact->delayChkpt&DELAY_CHKPT_START)==0);
3518+
MyPgXact->delayChkpt |=DELAY_CHKPT_START;
3519+
delayChkpt= true;
35183520
lsn=XLogSaveBufferForHint(buffer,buffer_std);
35193521
}
35203522

@@ -3547,7 +3549,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
35473549
UnlockBufHdr(bufHdr,buf_state);
35483550

35493551
if (delayChkpt)
3550-
MyPgXact->delayChkpt= false;
3552+
MyPgXact->delayChkpt&= ~DELAY_CHKPT_START;
35513553

35523554
if (dirtied)
35533555
{

‎src/backend/storage/ipc/procarray.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,10 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid)
434434
pgxact->xmin=InvalidTransactionId;
435435
/* must be cleared with xid/xmin: */
436436
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
437-
pgxact->delayChkpt= false;/* be sure this is cleared in abort */
437+
438+
/* be sure this is cleared in abort */
439+
pgxact->delayChkpt=0;
440+
438441
proc->recoveryConflictPending= false;
439442

440443
Assert(pgxact->nxids==0);
@@ -456,7 +459,10 @@ ProcArrayEndTransactionInternal(PGPROC *proc, PGXACT *pgxact,
456459
pgxact->xmin=InvalidTransactionId;
457460
/* must be cleared with xid/xmin: */
458461
pgxact->vacuumFlags &= ~PROC_VACUUM_STATE_MASK;
459-
pgxact->delayChkpt= false;/* be sure this is cleared in abort */
462+
463+
/* be sure this is cleared in abort */
464+
pgxact->delayChkpt=0;
465+
460466
proc->recoveryConflictPending= false;
461467

462468
/* Clear the subtransaction-XID cache too while holding the lock */
@@ -2261,7 +2267,8 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
22612267
* delaying checkpoint because they have critical actions in progress.
22622268
*
22632269
* Constructs an array of VXIDs of transactions that are currently in commit
2264-
* critical sections, as shown by having delayChkpt set in their PGXACT.
2270+
* critical sections, as shown by having specified delayChkpt bits set in their
2271+
* PGXACT.
22652272
*
22662273
* Returns a palloc'd array that should be freed by the caller.
22672274
* *nvxids is the number of valid entries.
@@ -2275,13 +2282,15 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly)
22752282
* for clearing of delayChkpt to propagate is unimportant for correctness.
22762283
*/
22772284
VirtualTransactionId*
2278-
GetVirtualXIDsDelayingChkpt(int*nvxids)
2285+
GetVirtualXIDsDelayingChkpt(int*nvxids,inttype)
22792286
{
22802287
VirtualTransactionId*vxids;
22812288
ProcArrayStruct*arrayP=procArray;
22822289
intcount=0;
22832290
intindex;
22842291

2292+
Assert(type!=0);
2293+
22852294
/* allocate what's certainly enough result space */
22862295
vxids= (VirtualTransactionId*)
22872296
palloc(sizeof(VirtualTransactionId)*arrayP->maxProcs);
@@ -2294,7 +2303,7 @@ GetVirtualXIDsDelayingChkpt(int *nvxids)
22942303
PGPROC*proc=&allProcs[pgprocno];
22952304
PGXACT*pgxact=&allPgXact[pgprocno];
22962305

2297-
if (pgxact->delayChkpt)
2306+
if ((pgxact->delayChkpt&type)!=0)
22982307
{
22992308
VirtualTransactionIdvxid;
23002309

@@ -2320,12 +2329,14 @@ GetVirtualXIDsDelayingChkpt(int *nvxids)
23202329
* those numbers should be small enough for it not to be a problem.
23212330
*/
23222331
bool
2323-
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids)
2332+
HaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids,inttype)
23242333
{
23252334
boolresult= false;
23262335
ProcArrayStruct*arrayP=procArray;
23272336
intindex;
23282337

2338+
Assert(type!=0);
2339+
23292340
LWLockAcquire(ProcArrayLock,LW_SHARED);
23302341

23312342
for (index=0;index<arrayP->numProcs;index++)
@@ -2337,7 +2348,8 @@ HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids)
23372348

23382349
GET_VXID_FROM_PGPROC(vxid,*proc);
23392350

2340-
if (pgxact->delayChkpt&&VirtualTransactionIdIsValid(vxid))
2351+
if ((pgxact->delayChkpt&type)!=0&&
2352+
VirtualTransactionIdIsValid(vxid))
23412353
{
23422354
inti;
23432355

‎src/backend/storage/lmgr/proc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ InitProcess(void)
397397
MyProc->roleId=InvalidOid;
398398
MyProc->tempNamespaceId=InvalidOid;
399399
MyProc->isBackgroundWorker=IsBackgroundWorker;
400-
MyPgXact->delayChkpt=false;
400+
MyPgXact->delayChkpt=0;
401401
MyPgXact->vacuumFlags=0;
402402
/* NB -- autovac launcher intentionally does not set IS_AUTOVACUUM */
403403
if (IsAutoVacuumWorkerProcess())
@@ -579,7 +579,7 @@ InitAuxiliaryProcess(void)
579579
MyProc->roleId=InvalidOid;
580580
MyProc->tempNamespaceId=InvalidOid;
581581
MyProc->isBackgroundWorker=IsBackgroundWorker;
582-
MyPgXact->delayChkpt=false;
582+
MyPgXact->delayChkpt=0;
583583
MyPgXact->vacuumFlags=0;
584584
MyProc->lwWaiting= false;
585585
MyProc->lwWaitMode=0;

‎src/include/storage/proc.h

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,41 @@ struct XidCache
7676
*/
7777
#defineINVALID_PGPROCNOPG_INT32_MAX
7878

79+
/*
80+
* Flags for PGXACT.delayChkpt
81+
*
82+
* These flags can be used to delay the start or completion of a checkpoint
83+
* for short periods. A flag is in effect if the corresponding bit is set in
84+
* the PGXACT of any backend.
85+
*
86+
* For our purposes here, a checkpoint has three phases: (1) determine the
87+
* location to which the redo pointer will be moved, (2) write all the
88+
* data durably to disk, and (3) WAL-log the checkpoint.
89+
*
90+
* Setting DELAY_CHKPT_START prevents the system from moving from phase 1
91+
* to phase 2. This is useful when we are performing a WAL-logged modification
92+
* of data that will be flushed to disk in phase 2. By setting this flag
93+
* before writing WAL and clearing it after we've both written WAL and
94+
* performed the corresponding modification, we ensure that if the WAL record
95+
* is inserted prior to the new redo point, the corresponding data changes will
96+
* also be flushed to disk before the checkpoint can complete. (In the
97+
* extremely common case where the data being modified is in shared buffers
98+
* and we acquire an exclusive content lock on the relevant buffers before
99+
* writing WAL, this mechanism is not needed, because phase 2 will block
100+
* until we release the content lock and then flush the modified data to
101+
* disk.)
102+
*
103+
* Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2
104+
* to phase 3. This is useful if we are performing a WAL-logged operation that
105+
* might invalidate buffers, such as relation truncation. In this case, we need
106+
* to ensure that any buffers which were invalidated and thus not flushed by
107+
* the checkpoint are actually destroyed on disk. Replay can cope with a file
108+
* or block that doesn't exist, but not with a block that has the wrong
109+
* contents.
110+
*/
111+
#defineDELAY_CHKPT_START(1<<0)
112+
#defineDELAY_CHKPT_COMPLETE(1<<1)
113+
79114
/*
80115
* Each backend has a PGPROC struct in shared memory. There is also a list of
81116
* currently-unused PGPROC structs that will be reallocated to new backends.
@@ -232,8 +267,7 @@ typedef struct PGXACT
232267

233268
uint8vacuumFlags;/* vacuum-related flags, see above */
234269
booloverflowed;
235-
booldelayChkpt;/* true if this proc delays checkpoint start;
236-
* previously called InCommit */
270+
intdelayChkpt;/* for DELAY_CHKPT_* flags */
237271

238272
uint8nxids;
239273
}PGXACT;

‎src/include/storage/procarray.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,9 @@ extern TransactionId GetOldestXmin(Relation rel, int flags);
9292
externTransactionIdGetOldestActiveTransactionId(void);
9393
externTransactionIdGetOldestSafeDecodingTransactionId(boolcatalogOnly);
9494

95-
externVirtualTransactionId*GetVirtualXIDsDelayingChkpt(int*nvxids);
96-
externboolHaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,intnvxids);
95+
externVirtualTransactionId*GetVirtualXIDsDelayingChkpt(int*nvxids,inttype);
96+
externboolHaveVirtualXIDsDelayingChkpt(VirtualTransactionId*vxids,
97+
intnvxids,inttype);
9798

9899
externPGPROC*BackendPidGetProc(intpid);
99100
externPGPROC*BackendPidGetProcWithLock(intpid);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp