Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 81c4508

Browse files
committed
Fix race condition between hot standby and restoring a full-page image.
There was a window in RestoreBackupBlock where a page would be zeroed out, but not yet locked. If a backend pinned and locked the page in that window, it saw the zeroed page instead of the old page or new page contents, which could lead to missing rows in a result set, or errors. To fix, replace RBM_ZERO with RBM_ZERO_AND_LOCK, which atomically pins, zeroes, and locks the page, if it's not in the buffer cache already. In stable branches, the old RBM_ZERO constant is renamed to RBM_DO_NOT_USE, to avoid breaking any 3rd party extensions that might use RBM_ZERO. More importantly, this avoids renumbering the other enum values, which would cause even bigger confusion in extensions that use ReadBufferExtended, but haven't been recompiled. Backpatch to all supported versions; this has been racy since hot standby was introduced.
1 parent 35fed51 commit 81c4508

File tree

5 files changed

+78
-32
lines changed

5 files changed

+78
-32
lines changed

‎src/backend/access/hash/hashpage.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,8 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
155155
if (blkno==P_NEW)
156156
elog(ERROR,"hash AM does not use P_NEW");
157157

158-
buf=ReadBufferExtended(rel,MAIN_FORKNUM,blkno,RBM_ZERO,NULL);
159-
160-
LockBuffer(buf,HASH_WRITE);
158+
buf=ReadBufferExtended(rel,MAIN_FORKNUM,blkno,RBM_ZERO_AND_LOCK,
159+
NULL);
161160

162161
/* ref count and lock type are correct */
163162

@@ -198,11 +197,13 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno, ForkNumber forkNum)
198197
if (BufferGetBlockNumber(buf)!=blkno)
199198
elog(ERROR,"unexpected hash relation size: %u, should be %u",
200199
BufferGetBlockNumber(buf),blkno);
200+
LockBuffer(buf,HASH_WRITE);
201201
}
202202
else
203-
buf=ReadBufferExtended(rel,forkNum,blkno,RBM_ZERO,NULL);
204-
205-
LockBuffer(buf,HASH_WRITE);
203+
{
204+
buf=ReadBufferExtended(rel,forkNum,blkno,RBM_ZERO_AND_LOCK,
205+
NULL);
206+
}
206207

207208
/* ref count and lock type are correct */
208209

‎src/backend/access/heap/heapam.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7556,7 +7556,7 @@ heap_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
75567556
{
75577557
XLogReadBufferForRedoExtended(lsn,record,0,
75587558
target_node,MAIN_FORKNUM,blkno,
7559-
RBM_ZERO, false,&buffer);
7559+
RBM_ZERO_AND_LOCK, false,&buffer);
75607560
page=BufferGetPage(buffer);
75617561
PageInit(page,BufferGetPageSize(buffer),0);
75627562
action=BLK_NEEDS_REDO;
@@ -7683,7 +7683,7 @@ heap_xlog_multi_insert(XLogRecPtr lsn, XLogRecord *record)
76837683
{
76847684
XLogReadBufferForRedoExtended(lsn,record,0,
76857685
rnode,MAIN_FORKNUM,blkno,
7686-
RBM_ZERO, false,&buffer);
7686+
RBM_ZERO_AND_LOCK, false,&buffer);
76877687
page=BufferGetPage(buffer);
76887688
PageInit(page,BufferGetPageSize(buffer),0);
76897689
action=BLK_NEEDS_REDO;
@@ -7876,7 +7876,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
78767876
{
78777877
XLogReadBufferForRedoExtended(lsn,record,1,
78787878
rnode,MAIN_FORKNUM,newblk,
7879-
RBM_ZERO, false,&nbuffer);
7879+
RBM_ZERO_AND_LOCK, false,&nbuffer);
78807880
page= (Page)BufferGetPage(nbuffer);
78817881
PageInit(page,BufferGetPageSize(nbuffer),0);
78827882
newaction=BLK_NEEDS_REDO;

‎src/backend/access/transam/xlogutils.c

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,13 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
287287
* XLogReadBufferForRedoExtended
288288
*Like XLogReadBufferForRedo, but with extra options.
289289
*
290-
* If mode is RBM_ZERO or RBM_ZERO_ON_ERROR, if the page doesn't exist, the
291-
* relation is extended with all-zeroes pages up to the referenced block
292-
* number. In RBM_ZERO mode, the return value is always BLK_NEEDS_REDO.
290+
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
291+
* with all-zeroes pages up to the referenced block number. In
292+
* RBM_ZERO_AND_LOCK and RBM_ZERO_AND_CLEANUP_LOCK modes, the return value
293+
* is always BLK_NEEDS_REDO.
294+
*
295+
* (The RBM_ZERO_AND_CLEANUP_LOCK mode is redundant with the get_cleanup_lock
296+
* parameter. Do not use an inconsistent combination!)
293297
*
294298
* If 'get_cleanup_lock' is true, a "cleanup lock" is acquired on the buffer
295299
* using LockBufferForCleanup(), instead of a regular exclusive lock.
@@ -312,10 +316,13 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
312316
*buf=XLogReadBufferExtended(rnode,forkno,blkno,mode);
313317
if (BufferIsValid(*buf))
314318
{
315-
if (get_cleanup_lock)
316-
LockBufferForCleanup(*buf);
317-
else
318-
LockBuffer(*buf,BUFFER_LOCK_EXCLUSIVE);
319+
if (mode!=RBM_ZERO_AND_LOCK&&mode!=RBM_ZERO_AND_CLEANUP_LOCK)
320+
{
321+
if (get_cleanup_lock)
322+
LockBufferForCleanup(*buf);
323+
else
324+
LockBuffer(*buf,BUFFER_LOCK_EXCLUSIVE);
325+
}
319326
if (lsn <=PageGetLSN(BufferGetPage(*buf)))
320327
returnBLK_DONE;
321328
else
@@ -341,16 +348,17 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
341348
* The returned buffer is exclusively-locked.
342349
*
343350
* For historical reasons, instead of a ReadBufferMode argument, this only
344-
* supports RBM_ZERO (init == true) and RBM_NORMAL (init == false) modes.
351+
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
352+
* modes.
345353
*/
346354
Buffer
347355
XLogReadBuffer(RelFileNodernode,BlockNumberblkno,boolinit)
348356
{
349357
Bufferbuf;
350358

351359
buf=XLogReadBufferExtended(rnode,MAIN_FORKNUM,blkno,
352-
init ?RBM_ZERO :RBM_NORMAL);
353-
if (BufferIsValid(buf))
360+
init ?RBM_ZERO_AND_LOCK :RBM_NORMAL);
361+
if (BufferIsValid(buf)&& !init)
354362
LockBuffer(buf,BUFFER_LOCK_EXCLUSIVE);
355363

356364
returnbuf;
@@ -369,8 +377,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
369377
* dropped or truncated. If we don't see evidence of that later in the WAL
370378
* sequence, we'll complain at the end of WAL replay.)
371379
*
372-
* In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the
373-
* relation is extended with all-zeroes pages up to the given block number.
380+
* In RBM_ZERO_* modes, if the page doesn't exist, the relation is extended
381+
* with all-zeroes pages up to the given block number.
374382
*
375383
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
376384
* exist, and we don't check for all-zeroes. Thus, no log entry is made
@@ -424,14 +432,20 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
424432
do
425433
{
426434
if (buffer!=InvalidBuffer)
435+
{
436+
if (mode==RBM_ZERO_AND_LOCK||mode==RBM_ZERO_AND_CLEANUP_LOCK)
437+
LockBuffer(buffer,BUFFER_LOCK_UNLOCK);
427438
ReleaseBuffer(buffer);
439+
}
428440
buffer=ReadBufferWithoutRelcache(rnode,forknum,
429441
P_NEW,mode,NULL);
430442
}
431443
while (BufferGetBlockNumber(buffer)<blkno);
432444
/* Handle the corner case that P_NEW returns non-consecutive pages */
433445
if (BufferGetBlockNumber(buffer)!=blkno)
434446
{
447+
if (mode==RBM_ZERO_AND_LOCK||mode==RBM_ZERO_AND_CLEANUP_LOCK)
448+
LockBuffer(buffer,BUFFER_LOCK_UNLOCK);
435449
ReleaseBuffer(buffer);
436450
buffer=ReadBufferWithoutRelcache(rnode,forknum,blkno,
437451
mode,NULL);
@@ -537,12 +551,8 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
537551
Pagepage;
538552

539553
buffer=XLogReadBufferExtended(bkpb.node,bkpb.fork,bkpb.block,
540-
RBM_ZERO);
554+
get_cleanup_lock ?RBM_ZERO_AND_CLEANUP_LOCK :RBM_ZERO_AND_LOCK);
541555
Assert(BufferIsValid(buffer));
542-
if (get_cleanup_lock)
543-
LockBufferForCleanup(buffer);
544-
else
545-
LockBuffer(buffer,BUFFER_LOCK_EXCLUSIVE);
546556

547557
page= (Page)BufferGetPage(buffer);
548558

‎src/backend/storage/buffer/bufmgr.c

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -499,14 +499,19 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
499499
* valid, the page is zeroed instead of throwing an error. This is intended
500500
* for non-critical data, where the caller is prepared to repair errors.
501501
*
502-
* In RBM_ZERO mode, if the page isn't in buffer cache already, it's filled
503-
* with zeros instead of reading it from disk. Useful when the caller is
504-
* going to fill the page from scratch, since this saves I/O and avoids
502+
* In RBM_ZERO_AND_LOCK mode, if the page isn't in buffer cache already, it's
503+
* filled with zeros instead of reading it from disk. Useful when the caller
504+
* is going to fill the page from scratch, since this saves I/O and avoids
505505
* unnecessary failure if the page-on-disk has corrupt page headers.
506+
* The page is returned locked to ensure that the caller has a chance to
507+
* initialize the page before it's made visible to others.
506508
* Caution: do not use this mode to read a page that is beyond the relation's
507509
* current physical EOF; that is likely to cause problems in md.c when
508510
* the page is modified and written out. P_NEW is OK, though.
509511
*
512+
* RBM_ZERO_AND_CLEANUP_LOCK is the same as RBM_ZERO_AND_LOCK, but acquires
513+
* a cleanup-strength lock on the page.
514+
*
510515
* RBM_NORMAL_NO_LOG mode is treated the same as RBM_NORMAL here.
511516
*
512517
* If strategy is not NULL, a nondefault buffer access strategy is used.
@@ -648,6 +653,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
648653
isExtend,
649654
found);
650655

656+
/*
657+
* In RBM_ZERO_AND_LOCK mode the caller expects the page to
658+
* be locked on return.
659+
*/
660+
if (!isLocalBuf)
661+
{
662+
if (mode==RBM_ZERO_AND_LOCK)
663+
LWLockAcquire(bufHdr->content_lock,LW_EXCLUSIVE);
664+
elseif (mode==RBM_ZERO_AND_CLEANUP_LOCK)
665+
LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
666+
}
667+
651668
returnBufferDescriptorGetBuffer(bufHdr);
652669
}
653670

@@ -729,7 +746,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
729746
* Read in the page, unless the caller intends to overwrite it and
730747
* just wants us to allocate a buffer.
731748
*/
732-
if (mode==RBM_ZERO)
749+
if (mode==RBM_ZERO_AND_LOCK||mode==RBM_ZERO_AND_CLEANUP_LOCK)
733750
MemSet((char*)bufBlock,0,BLCKSZ);
734751
else
735752
{
@@ -771,6 +788,22 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
771788
}
772789
}
773790

791+
/*
792+
* In RBM_ZERO_AND_LOCK mode, grab the buffer content lock before marking
793+
* the page as valid, to make sure that no other backend sees the zeroed
794+
* page before the caller has had a chance to initialize it.
795+
*
796+
* Since no-one else can be looking at the page contents yet, there is no
797+
* difference between an exclusive lock and a cleanup-strength lock.
798+
* (Note that we cannot use LockBuffer() or LockBufferForCleanup() here,
799+
* because they assert that the buffer is already valid.)
800+
*/
801+
if ((mode==RBM_ZERO_AND_LOCK||mode==RBM_ZERO_AND_CLEANUP_LOCK)&&
802+
!isLocalBuf)
803+
{
804+
LWLockAcquire(bufHdr->content_lock,LW_EXCLUSIVE);
805+
}
806+
774807
if (isLocalBuf)
775808
{
776809
/* Only need to adjust flags */

‎src/include/storage/bufmgr.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ typedef enum BufferAccessStrategyType
3636
typedefenum
3737
{
3838
RBM_NORMAL,/* Normal read */
39-
RBM_ZERO,/* Don't read from disk, caller will
40-
* initialize */
39+
RBM_ZERO_AND_LOCK,/* Don't read from disk, caller will
40+
* initialize. Also locks the page. */
41+
RBM_ZERO_AND_CLEANUP_LOCK,/* Like RBM_ZERO_AND_LOCK, but locks the page
42+
* in "cleanup" mode */
4143
RBM_ZERO_ON_ERROR,/* Read, but return an all-zeros page on error */
4244
RBM_NORMAL_NO_LOG/* Don't log page as invalid during WAL
4345
* replay; otherwise same as RBM_NORMAL */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp