Commit d526575

Make large sequential scans and VACUUMs work in a limited-size "ring" of buffers, rather than blowing out the whole shared-buffer arena. Aside from avoiding cache spoliation, this fixes the problem that VACUUM formerly tended to cause a WAL flush for every page it modified, because we had it hacked to use only a single buffer. Those flushes will now occur only once per ring-ful. The exact ring size, and the threshold for seqscans to switch into the ring usage pattern, remain under debate; but the infrastructure seems done. The key bit of infrastructure is a new optional BufferAccessStrategy object that can be passed to ReadBuffer operations; this replaces the former StrategyHintVacuum API.

This patch also changes the buffer usage-count methodology a bit: we now advance usage_count when first pinning a buffer, rather than when last unpinning it. To preserve the behavior that a buffer's lifetime starts to decrease when it's released, the clock sweep code is modified to not decrement usage_count of pinned buffers.

Work not done in this commit: teach GiST and GIN indexes to use the vacuum BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
1 parent 0a6f2ee, commit d526575
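
As a quick illustration of the API this commit introduces, the hedged C sketch below mirrors the pattern the heapam.c hunk adds to initscan()/heapgetpage(): allocate a bulk-read strategy, route each page fetch through ReadBufferWithStrategy so pages recycle within a small ring, and free the strategy when the scan ends. The wrapper scan_all_blocks(), its parameters, and the header include paths are illustrative assumptions; only GetAccessStrategy, BAS_BULKREAD, ReadBufferWithStrategy, ReleaseBuffer, and FreeAccessStrategy come from the patch itself.

/*
 * Illustrative sketch only -- not code from this commit.  It mirrors the
 * buffer-ring usage pattern introduced in heapam.c.  scan_all_blocks()
 * and its parameters are hypothetical, and the header paths are assumed.
 */
#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static void
scan_all_blocks(Relation rel, BlockNumber nblocks)
{
    /* ask for the ring-buffer behavior used by large sequential scans */
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        /* each read cycles through the small ring instead of evicting the arena */
        Buffer      buf = ReadBufferWithStrategy(rel, blkno, strategy);

        /* ... inspect the page here (buffer locking omitted for brevity) ... */

        ReleaseBuffer(buf);
    }

    FreeAccessStrategy(strategy);
}

Passing NULL as the strategy falls back to ordinary buffer replacement, which is how the non-VACUUM call site in hashpage.c below and the small-table branch in initscan() behave.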

File tree

24 files changed (+723, -263 lines)

‎src/backend/access/hash/hash.c

Lines changed: 6 additions & 4 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -547,8 +547,9 @@ hashbulkdelete(PG_FUNCTION_ARGS)

             vacuum_delay_point();

-            buf = _hash_getbuf(rel, blkno, HASH_WRITE,
-                               LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+            buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+                                             LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                             info->strategy);
             page = BufferGetPage(buf);
             opaque = (HashPageOpaque) PageGetSpecialPointer(page);
             Assert(opaque->hasho_bucket == cur_bucket);
@@ -596,7 +597,8 @@ hashbulkdelete(PG_FUNCTION_ARGS)

         /* If we deleted anything, try to compact free space */
         if (bucket_dirty)
-            _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
+            _hash_squeezebucket(rel, cur_bucket, bucket_blkno,
+                                info->strategy);

         /* Release bucket lock */
         _hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);

‎src/backend/access/hash/hashovfl.c

Lines changed: 49 additions & 15 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    Overflow pages look like ordinary relation pages.
@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
  *    Remove this overflow page from its bucket's chain, and mark the page as
  *    free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ *    Since this function is invoked in VACUUM, we provide an access strategy
+ *    parameter that controls fetches of the bucket pages.
+ *
  *    Returns the block number of the page that followed the given page
  *    in the bucket, or InvalidBlockNumber if no following page.
  *
@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
  *    on the bucket, too.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+_hash_freeovflpage(Relation rel, Buffer ovflbuf,
+                   BufferAccessStrategy bstrategy)
 {
     HashMetaPage metap;
     Buffer       metabuf;
@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
      */
     if (BlockNumberIsValid(prevblkno))
     {
-        Buffer  prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
-                                       LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+        Buffer  prevbuf = _hash_getbuf_with_strategy(rel,
+                                                     prevblkno,
+                                                     HASH_WRITE,
+                                                     LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                                     bstrategy);
         Page    prevpage = BufferGetPage(prevbuf);
         HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
     }
     if (BlockNumberIsValid(nextblkno))
     {
-        Buffer  nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
-                                       LH_OVERFLOW_PAGE);
+        Buffer  nextbuf = _hash_getbuf_with_strategy(rel,
+                                                     nextblkno,
+                                                     HASH_WRITE,
+                                                     LH_OVERFLOW_PAGE,
+                                                     bstrategy);
         Page    nextpage = BufferGetPage(nextbuf);
         HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
         _hash_wrtbuf(rel, nextbuf);
     }

+    /* Note: bstrategy is intentionally not used for metapage and bitmap */
+
     /* Read the metapage so we can determine which bitmap page to use */
     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     metap = (HashMetaPage) BufferGetPage(metabuf);
@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *
  *    Caller must hold exclusive lock on the target bucket.  This allows
  *    us to safely lock multiple pages in the bucket.
+ *
+ *    Since this function is invoked in VACUUM, we provide an access strategy
+ *    parameter that controls fetches of the bucket pages.
  */
 void
 _hash_squeezebucket(Relation rel,
                     Bucket bucket,
-                    BlockNumber bucket_blkno)
+                    BlockNumber bucket_blkno,
+                    BufferAccessStrategy bstrategy)
 {
     Buffer      wbuf;
     Buffer      rbuf = 0;
@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
      * start squeezing into the base bucket page.
      */
     wblkno = bucket_blkno;
-    wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
+    wbuf = _hash_getbuf_with_strategy(rel,
+                                      wblkno,
+                                      HASH_WRITE,
+                                      LH_BUCKET_PAGE,
+                                      bstrategy);
     wpage = BufferGetPage(wbuf);
     wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

@@ -595,16 +615,22 @@ _hash_squeezebucket(Relation rel,
     }

     /*
-     * find the last page in the bucket chain by starting at the base bucket
-     * page and working forward.
+     * Find the last page in the bucket chain by starting at the base bucket
+     * page and working forward.  Note: we assume that a hash bucket chain is
+     * usually smaller than the buffer ring being used by VACUUM, else using
+     * the access strategy here would be counterproductive.
      */
     ropaque = wopaque;
     do
     {
         rblkno = ropaque->hasho_nextblkno;
         if (ropaque != wopaque)
             _hash_relbuf(rel, rbuf);
-        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+        rbuf = _hash_getbuf_with_strategy(rel,
+                                          rblkno,
+                                          HASH_WRITE,
+                                          LH_OVERFLOW_PAGE,
+                                          bstrategy);
         rpage = BufferGetPage(rbuf);
         ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
         Assert(ropaque->hasho_bucket == bucket);
@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
             return;
         }

-        wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+        wbuf = _hash_getbuf_with_strategy(rel,
+                                          wblkno,
+                                          HASH_WRITE,
+                                          LH_OVERFLOW_PAGE,
+                                          bstrategy);
         wpage = BufferGetPage(wbuf);
         wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
         Assert(wopaque->hasho_bucket == bucket);
@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
             /* yes, so release wbuf lock first */
             _hash_wrtbuf(rel, wbuf);
             /* free this overflow page (releases rbuf) */
-            _hash_freeovflpage(rel, rbuf);
+            _hash_freeovflpage(rel, rbuf, bstrategy);
             /* done */
             return;
         }

         /* free this overflow page, then get the previous one */
-        _hash_freeovflpage(rel, rbuf);
+        _hash_freeovflpage(rel, rbuf, bstrategy);

-        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+        rbuf = _hash_getbuf_with_strategy(rel,
+                                          rblkno,
+                                          HASH_WRITE,
+                                          LH_OVERFLOW_PAGE,
+                                          bstrategy);
         rpage = BufferGetPage(rbuf);
         ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
         Assert(ropaque->hasho_bucket == bucket);

‎src/backend/access/hash/hashpage.c

Lines changed: 30 additions & 2 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    Postgres hash pages look like ordinary relation pages.  The opaque
@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
     return buf;
 }

+/*
+ *    _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
+ *
+ *        This is identical to _hash_getbuf() but also allows a buffer access
+ *        strategy to be specified.  We use this for VACUUM operations.
+ */
+Buffer
+_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+                           int access, int flags,
+                           BufferAccessStrategy bstrategy)
+{
+    Buffer      buf;
+
+    if (blkno == P_NEW)
+        elog(ERROR, "hash AM does not use P_NEW");
+
+    buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
+
+    if (access != HASH_NOLOCK)
+        LockBuffer(buf, access);
+
+    /* ref count and lock type are correct */
+
+    _hash_checkpage(rel, buf, flags);
+
+    return buf;
+}
+
 /*
  *    _hash_relbuf() -- release a locked buffer.
  *
@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
     _hash_wrtbuf(rel, obuf);
     _hash_wrtbuf(rel, nbuf);

-    _hash_squeezebucket(rel, obucket, start_oblkno);
+    _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
 }

‎src/backend/access/heap/heapam.c

Lines changed: 34 additions & 4 deletions
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -83,6 +83,24 @@ initscan(HeapScanDesc scan, ScanKey key)
      */
     scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);

+    /*
+     * If the table is large relative to NBuffers, use a bulk-read access
+     * strategy, else use the default random-access strategy.  During a
+     * rescan, don't make a new strategy object if we don't have to.
+     */
+    if (scan->rs_nblocks > NBuffers / 4 &&
+        !scan->rs_rd->rd_istemp)
+    {
+        if (scan->rs_strategy == NULL)
+            scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+    }
+    else
+    {
+        if (scan->rs_strategy != NULL)
+            FreeAccessStrategy(scan->rs_strategy);
+        scan->rs_strategy = NULL;
+    }
+
     scan->rs_inited = false;
     scan->rs_ctup.t_data = NULL;
     ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -123,9 +141,17 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)

     Assert(page < scan->rs_nblocks);

-    scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-                                         scan->rs_rd,
-                                         page);
+    /* release previous scan buffer, if any */
+    if (BufferIsValid(scan->rs_cbuf))
+    {
+        ReleaseBuffer(scan->rs_cbuf);
+        scan->rs_cbuf = InvalidBuffer;
+    }
+
+    /* read page using selected strategy */
+    scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
+                                           page,
+                                           scan->rs_strategy);
     scan->rs_cblock = page;

     if (!scan->rs_pageatatime)
@@ -938,6 +964,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
     scan->rs_rd = relation;
     scan->rs_snapshot = snapshot;
     scan->rs_nkeys = nkeys;
+    scan->rs_strategy = NULL;    /* set in initscan */

     /*
      * we can use page-at-a-time mode if it's an MVCC-safe snapshot
@@ -1007,6 +1034,9 @@ heap_endscan(HeapScanDesc scan)
     if (scan->rs_key)
         pfree(scan->rs_key);

+    if (scan->rs_strategy != NULL)
+        FreeAccessStrategy(scan->rs_strategy);
+
     pfree(scan);
 }

‎src/backend/access/nbtree/nbtree.c

Lines changed: 4 additions & 3 deletions
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.154 2007/01/05 22:19:23 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.155 2007/05/30 20:11:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -786,9 +786,10 @@ btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
     /*
      * We can't use _bt_getbuf() here because it always applies
      * _bt_checkpage(), which will barf on an all-zero page.  We want to
-     * recycle all-zero pages, not fail.
+     * recycle all-zero pages, not fail.  Also, we want to use a nondefault
+     * buffer access strategy.
      */
-    buf = ReadBuffer(rel, blkno);
+    buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
     LockBuffer(buf, BT_READ);
     page = BufferGetPage(buf);
     opaque = (BTPageOpaque) PageGetSpecialPointer(page);

‎src/backend/access/transam/xlog.c

Lines changed: 31 additions & 1 deletion
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.270 2007/05/30 20:11:55 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1799,6 +1799,36 @@ XLogFlush(XLogRecPtr record)
              LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
 }

+/*
+ * Test whether XLOG data has been flushed up to (at least) the given position.
+ *
+ * Returns true if a flush is still needed.  (It may be that someone else
+ * is already in process of flushing that far, however.)
+ */
+bool
+XLogNeedsFlush(XLogRecPtr record)
+{
+    /* Quick exit if already known flushed */
+    if (XLByteLE(record, LogwrtResult.Flush))
+        return false;
+
+    /* read LogwrtResult and update local state */
+    {
+        /* use volatile pointer to prevent code rearrangement */
+        volatile XLogCtlData *xlogctl = XLogCtl;
+
+        SpinLockAcquire(&xlogctl->info_lck);
+        LogwrtResult = xlogctl->LogwrtResult;
+        SpinLockRelease(&xlogctl->info_lck);
+    }
+
+    /* check again */
+    if (XLByteLE(record, LogwrtResult.Flush))
+        return false;
+
+    return true;
+}
+
 /*
  * Create a new XLOG file segment, or open a pre-existing one.
  *

‎src/backend/catalog/index.c

Lines changed: 2 additions & 1 deletion
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.283 2007/05/16 17:28:20 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.284 2007/05/30 20:11:55 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1658,6 +1658,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
     ivinfo.vacuum_full = false;
     ivinfo.message_level = DEBUG2;
     ivinfo.num_heap_tuples = -1;
+    ivinfo.strategy = NULL;

     state.tuplesort = tuplesort_begin_datum(TIDOID,
                                             TIDLessOperator, false,
