Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c5c239e

Browse files
Use streaming read I/O in btree vacuuming
Btree vacuum processes all index pages in physical order. Now it uses the read stream API to get the next buffer instead of explicitly invoking ReadBuffer().

It is possible for concurrent insertions to cause page splits during index vacuuming. This can lead to index entries that have yet to be vacuumed being moved to pages that have already been vacuumed. Btree vacuum code handles this by backtracking to reprocess those pages. So, while sequentially encountered pages are now read through the read stream API, backtracked pages are still read with explicit ReadBuffer() calls.

Author: Andrey Borodin <x4mmm@yandex-team.ru>
Reviewed-by: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Junwang Zhao <zhjwpku@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Discussion: https://postgr.es/m/flat/CAAKRu_bW1UOyup%3DjdFw%2BkOF9bCaAm%3D9UpiyZtbPMn8n_vnP%2Big%40mail.gmail.com#3b3a84132fc683b3ee5b40bc4c2ea2a5
1 parent 1d617a2 commit c5c239e

File tree

1 file changed

+66
-25
lines changed

1 file changed

+66
-25
lines changed

‎src/backend/access/nbtree/nbtree.c

Lines changed: 66 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ typedef struct BTParallelScanDescData *BTParallelScanDesc;
8686
staticvoidbtvacuumscan(IndexVacuumInfo*info,IndexBulkDeleteResult*stats,
8787
IndexBulkDeleteCallbackcallback,void*callback_state,
8888
BTCycleIdcycleid);
89-
staticvoidbtvacuumpage(BTVacState*vstate,BlockNumberscanblkno);
89+
staticBlockNumberbtvacuumpage(BTVacState*vstate,Bufferbuf);
9090
staticBTVacuumPostingbtreevacuumposting(BTVacState*vstate,
9191
IndexTupleposting,
9292
OffsetNumberupdatedoffset,
@@ -991,8 +991,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
991991
Relationrel=info->index;
992992
BTVacStatevstate;
993993
BlockNumbernum_pages;
994-
BlockNumberscanblkno;
995994
boolneedLock;
995+
BlockRangeReadStreamPrivatep;
996+
ReadStream*stream=NULL;
996997

997998
/*
998999
* Reset fields that track information about the entire index now. This
@@ -1061,9 +1062,18 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
10611062
*/
10621063
needLock= !RELATION_IS_LOCAL(rel);
10631064

1064-
scanblkno=BTREE_METAPAGE+1;
1065+
p.current_blocknum=BTREE_METAPAGE+1;
1066+
stream=read_stream_begin_relation(READ_STREAM_FULL,
1067+
info->strategy,
1068+
rel,
1069+
MAIN_FORKNUM,
1070+
block_range_read_stream_cb,
1071+
&p,
1072+
0);
10651073
for (;;)
10661074
{
1075+
Bufferbuf;
1076+
10671077
/* Get the current relation length */
10681078
if (needLock)
10691079
LockRelationForExtension(rel,ExclusiveLock);
@@ -1076,18 +1086,44 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
10761086
num_pages);
10771087

10781088
/* Quit if we've scanned the whole relation */
1079-
if (scanblkno >=num_pages)
1089+
if (p.current_blocknum >=num_pages)
10801090
break;
1081-
/* Iterate over pages, then loop back to recheck length */
1082-
for (;scanblkno<num_pages;scanblkno++)
1091+
1092+
1093+
p.last_exclusive=num_pages;
1094+
1095+
/* Iterate over pages, then loop back to recheck relation length */
1096+
while (true)
10831097
{
1084-
btvacuumpage(&vstate,scanblkno);
1098+
BlockNumbercurrent_block;
1099+
1100+
/* call vacuum_delay_point while not holding any buffer lock */
1101+
vacuum_delay_point(false);
1102+
1103+
buf=read_stream_next_buffer(stream,NULL);
1104+
1105+
if (!BufferIsValid(buf))
1106+
break;
1107+
1108+
current_block=btvacuumpage(&vstate,buf);
1109+
10851110
if (info->report_progress)
10861111
pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1087-
scanblkno);
1112+
current_block);
10881113
}
1114+
1115+
Assert(read_stream_next_buffer(stream,NULL)==InvalidBuffer);
1116+
1117+
/*
1118+
* We have to reset the read stream to use it again. After returning
1119+
* InvalidBuffer, the read stream API won't invoke our callback again
1120+
* until the stream has been reset.
1121+
*/
1122+
read_stream_reset(stream);
10891123
}
10901124

1125+
read_stream_end(stream);
1126+
10911127
/* Set statistics num_pages field to final size of index */
10921128
stats->num_pages=num_pages;
10931129

@@ -1111,14 +1147,16 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
11111147
* btvacuumpage --- VACUUM one page
11121148
*
11131149
* This processes a single page for btvacuumscan(). In some cases we must
1114-
* backtrack to re-examine and VACUUM pages that were the scanblkno during
1150+
* backtrack to re-examine and VACUUM pages that were on buf's page during
11151151
* a previous call here. This is how we handle page splits (that happened
11161152
* after our cycleid was acquired) whose right half page happened to reuse
11171153
* a block that we might have processed at some point before it was
11181154
* recycled (i.e. before the page split).
1155+
*
1156+
* Returns BlockNumber of a scanned page (not backtracked).
11191157
*/
1120-
staticvoid
1121-
btvacuumpage(BTVacState*vstate,BlockNumberscanblkno)
1158+
staticBlockNumber
1159+
btvacuumpage(BTVacState*vstate,Bufferbuf)
11221160
{
11231161
IndexVacuumInfo*info=vstate->info;
11241162
IndexBulkDeleteResult*stats=vstate->stats;
@@ -1129,7 +1167,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11291167
boolattempt_pagedel;
11301168
BlockNumberblkno,
11311169
backtrack_to;
1132-
Bufferbuf;
1170+
BlockNumberscanblkno=BufferGetBlockNumber(buf);
11331171
Pagepage;
11341172
BTPageOpaqueopaque;
11351173

@@ -1140,17 +1178,6 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11401178
attempt_pagedel= false;
11411179
backtrack_to=P_NONE;
11421180

1143-
/* call vacuum_delay_point while not holding any buffer lock */
1144-
vacuum_delay_point(false);
1145-
1146-
/*
1147-
* We can't use _bt_getbuf() here because it always applies
1148-
* _bt_checkpage(), which will barf on an all-zero page. We want to
1149-
* recycle all-zero pages, not fail. Also, we want to use a nondefault
1150-
* buffer access strategy.
1151-
*/
1152-
buf=ReadBufferExtended(rel,MAIN_FORKNUM,blkno,RBM_NORMAL,
1153-
info->strategy);
11541181
_bt_lockbuf(rel,buf,BT_READ);
11551182
page=BufferGetPage(buf);
11561183
opaque=NULL;
@@ -1186,7 +1213,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11861213
errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
11871214
blkno,scanblkno,RelationGetRelationName(rel))));
11881215
_bt_relbuf(rel,buf);
1189-
return;
1216+
returnscanblkno;
11901217
}
11911218

11921219
/*
@@ -1206,7 +1233,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
12061233
{
12071234
/* Done with current scanblkno (and all lower split pages) */
12081235
_bt_relbuf(rel,buf);
1209-
return;
1236+
returnscanblkno;
12101237
}
12111238
}
12121239

@@ -1437,8 +1464,22 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
14371464
if (backtrack_to!=P_NONE)
14381465
{
14391466
blkno=backtrack_to;
1467+
1468+
/* check for vacuum delay while not holding any buffer lock */
1469+
vacuum_delay_point(false);
1470+
1471+
/*
1472+
* We can't use _bt_getbuf() here because it always applies
1473+
* _bt_checkpage(), which will barf on an all-zero page. We want to
1474+
* recycle all-zero pages, not fail. Also, we want to use a
1475+
* nondefault buffer access strategy.
1476+
*/
1477+
buf=ReadBufferExtended(rel,MAIN_FORKNUM,blkno,RBM_NORMAL,
1478+
info->strategy);
14401479
gotobacktrack;
14411480
}
1481+
1482+
returnscanblkno;
14421483
}
14431484

14441485
/*

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp