Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit37a8565

Browse files
committed
Fix BRIN summarization concurrent with extension
If a process is extending a table concurrently with some BRINsummarization process, it is possible for the latter to miss pages addedby the former because the number of pages is computed ahead of time.Fix by determining a fresh relation size after inserting the placeholdertuple: any process that further extends the table concurrently willupdate the placeholder tuple, while previous pages will be processed bythe heap scan.Reported-by: Tomas VondraReviewed-by: Tom LaneAuthor: Álvaro HerreraDiscussion:https://postgr.es/m/083d996a-4a8a-0e13-800a-851dd09ad8cc@2ndquadrant.comBackpatch-to: 9.5
1 parent0f6bd53 commit37a8565

File tree

1 file changed

+61
-29
lines changed

1 file changed

+61
-29
lines changed

‎src/backend/access/brin/brin.c

Lines changed: 61 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ static BrinBuildState *initialize_brin_buildstate(Relation idxRel,
6767
BrinRevmap*revmap,BlockNumberpagesPerRange);
6868
staticvoidterminate_brin_buildstate(BrinBuildState*state);
6969
staticvoidbrinsummarize(Relationindex,RelationheapRel,BlockNumberpageRange,
70-
double*numSummarized,double*numExisting);
70+
boolinclude_partial,double*numSummarized,double*numExisting);
7171
staticvoidform_and_insert_tuple(BrinBuildState*state);
7272
staticvoidunion_tuples(BrinDesc*bdesc,BrinMemTuple*a,
7373
BrinTuple*b);
@@ -789,7 +789,7 @@ brinvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
789789

790790
brin_vacuum_scan(info->index,info->strategy);
791791

792-
brinsummarize(info->index,heapRel,BRIN_ALL_BLOCKRANGES,
792+
brinsummarize(info->index,heapRel,BRIN_ALL_BLOCKRANGES, false,
793793
&stats->num_index_tuples,&stats->num_index_tuples);
794794

795795
heap_close(heapRel,AccessShareLock);
@@ -907,7 +907,7 @@ brin_summarize_range(PG_FUNCTION_ARGS)
907907
RelationGetRelationName(indexRel))));
908908

909909
/* OK, do it */
910-
brinsummarize(indexRel,heapRel,heapBlk,&numSummarized,NULL);
910+
brinsummarize(indexRel,heapRel,heapBlk,true,&numSummarized,NULL);
911911

912912
relation_close(indexRel,ShareUpdateExclusiveLock);
913913
relation_close(heapRel,ShareUpdateExclusiveLock);
@@ -1127,7 +1127,8 @@ terminate_brin_buildstate(BrinBuildState *state)
11271127
}
11281128

11291129
/*
1130-
* Summarize the given page range of the given index.
1130+
* On the given BRIN index, summarize the heap page range that corresponds
1131+
* to the heap block number given.
11311132
*
11321133
* This routine can run in parallel with insertions into the heap. To avoid
11331134
* missing those values from the summary tuple, we first insert a placeholder
@@ -1137,6 +1138,12 @@ terminate_brin_buildstate(BrinBuildState *state)
11371138
* update of the index value happens in a loop, so that if somebody updates
11381139
* the placeholder tuple after we read it, we detect the case and try again.
11391140
* This ensures that the concurrently inserted tuples are not lost.
1141+
*
1142+
* A further corner case is this routine being asked to summarize the partial
1143+
* range at the end of the table. heapNumBlocks is the (possibly outdated)
1144+
* table size; if we notice that the requested range lies beyond that size,
1145+
* we re-compute the table size after inserting the placeholder tuple, to
1146+
* avoid missing pages that were appended recently.
11401147
*/
11411148
staticvoid
11421149
summarize_range(IndexInfo*indexInfo,BrinBuildState*state,RelationheapRel,
@@ -1157,6 +1164,33 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11571164
state->bs_rmAccess,&phbuf,
11581165
heapBlk,phtup,phsz);
11591166

1167+
/*
1168+
* Compute range end. We hold ShareUpdateExclusive lock on table, so it
1169+
* cannot shrink concurrently (but it can grow).
1170+
*/
1171+
Assert(heapBlk %state->bs_pagesPerRange==0);
1172+
if (heapBlk+state->bs_pagesPerRange>heapNumBlks)
1173+
{
1174+
/*
1175+
* If we're asked to scan what we believe to be the final range on the
1176+
* table (i.e. a range that might be partial) we need to recompute our
1177+
* idea of what the latest page is after inserting the placeholder
1178+
* tuple. Anyone that grows the table later will update the
1179+
* placeholder tuple, so it doesn't matter that we won't scan these
1180+
* pages ourselves. Careful: the table might have been extended
1181+
* beyond the current range, so clamp our result.
1182+
*
1183+
* Fortunately, this should occur infrequently.
1184+
*/
1185+
scanNumBlks=Min(RelationGetNumberOfBlocks(heapRel)-heapBlk,
1186+
state->bs_pagesPerRange);
1187+
}
1188+
else
1189+
{
1190+
/* Easy case: range is known to be complete */
1191+
scanNumBlks=state->bs_pagesPerRange;
1192+
}
1193+
11601194
/*
11611195
* Execute the partial heap scan covering the heap blocks in the specified
11621196
* page range, summarizing the heap tuples in it. This scan stops just
@@ -1167,8 +1201,6 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
11671201
* by transactions that are still in progress, among other corner cases.
11681202
*/
11691203
state->bs_currRangeStart=heapBlk;
1170-
scanNumBlks=heapBlk+state->bs_pagesPerRange <=heapNumBlks ?
1171-
state->bs_pagesPerRange :heapNumBlks-heapBlk;
11721204
IndexBuildHeapRangeScan(heapRel,state->bs_irel,indexInfo, false, true,
11731205
heapBlk,scanNumBlks,
11741206
brinbuildCallback, (void*)state);
@@ -1232,63 +1264,63 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
12321264
* Summarize page ranges that are not already summarized. If pageRange is
12331265
* BRIN_ALL_BLOCKRANGES then the whole table is scanned; otherwise, only the
12341266
* page range containing the given heap page number is scanned.
1267+
* If include_partial is true, then the partial range at the end of the table
1268+
* is summarized, otherwise not.
12351269
*
12361270
* For each new index tuple inserted, *numSummarized (if not NULL) is
12371271
* incremented; for each existing tuple, *numExisting (if not NULL) is
12381272
* incremented.
12391273
*/
12401274
staticvoid
12411275
brinsummarize(Relationindex,RelationheapRel,BlockNumberpageRange,
1242-
double*numSummarized,double*numExisting)
1276+
boolinclude_partial,double*numSummarized,double*numExisting)
12431277
{
12441278
BrinRevmap*revmap;
12451279
BrinBuildState*state=NULL;
12461280
IndexInfo*indexInfo=NULL;
12471281
BlockNumberheapNumBlocks;
1248-
BlockNumberheapBlk;
12491282
BlockNumberpagesPerRange;
12501283
Bufferbuf;
12511284
BlockNumberstartBlk;
1252-
BlockNumberendBlk;
1253-
1254-
/* determine range of pages to process; nothing to do for an empty table */
1255-
heapNumBlocks=RelationGetNumberOfBlocks(heapRel);
1256-
if (heapNumBlocks==0)
1257-
return;
12581285

12591286
revmap=brinRevmapInitialize(index,&pagesPerRange,NULL);
12601287

1288+
/* determine range of pages to process */
1289+
heapNumBlocks=RelationGetNumberOfBlocks(heapRel);
12611290
if (pageRange==BRIN_ALL_BLOCKRANGES)
1262-
{
12631291
startBlk=0;
1264-
endBlk=heapNumBlocks;
1265-
}
12661292
else
1267-
{
12681293
startBlk= (pageRange /pagesPerRange)*pagesPerRange;
1294+
if (startBlk >=heapNumBlocks)
1295+
{
12691296
/* Nothing to do if start point is beyond end of table */
1270-
if (startBlk>heapNumBlocks)
1271-
{
1272-
brinRevmapTerminate(revmap);
1273-
return;
1274-
}
1275-
endBlk=startBlk+pagesPerRange;
1276-
if (endBlk>heapNumBlocks)
1277-
endBlk=heapNumBlocks;
1297+
brinRevmapTerminate(revmap);
1298+
return;
12781299
}
12791300

12801301
/*
12811302
* Scan the revmap to find unsummarized items.
12821303
*/
12831304
buf=InvalidBuffer;
1284-
for (heapBlk=startBlk;heapBlk<endBlk;heapBlk+=pagesPerRange)
1305+
for (;startBlk<heapNumBlocks;startBlk+=pagesPerRange)
12851306
{
12861307
BrinTuple*tup;
12871308
OffsetNumberoff;
12881309

1310+
/*
1311+
* Unless requested to summarize even a partial range, go away now if
1312+
* we think the next range is partial. Caller would pass true when
1313+
* it is typically run once bulk data loading is done
1314+
* (brin_summarize_new_values), and false when it is typically the
1315+
* result of arbitrarily-scheduled maintenance command (vacuuming).
1316+
*/
1317+
if (!include_partial&&
1318+
(startBlk+pagesPerRange>heapNumBlocks))
1319+
break;
1320+
12891321
CHECK_FOR_INTERRUPTS();
12901322

1291-
tup=brinGetTupleForHeapBlock(revmap,heapBlk,&buf,&off,NULL,
1323+
tup=brinGetTupleForHeapBlock(revmap,startBlk,&buf,&off,NULL,
12921324
BUFFER_LOCK_SHARE,NULL);
12931325
if (tup==NULL)
12941326
{
@@ -1301,7 +1333,7 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange,
13011333
pagesPerRange);
13021334
indexInfo=BuildIndexInfo(index);
13031335
}
1304-
summarize_range(indexInfo,state,heapRel,heapBlk,heapNumBlocks);
1336+
summarize_range(indexInfo,state,heapRel,startBlk,heapNumBlocks);
13051337

13061338
/* and re-initialize state for the next range */
13071339
brin_memtuple_initialize(state->bs_dtuple,state->bs_bdesc);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp