Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitbfe56cd

Browse files
Delay extraction of TIDBitmap per page offsets
Pages from the bitmap created by the TIDBitmap API can be exact orlossy. The TIDBitmap API extracts the tuple offsets from exact pagesinto an array for the convenience of the caller.This was done in tbm_private|shared_iterate() right after advancing theiterator. However, as long as tbm_private|shared_iterate() set areference to the PagetableEntry in the TBMIterateResult, the offsetextraction can be done later.Waiting to extract the tuple offsets has a few benefits. For the sharediterator case, it allows us to extract the offsets after dropping theshared iterator state lock, reducing time spent holding a contendedlock.Separating the iteration step and extracting the offsets later alsoallows us to avoid extracting the offsets for prefetched blocks. Thoseoffsets were never used, so the overhead of extracting and storing themwas wasted.The real motivation for this change, however, is that future commitswill make bitmap heap scan use the read stream API. This requires aTBMIterateResult per issued block. By removing the array of tupleoffsets from the TBMIterateResult and only extracting the offsets whenthey are used, we reduce the memory required for per buffer datasubstantially.Suggested-by: Thomas Munro <thomas.munro@gmail.com>Reviewed-by: Thomas Munro <thomas.munro@gmail.com>Discussion:https://postgr.es/m/CA%2BhUKGLHbKP3jwJ6_%2BhnGi37Pw3BD5j2amjV3oSk7j-KyCnY7Q%40mail.gmail.com
1 parentb8778c4 commitbfe56cd

File tree

6 files changed

+87
-50
lines changed

6 files changed

+87
-50
lines changed

‎src/backend/access/gin/ginget.c

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ startScanEntry(GinState *ginstate, GinScanEntry entry, Snapshot snapshot)
333333
entry->nlist=0;
334334
entry->matchBitmap=NULL;
335335
entry->matchResult=NULL;
336+
entry->matchNtuples=-1;
336337
entry->reduceResult= false;
337338
entry->predictNumberResult=0;
338339

@@ -828,7 +829,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
828829
*/
829830
while (entry->matchResult==NULL||
830831
(!entry->matchResult->lossy&&
831-
entry->offset >=entry->matchResult->ntuples)||
832+
entry->offset >=entry->matchNtuples)||
832833
entry->matchResult->blockno<advancePastBlk||
833834
(ItemPointerIsLossyPage(&advancePast)&&
834835
entry->matchResult->blockno==advancePastBlk))
@@ -845,9 +846,15 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
845846
break;
846847
}
847848

849+
/* Exact pages need their tuple offsets extracted. */
850+
if (!entry->matchResult->lossy)
851+
entry->matchNtuples=tbm_extract_page_tuple(entry->matchResult,
852+
entry->matchOffsets,
853+
TBM_MAX_TUPLES_PER_PAGE);
854+
848855
/*
849856
* Reset counter to the beginning of entry->matchResult. Note:
850-
* entry->offset is still greater thanmatchResult->ntuples if
857+
* entry->offset is still greater thanentry->matchNtuples if
851858
* matchResult is lossy. So, on next call we will get next
852859
* result from TIDBitmap.
853860
*/
@@ -874,32 +881,35 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
874881
}
875882

876883
/*
877-
* Not a lossy page.Skip over anyoffsets<= advancePast, and
878-
*return that.
884+
* Not a lossy page.If tupleoffsetswere extracted,
885+
*entry->matchNtuples must be > -1
879886
*/
887+
Assert(entry->matchNtuples>-1);
888+
889+
/* Skip over any offsets <= advancePast, and return that. */
880890
if (entry->matchResult->blockno==advancePastBlk)
881891
{
882-
Assert(entry->matchResult->ntuples>0);
892+
Assert(entry->matchNtuples>0);
883893

884894
/*
885895
* First, do a quick check against the last offset on the
886896
* page. If that's > advancePast, so are all the other
887897
* offsets, so just go back to the top to get the next page.
888898
*/
889-
if (entry->matchResult->offsets[entry->matchResult->ntuples-1] <=advancePastOff)
899+
if (entry->matchOffsets[entry->matchNtuples-1] <=advancePastOff)
890900
{
891-
entry->offset=entry->matchResult->ntuples;
901+
entry->offset=entry->matchNtuples;
892902
continue;
893903
}
894904

895905
/* Otherwise scan to find the first item > advancePast */
896-
while (entry->matchResult->offsets[entry->offset] <=advancePastOff)
906+
while (entry->matchOffsets[entry->offset] <=advancePastOff)
897907
entry->offset++;
898908
}
899909

900910
ItemPointerSet(&entry->curItem,
901911
entry->matchResult->blockno,
902-
entry->matchResult->offsets[entry->offset]);
912+
entry->matchOffsets[entry->offset]);
903913
entry->offset++;
904914

905915
/* Done unless we need to reduce the result */

‎src/backend/access/gin/ginscan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
107107
scanEntry->matchBitmap=NULL;
108108
scanEntry->matchIterator=NULL;
109109
scanEntry->matchResult=NULL;
110+
scanEntry->matchNtuples=-1;
110111
scanEntry->list=NULL;
111112
scanEntry->nlist=0;
112113
scanEntry->offset=InvalidOffsetNumber;

‎src/backend/access/heap/heapam_handler.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2127,6 +2127,8 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
21272127
Snapshotsnapshot;
21282128
intntup;
21292129
TBMIterateResult*tbmres;
2130+
OffsetNumberoffsets[TBM_MAX_TUPLES_PER_PAGE];
2131+
intnoffsets=-1;
21302132

21312133
Assert(scan->rs_flags&SO_TYPE_BITMAPSCAN);
21322134

@@ -2145,6 +2147,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
21452147
if (tbmres==NULL)
21462148
return false;
21472149

2150+
/* Exact pages need their tuple offsets extracted. */
2151+
if (!tbmres->lossy)
2152+
noffsets=tbm_extract_page_tuple(tbmres,offsets,
2153+
TBM_MAX_TUPLES_PER_PAGE);
2154+
21482155
/*
21492156
* Ignore any claimed entries past what we think is the end of the
21502157
* relation. It may have been extended after the start of our scan (we
@@ -2172,8 +2179,9 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
21722179
/* can't be lossy in the skip_fetch case */
21732180
Assert(!tbmres->lossy);
21742181
Assert(bscan->rs_empty_tuples_pending >=0);
2182+
Assert(noffsets>-1);
21752183

2176-
bscan->rs_empty_tuples_pending+=tbmres->ntuples;
2184+
bscan->rs_empty_tuples_pending+=noffsets;
21772185

21782186
return true;
21792187
}
@@ -2216,9 +2224,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
22162224
*/
22172225
intcurslot;
22182226

2219-
for (curslot=0;curslot<tbmres->ntuples;curslot++)
2227+
/* We must have extracted the tuple offsets by now */
2228+
Assert(noffsets>-1);
2229+
2230+
for (curslot=0;curslot<noffsets;curslot++)
22202231
{
2221-
OffsetNumberoffnum=tbmres->offsets[curslot];
2232+
OffsetNumberoffnum=offsets[curslot];
22222233
ItemPointerDatatid;
22232234
HeapTupleDataheapTuple;
22242235

‎src/backend/nodes/tidbitmap.c

Lines changed: 24 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -40,22 +40,13 @@
4040

4141
#include<limits.h>
4242

43-
#include"access/htup_details.h"
4443
#include"common/hashfn.h"
4544
#include"common/int.h"
4645
#include"nodes/bitmapset.h"
4746
#include"nodes/tidbitmap.h"
4847
#include"storage/lwlock.h"
4948
#include"utils/dsa.h"
5049

51-
/*
52-
* The maximum number of tuples per page is not large (typically 256 with
53-
* 8K pages, or 1024 with 32K pages). So there's not much point in making
54-
* the per-page bitmaps variable size. We just legislate that the size
55-
* is this:
56-
*/
57-
#defineMAX_TUPLES_PER_PAGE MaxHeapTuplesPerPage
58-
5950
/*
6051
* When we have to switch over to lossy storage, we use a data structure
6152
* with one bit per page, where all pages having the same number DIV
@@ -67,7 +58,7 @@
6758
* table, using identical data structures. (This is because the memory
6859
* management for hashtables doesn't easily/efficiently allow space to be
6960
* transferred easily from one hashtable to another.) Therefore it's best
70-
* if PAGES_PER_CHUNK is the same asMAX_TUPLES_PER_PAGE, or at least not
61+
* if PAGES_PER_CHUNK is the same asTBM_MAX_TUPLES_PER_PAGE, or at least not
7162
* too different. But we also want PAGES_PER_CHUNK to be a power of 2 to
7263
* avoid expensive integer remainder operations. So, define it like this:
7364
*/
@@ -79,7 +70,7 @@
7970
#defineBITNUM(x)((x) % BITS_PER_BITMAPWORD)
8071

8172
/* number of active words for an exact page: */
82-
#defineWORDS_PER_PAGE((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
73+
#defineWORDS_PER_PAGE((TBM_MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
8374
/* number of active words for a lossy chunk: */
8475
#defineWORDS_PER_CHUNK ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
8576

@@ -181,7 +172,7 @@ struct TBMPrivateIterator
181172
intspageptr;/* next spages index */
182173
intschunkptr;/* next schunks index */
183174
intschunkbit;/* next bit to check in current schunk */
184-
TBMIterateResultoutput;/* MUST BE LAST (because variable-size) */
175+
TBMIterateResultoutput;
185176
};
186177

187178
/*
@@ -222,7 +213,7 @@ struct TBMSharedIterator
222213
PTEntryArray*ptbase;/* pagetable element array */
223214
PTIterationArray*ptpages;/* sorted exact page index list */
224215
PTIterationArray*ptchunks;/* sorted lossy page index list */
225-
TBMIterateResultoutput;/* MUST BE LAST (because variable-size) */
216+
TBMIterateResultoutput;
226217
};
227218

228219
/* Local function prototypes */
@@ -390,7 +381,7 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
390381
bitnum;
391382

392383
/* safety check to ensure we don't overrun bit array bounds */
393-
if (off<1||off>MAX_TUPLES_PER_PAGE)
384+
if (off<1||off>TBM_MAX_TUPLES_PER_PAGE)
394385
elog(ERROR,"tuple offset out of range: %u",off);
395386

396387
/*
@@ -696,9 +687,7 @@ tbm_begin_private_iterate(TIDBitmap *tbm)
696687
* Create the TBMPrivateIterator struct, with enough trailing space to
697688
* serve the needs of the TBMIterateResult sub-struct.
698689
*/
699-
iterator= (TBMPrivateIterator*)palloc(sizeof(TBMPrivateIterator)+
700-
MAX_TUPLES_PER_PAGE*
701-
sizeof(OffsetNumber));
690+
iterator= (TBMPrivateIterator*)palloc(sizeof(TBMPrivateIterator));
702691
iterator->tbm=tbm;
703692

704693
/*
@@ -906,11 +895,16 @@ tbm_prepare_shared_iterate(TIDBitmap *tbm)
906895
/*
907896
* tbm_extract_page_tuple - extract the tuple offsets from a page
908897
*
909-
* The extracted offsets are stored into TBMIterateResult.
898+
* Returns the number of offsets it filled in if <= max_offsets. Otherwise,
899+
* fills in as many offsets as fit and returns the total number of offsets in
900+
* the page.
910901
*/
911-
staticinlineint
912-
tbm_extract_page_tuple(PagetableEntry*page,TBMIterateResult*output)
902+
int
903+
tbm_extract_page_tuple(TBMIterateResult*iteritem,
904+
OffsetNumber*offsets,
905+
uint32max_offsets)
913906
{
907+
PagetableEntry*page=iteritem->internal_page;
914908
intwordnum;
915909
intntuples=0;
916910

@@ -925,7 +919,11 @@ tbm_extract_page_tuple(PagetableEntry *page, TBMIterateResult *output)
925919
while (w!=0)
926920
{
927921
if (w&1)
928-
output->offsets[ntuples++]= (OffsetNumber)off;
922+
{
923+
if (ntuples<max_offsets)
924+
offsets[ntuples]= (OffsetNumber)off;
925+
ntuples++;
926+
}
929927
off++;
930928
w >>=1;
931929
}
@@ -1012,9 +1010,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
10121010
{
10131011
/* Return a lossy page indicator from the chunk */
10141012
output->blockno=chunk_blockno;
1015-
output->ntuples=-1;
10161013
output->lossy= true;
10171014
output->recheck= true;
1015+
output->internal_page=NULL;
10181016
iterator->schunkbit++;
10191017
returnoutput;
10201018
}
@@ -1023,18 +1021,15 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
10231021
if (iterator->spageptr<tbm->npages)
10241022
{
10251023
PagetableEntry*page;
1026-
intntuples;
10271024

10281025
/* In TBM_ONE_PAGE state, we don't allocate an spages[] array */
10291026
if (tbm->status==TBM_ONE_PAGE)
10301027
page=&tbm->entry1;
10311028
else
10321029
page=tbm->spages[iterator->spageptr];
10331030

1034-
/* scan bitmap to extract individual offset numbers */
1035-
ntuples=tbm_extract_page_tuple(page,output);
1031+
output->internal_page=page;
10361032
output->blockno=page->blockno;
1037-
output->ntuples=ntuples;
10381033
output->lossy= false;
10391034
output->recheck=page->recheck;
10401035
iterator->spageptr++;
@@ -1107,9 +1102,9 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
11071102
{
11081103
/* Return a lossy page indicator from the chunk */
11091104
output->blockno=chunk_blockno;
1110-
output->ntuples=-1;
11111105
output->lossy= true;
11121106
output->recheck= true;
1107+
output->internal_page=NULL;
11131108
istate->schunkbit++;
11141109

11151110
LWLockRelease(&istate->lock);
@@ -1120,12 +1115,9 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
11201115
if (istate->spageptr<istate->npages)
11211116
{
11221117
PagetableEntry*page=&ptbase[idxpages[istate->spageptr]];
1123-
intntuples;
11241118

1125-
/* scan bitmap to extract individual offset numbers */
1126-
ntuples=tbm_extract_page_tuple(page,output);
1119+
output->internal_page=page;
11271120
output->blockno=page->blockno;
1128-
output->ntuples=ntuples;
11291121
output->lossy= false;
11301122
output->recheck=page->recheck;
11311123
istate->spageptr++;
@@ -1473,8 +1465,7 @@ tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
14731465
* Create the TBMSharedIterator struct, with enough trailing space to
14741466
* serve the needs of the TBMIterateResult sub-struct.
14751467
*/
1476-
iterator= (TBMSharedIterator*)palloc0(sizeof(TBMSharedIterator)+
1477-
MAX_TUPLES_PER_PAGE*sizeof(OffsetNumber));
1468+
iterator= (TBMSharedIterator*)palloc0(sizeof(TBMSharedIterator));
14781469

14791470
istate= (TBMSharedIteratorState*)dsa_get_address(dsa,dp);
14801471

‎src/include/access/gin_private.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,8 @@ typedef struct GinScanEntryData
354354
TIDBitmap*matchBitmap;
355355
TBMPrivateIterator*matchIterator;
356356
TBMIterateResult*matchResult;
357+
OffsetNumbermatchOffsets[TBM_MAX_TUPLES_PER_PAGE];
358+
intmatchNtuples;
357359

358360
/* used for Posting list and one page in Posting tree */
359361
ItemPointerData*list;

‎src/include/nodes/tidbitmap.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,17 @@
2222
#ifndefTIDBITMAP_H
2323
#defineTIDBITMAP_H
2424

25+
#include"access/htup_details.h"
2526
#include"storage/itemptr.h"
2627
#include"utils/dsa.h"
2728

29+
/*
30+
* The maximum number of tuples per page is not large (typically 256 with
31+
* 8K pages, or 1024 with 32K pages). So there's not much point in making
32+
* the per-page bitmaps variable size. We just legislate that the size
33+
* is this:
34+
*/
35+
#defineTBM_MAX_TUPLES_PER_PAGE MaxHeapTuplesPerPage
2836

2937
/*
3038
* Actual bitmap representation is private to tidbitmap.c. Callers can
@@ -53,12 +61,22 @@ typedef struct TBMIterator
5361
/* Result structure for tbm_iterate */
5462
typedefstructTBMIterateResult
5563
{
56-
BlockNumberblockno;/*page number containing tuples */
57-
intntuples;/* -1 when lossy */
64+
BlockNumberblockno;/*block number containing tuples */
65+
5866
boollossy;
59-
boolrecheck;/* should the tuples be rechecked? */
60-
/* Note: recheck is always true if lossy */
61-
OffsetNumberoffsets[FLEXIBLE_ARRAY_MEMBER];
67+
68+
/*
69+
* Whether or not the tuples should be rechecked. This is always true if
70+
* the page is lossy but may also be true if the query requires recheck.
71+
*/
72+
boolrecheck;
73+
74+
/*
75+
* Pointer to the page containing the bitmap for this block. It is a void *
76+
* to avoid exposing the details of the tidbitmap PagetableEntry to API
77+
* users.
78+
*/
79+
void*internal_page;
6280
}TBMIterateResult;
6381

6482
/* function prototypes in nodes/tidbitmap.c */
@@ -75,6 +93,10 @@ extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
7593
externvoidtbm_union(TIDBitmap*a,constTIDBitmap*b);
7694
externvoidtbm_intersect(TIDBitmap*a,constTIDBitmap*b);
7795

96+
externinttbm_extract_page_tuple(TBMIterateResult*iteritem,
97+
OffsetNumber*offsets,
98+
uint32max_offsets);
99+
78100
externbooltbm_is_empty(constTIDBitmap*tbm);
79101

80102
externTBMPrivateIterator*tbm_begin_private_iterate(TIDBitmap*tbm);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp