Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit7c70996

Browse files
committed
Allow bitmap scans to operate as index-only scans when possible.
If we don't have to return any columns from heap tuples, and there'sno need to recheck qual conditions, and the heap page is all-visible,then we can skip fetching the heap page altogether.Skip prefetching pages too, when possible, on the assumption that therecheck flag will remain the same from one page to the next. While thatassumption is hardly bulletproof, it seems like a good bet most of thetime, and better than prefetching pages we don't need.This commit installs the executor infrastructure, but doesn't changeany planner cost estimates, thus possibly causing bitmap scans tonot be chosen in cases where this change renders them the best choice.I (tgl) am not entirely convinced that we need to account for thisbehavior in the planner, because I think typically the bitmap scan wouldget chosen anyway if it's the best bet. In any case the submitted patchtook way too many shortcuts, resulting in too many clearly-bad choices,to be committable.Alexander Kuzmenkov, reviewed by Alexey Chernyshov, and whacked aroundrather heavily by me.Discussion:https://postgr.es/m/239a8955-c0fc-f506-026d-c837e86c827b@postgrespro.ru
1 parentec7ce54 commit7c70996

File tree

5 files changed

+150
-40
lines changed

5 files changed

+150
-40
lines changed

‎src/backend/executor/nodeBitmapHeapscan.c

Lines changed: 126 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
#include"access/relscan.h"
4141
#include"access/transam.h"
42+
#include"access/visibilitymap.h"
4243
#include"executor/execdebug.h"
4344
#include"executor/nodeBitmapHeapscan.h"
4445
#include"miscadmin.h"
@@ -225,9 +226,31 @@ BitmapHeapNext(BitmapHeapScanState *node)
225226
}
226227

227228
/*
228-
* Fetch the current heap page and identify candidate tuples.
229+
* We can skip fetching the heap page if we don't need any fields
230+
* from the heap, and the bitmap entries don't need rechecking,
231+
* and all tuples on the page are visible to our transaction.
229232
*/
230-
bitgetpage(scan,tbmres);
233+
node->skip_fetch= (node->can_skip_fetch&&
234+
!tbmres->recheck&&
235+
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
236+
tbmres->blockno,
237+
&node->vmbuffer));
238+
239+
if (node->skip_fetch)
240+
{
241+
/*
242+
* The number of tuples on this page is put into
243+
* scan->rs_ntuples; note we don't fill scan->rs_vistuples.
244+
*/
245+
scan->rs_ntuples=tbmres->ntuples;
246+
}
247+
else
248+
{
249+
/*
250+
* Fetch the current heap page and identify candidate tuples.
251+
*/
252+
bitgetpage(scan,tbmres);
253+
}
231254

232255
if (tbmres->ntuples >=0)
233256
node->exact_pages++;
@@ -289,45 +312,55 @@ BitmapHeapNext(BitmapHeapScanState *node)
289312
*/
290313
BitmapPrefetch(node,scan);
291314

292-
/*
293-
* Okay to fetch the tuple
294-
*/
295-
targoffset=scan->rs_vistuples[scan->rs_cindex];
296-
dp= (Page)BufferGetPage(scan->rs_cbuf);
297-
lp=PageGetItemId(dp,targoffset);
298-
Assert(ItemIdIsNormal(lp));
315+
if (node->skip_fetch)
316+
{
317+
/*
318+
* If we don't have to fetch the tuple, just return nulls.
319+
*/
320+
ExecStoreAllNullTuple(slot);
321+
}
322+
else
323+
{
324+
/*
325+
* Okay to fetch the tuple.
326+
*/
327+
targoffset=scan->rs_vistuples[scan->rs_cindex];
328+
dp= (Page)BufferGetPage(scan->rs_cbuf);
329+
lp=PageGetItemId(dp,targoffset);
330+
Assert(ItemIdIsNormal(lp));
299331

300-
scan->rs_ctup.t_data= (HeapTupleHeader)PageGetItem((Page)dp,lp);
301-
scan->rs_ctup.t_len=ItemIdGetLength(lp);
302-
scan->rs_ctup.t_tableOid=scan->rs_rd->rd_id;
303-
ItemPointerSet(&scan->rs_ctup.t_self,tbmres->blockno,targoffset);
332+
scan->rs_ctup.t_data= (HeapTupleHeader)PageGetItem((Page)dp,lp);
333+
scan->rs_ctup.t_len=ItemIdGetLength(lp);
334+
scan->rs_ctup.t_tableOid=scan->rs_rd->rd_id;
335+
ItemPointerSet(&scan->rs_ctup.t_self,tbmres->blockno,targoffset);
304336

305-
pgstat_count_heap_fetch(scan->rs_rd);
337+
pgstat_count_heap_fetch(scan->rs_rd);
306338

307-
/*
308-
* Set up the result slot to point to this tuple. Note that the slot
309-
* acquires a pin on the buffer.
310-
*/
311-
ExecStoreTuple(&scan->rs_ctup,
312-
slot,
313-
scan->rs_cbuf,
314-
false);
315-
316-
/*
317-
* If we are using lossy info, we have to recheck the qual conditions
318-
* at every tuple.
319-
*/
320-
if (tbmres->recheck)
321-
{
322-
econtext->ecxt_scantuple=slot;
323-
ResetExprContext(econtext);
339+
/*
340+
* Set up the result slot to point to this tuple. Note that the
341+
* slot acquires a pin on the buffer.
342+
*/
343+
ExecStoreTuple(&scan->rs_ctup,
344+
slot,
345+
scan->rs_cbuf,
346+
false);
324347

325-
if (!ExecQual(node->bitmapqualorig,econtext))
348+
/*
349+
* If we are using lossy info, we have to recheck the qual
350+
* conditions at every tuple.
351+
*/
352+
if (tbmres->recheck)
326353
{
327-
/* Fails recheck, so drop it and loop back for another */
328-
InstrCountFiltered2(node,1);
329-
ExecClearTuple(slot);
330-
continue;
354+
econtext->ecxt_scantuple=slot;
355+
ResetExprContext(econtext);
356+
357+
if (!ExecQual(node->bitmapqualorig,econtext))
358+
{
359+
/* Fails recheck, so drop it and loop back for another */
360+
InstrCountFiltered2(node,1);
361+
ExecClearTuple(slot);
362+
continue;
363+
}
331364
}
332365
}
333366

@@ -582,6 +615,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
582615
while (node->prefetch_pages<node->prefetch_target)
583616
{
584617
TBMIterateResult*tbmpre=tbm_iterate(prefetch_iterator);
618+
boolskip_fetch;
585619

586620
if (tbmpre==NULL)
587621
{
@@ -591,7 +625,26 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
591625
break;
592626
}
593627
node->prefetch_pages++;
594-
PrefetchBuffer(scan->rs_rd,MAIN_FORKNUM,tbmpre->blockno);
628+
629+
/*
630+
* If we expect not to have to actually read this heap page,
631+
* skip this prefetch call, but continue to run the prefetch
632+
* logic normally. (Would it be better not to increment
633+
* prefetch_pages?)
634+
*
635+
* This depends on the assumption that the index AM will
636+
* report the same recheck flag for this future heap page as
637+
* it did for the current heap page; which is not a certainty
638+
* but is true in many cases.
639+
*/
640+
skip_fetch= (node->can_skip_fetch&&
641+
(node->tbmres ? !node->tbmres->recheck : false)&&
642+
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
643+
tbmpre->blockno,
644+
&node->pvmbuffer));
645+
646+
if (!skip_fetch)
647+
PrefetchBuffer(scan->rs_rd,MAIN_FORKNUM,tbmpre->blockno);
595648
}
596649
}
597650

@@ -608,6 +661,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
608661
{
609662
TBMIterateResult*tbmpre;
610663
booldo_prefetch= false;
664+
boolskip_fetch;
611665

612666
/*
613667
* Recheck under the mutex. If some other process has already
@@ -633,7 +687,15 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
633687
break;
634688
}
635689

636-
PrefetchBuffer(scan->rs_rd,MAIN_FORKNUM,tbmpre->blockno);
690+
/* As above, skip prefetch if we expect not to need page */
691+
skip_fetch= (node->can_skip_fetch&&
692+
(node->tbmres ? !node->tbmres->recheck : false)&&
693+
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
694+
tbmpre->blockno,
695+
&node->pvmbuffer));
696+
697+
if (!skip_fetch)
698+
PrefetchBuffer(scan->rs_rd,MAIN_FORKNUM,tbmpre->blockno);
637699
}
638700
}
639701
}
@@ -687,6 +749,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
687749
/* rescan to release any page pin */
688750
heap_rescan(node->ss.ss_currentScanDesc,NULL);
689751

752+
/* release bitmaps and buffers if any */
690753
if (node->tbmiterator)
691754
tbm_end_iterate(node->tbmiterator);
692755
if (node->prefetch_iterator)
@@ -697,13 +760,19 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
697760
tbm_end_shared_iterate(node->shared_prefetch_iterator);
698761
if (node->tbm)
699762
tbm_free(node->tbm);
763+
if (node->vmbuffer!=InvalidBuffer)
764+
ReleaseBuffer(node->vmbuffer);
765+
if (node->pvmbuffer!=InvalidBuffer)
766+
ReleaseBuffer(node->pvmbuffer);
700767
node->tbm=NULL;
701768
node->tbmiterator=NULL;
702769
node->tbmres=NULL;
703770
node->prefetch_iterator=NULL;
704771
node->initialized= false;
705772
node->shared_tbmiterator=NULL;
706773
node->shared_prefetch_iterator=NULL;
774+
node->vmbuffer=InvalidBuffer;
775+
node->pvmbuffer=InvalidBuffer;
707776

708777
ExecScanReScan(&node->ss);
709778

@@ -748,7 +817,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
748817
ExecEndNode(outerPlanState(node));
749818

750819
/*
751-
* releasebitmap if any
820+
* releasebitmaps and buffers if any
752821
*/
753822
if (node->tbmiterator)
754823
tbm_end_iterate(node->tbmiterator);
@@ -760,6 +829,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
760829
tbm_end_shared_iterate(node->shared_tbmiterator);
761830
if (node->shared_prefetch_iterator)
762831
tbm_end_shared_iterate(node->shared_prefetch_iterator);
832+
if (node->vmbuffer!=InvalidBuffer)
833+
ReleaseBuffer(node->vmbuffer);
834+
if (node->pvmbuffer!=InvalidBuffer)
835+
ReleaseBuffer(node->pvmbuffer);
763836

764837
/*
765838
* close heap scan
@@ -805,6 +878,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
805878
scanstate->tbm=NULL;
806879
scanstate->tbmiterator=NULL;
807880
scanstate->tbmres=NULL;
881+
scanstate->skip_fetch= false;
882+
scanstate->vmbuffer=InvalidBuffer;
883+
scanstate->pvmbuffer=InvalidBuffer;
808884
scanstate->exact_pages=0;
809885
scanstate->lossy_pages=0;
810886
scanstate->prefetch_iterator=NULL;
@@ -815,8 +891,19 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
815891
scanstate->pscan_len=0;
816892
scanstate->initialized= false;
817893
scanstate->shared_tbmiterator=NULL;
894+
scanstate->shared_prefetch_iterator=NULL;
818895
scanstate->pstate=NULL;
819896

897+
/*
898+
* We can potentially skip fetching heap pages if we do not need any
899+
* columns of the table, either for checking non-indexable quals or for
900+
* returning data. This test is a bit simplistic, as it checks the
901+
* stronger condition that there's no qual or return tlist at all. But in
902+
* most cases it's probably not worth working harder than that.
903+
*/
904+
scanstate->can_skip_fetch= (node->scan.plan.qual==NIL&&
905+
node->scan.plan.targetlist==NIL);
906+
820907
/*
821908
* Miscellaneous initialization
822909
*

‎src/backend/optimizer/plan/createplan.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
807807
if (IsA(path,CustomPath))
808808
return false;
809809

810+
/*
811+
* If a bitmap scan's tlist is empty, keep it as-is. This may allow the
812+
* executor to skip heap page fetches, and in any case, the benefit of
813+
* using a physical tlist instead would be minimal.
814+
*/
815+
if (IsA(path,BitmapHeapPath)&&
816+
path->pathtarget->exprs==NIL)
817+
return false;
818+
810819
/*
811820
* Can't do it if any system columns or whole-row Vars are requested.
812821
* (This could possibly be fixed but would take some fragile assumptions

‎src/include/nodes/execnodes.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,7 @@ typedef struct EState
507507
bool*es_epqTupleSet;/* true if EPQ tuple is provided */
508508
bool*es_epqScanDone;/* true if EPQ tuple has been fetched */
509509

510-
booles_use_parallel_mode;/* can we use parallel workers? */
510+
booles_use_parallel_mode;/* can we use parallel workers? */
511511

512512
/* The per-query shared memory area to use for parallel execution. */
513513
structdsa_area*es_query_dsa;
@@ -1331,6 +1331,10 @@ typedef struct ParallelBitmapHeapState
13311331
*tbm bitmap obtained from child index scan(s)
13321332
*tbmiterator iterator for scanning current pages
13331333
*tbmres current-page data
1334+
*can_skip_fetch can we potentially skip tuple fetches in this scan?
1335+
*skip_fetch are we skipping tuple fetches on this page?
1336+
*vmbuffer buffer for visibility-map lookups
1337+
*pvmbuffer ditto, for prefetched pages
13341338
*exact_pages total number of exact pages retrieved
13351339
*lossy_pages total number of lossy pages retrieved
13361340
*prefetch_iterator iterator for prefetching ahead of current page
@@ -1351,6 +1355,10 @@ typedef struct BitmapHeapScanState
13511355
TIDBitmap*tbm;
13521356
TBMIterator*tbmiterator;
13531357
TBMIterateResult*tbmres;
1358+
boolcan_skip_fetch;
1359+
boolskip_fetch;
1360+
Buffervmbuffer;
1361+
Bufferpvmbuffer;
13541362
longexact_pages;
13551363
longlossy_pages;
13561364
TBMIterator*prefetch_iterator;

‎src/test/regress/expected/stats.out

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
136136
(1 row)
137137

138138
-- do an indexscan
139+
-- make sure it is not a bitmap scan, which might skip fetching heap tuples
140+
SET enable_bitmapscan TO off;
139141
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
140142
count
141143
-------
142144
1
143145
(1 row)
144146

147+
RESET enable_bitmapscan;
145148
-- We can't just call wait_for_stats() at this point, because we only
146149
-- transmit stats when the session goes idle, and we probably didn't
147150
-- transmit the last couple of counts yet thanks to the rate-limiting logic

‎src/test/regress/sql/stats.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,10 @@ ROLLBACK;
138138
-- do a seqscan
139139
SELECTcount(*)FROM tenk2;
140140
-- do an indexscan
141+
-- make sure it is not a bitmap scan, which might skip fetching heap tuples
142+
SET enable_bitmapscan TO off;
141143
SELECTcount(*)FROM tenk2WHERE unique1=1;
144+
RESET enable_bitmapscan;
142145

143146
-- We can't just call wait_for_stats() at this point, because we only
144147
-- transmit stats when the session goes idle, and we probably didn't

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp