Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitbb437f9

Browse files
committed
Add TID Range Scans to support efficient scanning ranges of TIDs
This adds a new executor node named TID Range Scan. The query plannerwill generate paths for TID Range scans when quals are discovered on baserelations which search for ranges on the table's ctid column. Theseranges may be open at either end. For example, WHERE ctid >= '(10,0)';will return all tuples on page 10 and over.To support this, two new optional callback functions have been added totable AM. scan_set_tidrange is used to set the scan range to just thegiven range of TIDs. scan_getnextslot_tidrange fetches the next tuplein the given range.For AMs were scanning ranges of TIDs would not make sense, these functionscan be set to NULL in the TableAmRoutine. The query planner won'tgenerate TID Range Scan Paths in that case.Author: Edmund Horner, David RowleyReviewed-by: David Rowley, Tomas Vondra, Tom Lane, Andres Freund, Zhihong YuDiscussion:https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com
1 parentf4adc41 commitbb437f9

36 files changed

+1654
-22
lines changed

‎src/backend/access/heap/heapam.c

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
13911391
return true;
13921392
}
13931393

1394+
void
1395+
heap_set_tidrange(TableScanDescsscan,ItemPointermintid,
1396+
ItemPointermaxtid)
1397+
{
1398+
HeapScanDescscan= (HeapScanDesc)sscan;
1399+
BlockNumberstartBlk;
1400+
BlockNumbernumBlks;
1401+
ItemPointerDatahighestItem;
1402+
ItemPointerDatalowestItem;
1403+
1404+
/*
1405+
* For relations without any pages, we can simply leave the TID range
1406+
* unset. There will be no tuples to scan, therefore no tuples outside
1407+
* the given TID range.
1408+
*/
1409+
if (scan->rs_nblocks==0)
1410+
return;
1411+
1412+
/*
1413+
* Set up some ItemPointers which point to the first and last possible
1414+
* tuples in the heap.
1415+
*/
1416+
ItemPointerSet(&highestItem,scan->rs_nblocks-1,MaxOffsetNumber);
1417+
ItemPointerSet(&lowestItem,0,FirstOffsetNumber);
1418+
1419+
/*
1420+
* If the given maximum TID is below the highest possible TID in the
1421+
* relation, then restrict the range to that, otherwise we scan to the end
1422+
* of the relation.
1423+
*/
1424+
if (ItemPointerCompare(maxtid,&highestItem)<0)
1425+
ItemPointerCopy(maxtid,&highestItem);
1426+
1427+
/*
1428+
* If the given minimum TID is above the lowest possible TID in the
1429+
* relation, then restrict the range to only scan for TIDs above that.
1430+
*/
1431+
if (ItemPointerCompare(mintid,&lowestItem)>0)
1432+
ItemPointerCopy(mintid,&lowestItem);
1433+
1434+
/*
1435+
* Check for an empty range and protect from would be negative results
1436+
* from the numBlks calculation below.
1437+
*/
1438+
if (ItemPointerCompare(&highestItem,&lowestItem)<0)
1439+
{
1440+
/* Set an empty range of blocks to scan */
1441+
heap_setscanlimits(sscan,0,0);
1442+
return;
1443+
}
1444+
1445+
/*
1446+
* Calculate the first block and the number of blocks we must scan. We
1447+
* could be more aggressive here and perform some more validation to try
1448+
* and further narrow the scope of blocks to scan by checking if the
1449+
* lowerItem has an offset above MaxOffsetNumber. In this case, we could
1450+
* advance startBlk by one. Likewise, if highestItem has an offset of 0
1451+
* we could scan one fewer blocks. However, such an optimization does not
1452+
* seem worth troubling over, currently.
1453+
*/
1454+
startBlk=ItemPointerGetBlockNumberNoCheck(&lowestItem);
1455+
1456+
numBlks=ItemPointerGetBlockNumberNoCheck(&highestItem)-
1457+
ItemPointerGetBlockNumberNoCheck(&lowestItem)+1;
1458+
1459+
/* Set the start block and number of blocks to scan */
1460+
heap_setscanlimits(sscan,startBlk,numBlks);
1461+
1462+
/* Finally, set the TID range in sscan */
1463+
ItemPointerCopy(&lowestItem,&sscan->rs_mintid);
1464+
ItemPointerCopy(&highestItem,&sscan->rs_maxtid);
1465+
}
1466+
1467+
bool
1468+
heap_getnextslot_tidrange(TableScanDescsscan,ScanDirectiondirection,
1469+
TupleTableSlot*slot)
1470+
{
1471+
HeapScanDescscan= (HeapScanDesc)sscan;
1472+
ItemPointermintid=&sscan->rs_mintid;
1473+
ItemPointermaxtid=&sscan->rs_maxtid;
1474+
1475+
/* Note: no locking manipulations needed */
1476+
for (;;)
1477+
{
1478+
if (sscan->rs_flags&SO_ALLOW_PAGEMODE)
1479+
heapgettup_pagemode(scan,direction,sscan->rs_nkeys,sscan->rs_key);
1480+
else
1481+
heapgettup(scan,direction,sscan->rs_nkeys,sscan->rs_key);
1482+
1483+
if (scan->rs_ctup.t_data==NULL)
1484+
{
1485+
ExecClearTuple(slot);
1486+
return false;
1487+
}
1488+
1489+
/*
1490+
* heap_set_tidrange will have used heap_setscanlimits to limit the
1491+
* range of pages we scan to only ones that can contain the TID range
1492+
* we're scanning for. Here we must filter out any tuples from these
1493+
* pages that are outwith that range.
1494+
*/
1495+
if (ItemPointerCompare(&scan->rs_ctup.t_self,mintid)<0)
1496+
{
1497+
ExecClearTuple(slot);
1498+
1499+
/*
1500+
* When scanning backwards, the TIDs will be in descending order.
1501+
* Future tuples in this direction will be lower still, so we can
1502+
* just return false to indicate there will be no more tuples.
1503+
*/
1504+
if (ScanDirectionIsBackward(direction))
1505+
return false;
1506+
1507+
continue;
1508+
}
1509+
1510+
/*
1511+
* Likewise for the final page, we must filter out TIDs greater than
1512+
* maxtid.
1513+
*/
1514+
if (ItemPointerCompare(&scan->rs_ctup.t_self,maxtid)>0)
1515+
{
1516+
ExecClearTuple(slot);
1517+
1518+
/*
1519+
* When scanning forward, the TIDs will be in ascending order.
1520+
* Future tuples in this direction will be higher still, so we can
1521+
* just return false to indicate there will be no more tuples.
1522+
*/
1523+
if (ScanDirectionIsForward(direction))
1524+
return false;
1525+
continue;
1526+
}
1527+
1528+
break;
1529+
}
1530+
1531+
/*
1532+
* if we get here it means we have a new current scan tuple, so point to
1533+
* the proper return buffer and return the tuple.
1534+
*/
1535+
pgstat_count_heap_getnext(scan->rs_base.rs_rd);
1536+
1537+
ExecStoreBufferHeapTuple(&scan->rs_ctup,slot,scan->rs_cbuf);
1538+
return true;
1539+
}
1540+
13941541
/*
13951542
*heap_fetch- retrieve tuple with given tid
13961543
*

‎src/backend/access/heap/heapam_handler.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2542,6 +2542,9 @@ static const TableAmRoutine heapam_methods = {
25422542
.scan_rescan=heap_rescan,
25432543
.scan_getnextslot=heap_getnextslot,
25442544

2545+
.scan_set_tidrange=heap_set_tidrange,
2546+
.scan_getnextslot_tidrange=heap_getnextslot_tidrange,
2547+
25452548
.parallelscan_estimate=table_block_parallelscan_estimate,
25462549
.parallelscan_initialize=table_block_parallelscan_initialize,
25472550
.parallelscan_reinitialize=table_block_parallelscan_reinitialize,

‎src/backend/commands/explain.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
10571057
caseT_IndexOnlyScan:
10581058
caseT_BitmapHeapScan:
10591059
caseT_TidScan:
1060+
caseT_TidRangeScan:
10601061
caseT_SubqueryScan:
10611062
caseT_FunctionScan:
10621063
caseT_TableFuncScan:
@@ -1223,6 +1224,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
12231224
caseT_TidScan:
12241225
pname=sname="Tid Scan";
12251226
break;
1227+
caseT_TidRangeScan:
1228+
pname=sname="Tid Range Scan";
1229+
break;
12261230
caseT_SubqueryScan:
12271231
pname=sname="Subquery Scan";
12281232
break;
@@ -1417,6 +1421,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
14171421
caseT_SampleScan:
14181422
caseT_BitmapHeapScan:
14191423
caseT_TidScan:
1424+
caseT_TidRangeScan:
14201425
caseT_SubqueryScan:
14211426
caseT_FunctionScan:
14221427
caseT_TableFuncScan:
@@ -1871,6 +1876,23 @@ ExplainNode(PlanState *planstate, List *ancestors,
18711876
planstate,es);
18721877
}
18731878
break;
1879+
caseT_TidRangeScan:
1880+
{
1881+
/*
1882+
* The tidrangequals list has AND semantics, so be sure to
1883+
* show it as an AND condition.
1884+
*/
1885+
List*tidquals= ((TidRangeScan*)plan)->tidrangequals;
1886+
1887+
if (list_length(tidquals)>1)
1888+
tidquals=list_make1(make_andclause(tidquals));
1889+
show_scan_qual(tidquals,"TID Cond",planstate,ancestors,es);
1890+
show_scan_qual(plan->qual,"Filter",planstate,ancestors,es);
1891+
if (plan->qual)
1892+
show_instrumentation_count("Rows Removed by Filter",1,
1893+
planstate,es);
1894+
}
1895+
break;
18741896
caseT_ForeignScan:
18751897
show_scan_qual(plan->qual,"Filter",planstate,ancestors,es);
18761898
if (plan->qual)
@@ -3558,6 +3580,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
35583580
caseT_IndexOnlyScan:
35593581
caseT_BitmapHeapScan:
35603582
caseT_TidScan:
3583+
caseT_TidRangeScan:
35613584
caseT_ForeignScan:
35623585
caseT_CustomScan:
35633586
caseT_ModifyTable:

‎src/backend/executor/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ OBJS = \
6767
nodeSubplan.o\
6868
nodeSubqueryscan.o\
6969
nodeTableFuncscan.o\
70+
nodeTidrangescan.o\
7071
nodeTidscan.o\
7172
nodeUnique.o\
7273
nodeValuesscan.o\

‎src/backend/executor/execAmi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include"executor/nodeSubplan.h"
5252
#include"executor/nodeSubqueryscan.h"
5353
#include"executor/nodeTableFuncscan.h"
54+
#include"executor/nodeTidrangescan.h"
5455
#include"executor/nodeTidscan.h"
5556
#include"executor/nodeUnique.h"
5657
#include"executor/nodeValuesscan.h"
@@ -197,6 +198,10 @@ ExecReScan(PlanState *node)
197198
ExecReScanTidScan((TidScanState*)node);
198199
break;
199200

201+
caseT_TidRangeScanState:
202+
ExecReScanTidRangeScan((TidRangeScanState*)node);
203+
break;
204+
200205
caseT_SubqueryScanState:
201206
ExecReScanSubqueryScan((SubqueryScanState*)node);
202207
break;
@@ -562,6 +567,7 @@ ExecSupportsBackwardScan(Plan *node)
562567

563568
caseT_SeqScan:
564569
caseT_TidScan:
570+
caseT_TidRangeScan:
565571
caseT_FunctionScan:
566572
caseT_ValuesScan:
567573
caseT_CteScan:

‎src/backend/executor/execCurrent.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,7 @@ search_plan_tree(PlanState *node, Oid table_oid,
336336
caseT_IndexOnlyScanState:
337337
caseT_BitmapHeapScanState:
338338
caseT_TidScanState:
339+
caseT_TidRangeScanState:
339340
caseT_ForeignScanState:
340341
caseT_CustomScanState:
341342
{

‎src/backend/executor/execProcnode.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
#include"executor/nodeSubplan.h"
110110
#include"executor/nodeSubqueryscan.h"
111111
#include"executor/nodeTableFuncscan.h"
112+
#include"executor/nodeTidrangescan.h"
112113
#include"executor/nodeTidscan.h"
113114
#include"executor/nodeUnique.h"
114115
#include"executor/nodeValuesscan.h"
@@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
238239
estate,eflags);
239240
break;
240241

242+
caseT_TidRangeScan:
243+
result= (PlanState*)ExecInitTidRangeScan((TidRangeScan*)node,
244+
estate,eflags);
245+
break;
246+
241247
caseT_SubqueryScan:
242248
result= (PlanState*)ExecInitSubqueryScan((SubqueryScan*)node,
243249
estate,eflags);
@@ -637,6 +643,10 @@ ExecEndNode(PlanState *node)
637643
ExecEndTidScan((TidScanState*)node);
638644
break;
639645

646+
caseT_TidRangeScanState:
647+
ExecEndTidRangeScan((TidRangeScanState*)node);
648+
break;
649+
640650
caseT_SubqueryScanState:
641651
ExecEndSubqueryScan((SubqueryScanState*)node);
642652
break;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp