Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit29b64d1

Browse files
Add nbtree high key "continuescan" optimization.
Teach nbtree forward index scans to check the high key before moving tothe right sibling page in the hope of finding that it isn't actuallynecessary to do so. The new check may indicate that the scan definitelycannot find matching tuples to the right, ending the scan immediately.We already opportunistically force a similar "continuescan orientated"key check of the final non-pivot tuple when it's clear that it cannot bereturned to the scan due to being dead-to-all. The new high key checkis complementary.The new approach for forward scans is more effective than checking thefinal non-pivot tuple, especially with composite indexes and non-uniqueindexes. The improvements to the logic for picking a split point addedby commitfab2502 make it likely that relatively dissimilar high keyswill appear on a page. A distinguishing key value that can only appearon non-pivot tuples on the right sibling page will often be present inleaf page high keys.Since forcing the final item to be key checked no longer makes anydifference in the case of forward scans, the existing extra key check isnow only used for backwards scans. Backward scans continue toopportunistically check the final non-pivot tuple, which is actually thefirst non-pivot tuple on the page (not the last).Note that even pg_upgrade'd v3 indexes make use of this optimization.Author: Peter Geoghegan, Heikki LinnakangasReviewed-By: Heikki LinnakangasDiscussion:https://postgr.es/m/CAH2-WzkOmUduME31QnuTFpimejuQoiZ-HOf0pOWeFZNhTMctvA@mail.gmail.com
1 parent4ba96d1 commit29b64d1

File tree

3 files changed

+128
-77
lines changed

3 files changed

+128
-77
lines changed

‎src/backend/access/nbtree/nbtsearch.c

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,8 +1406,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
14061406
OffsetNumberminoff;
14071407
OffsetNumbermaxoff;
14081408
intitemIndex;
1409-
IndexTupleitup;
14101409
boolcontinuescan;
1410+
intindnatts;
14111411

14121412
/*
14131413
* We must have the buffer pinned and locked, but the usual macro can't be
@@ -1427,6 +1427,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
14271427
_bt_parallel_release(scan,BufferGetBlockNumber(so->currPos.buf));
14281428
}
14291429

1430+
continuescan= true;/* default assumption */
1431+
indnatts=IndexRelationGetNumberOfAttributes(scan->indexRelation);
14301432
minoff=P_FIRSTDATAKEY(opaque);
14311433
maxoff=PageGetMaxOffsetNumber(page);
14321434

@@ -1468,23 +1470,58 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
14681470

14691471
while (offnum <=maxoff)
14701472
{
1471-
itup=_bt_checkkeys(scan,page,offnum,dir,&continuescan);
1472-
if (itup!=NULL)
1473+
ItemIdiid=PageGetItemId(page,offnum);
1474+
IndexTupleitup;
1475+
1476+
/*
1477+
* If the scan specifies not to return killed tuples, then we
1478+
* treat a killed tuple as not passing the qual
1479+
*/
1480+
if (scan->ignore_killed_tuples&&ItemIdIsDead(iid))
1481+
{
1482+
offnum=OffsetNumberNext(offnum);
1483+
continue;
1484+
}
1485+
1486+
itup= (IndexTuple)PageGetItem(page,iid);
1487+
1488+
if (_bt_checkkeys(scan,itup,indnatts,dir,&continuescan))
14731489
{
14741490
/* tuple passes all scan key conditions, so remember it */
14751491
_bt_saveitem(so,itemIndex,offnum,itup);
14761492
itemIndex++;
14771493
}
1494+
/* When !continuescan, there can't be any more matches, so stop */
14781495
if (!continuescan)
1479-
{
1480-
/* there can't be any more matches, so stop */
1481-
so->currPos.moreRight= false;
14821496
break;
1483-
}
14841497

14851498
offnum=OffsetNumberNext(offnum);
14861499
}
14871500

1501+
/*
1502+
* We don't need to visit page to the right when the high key
1503+
* indicates that no more matches will be found there.
1504+
*
1505+
* Checking the high key like this works out more often than you might
1506+
* think. Leaf page splits pick a split point between the two most
1507+
* dissimilar tuples (this is weighed against the need to evenly share
1508+
* free space). Leaf pages with high key attribute values that can
1509+
* only appear on non-pivot tuples on the right sibling page are
1510+
* common.
1511+
*/
1512+
if (continuescan&& !P_RIGHTMOST(opaque))
1513+
{
1514+
ItemIdiid=PageGetItemId(page,P_HIKEY);
1515+
IndexTupleitup= (IndexTuple)PageGetItem(page,iid);
1516+
inttruncatt;
1517+
1518+
truncatt=BTreeTupleGetNAtts(itup,scan->indexRelation);
1519+
_bt_checkkeys(scan,itup,truncatt,dir,&continuescan);
1520+
}
1521+
1522+
if (!continuescan)
1523+
so->currPos.moreRight= false;
1524+
14881525
Assert(itemIndex <=MaxIndexTuplesPerPage);
14891526
so->currPos.firstItem=0;
14901527
so->currPos.lastItem=itemIndex-1;
@@ -1499,8 +1536,40 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
14991536

15001537
while (offnum >=minoff)
15011538
{
1502-
itup=_bt_checkkeys(scan,page,offnum,dir,&continuescan);
1503-
if (itup!=NULL)
1539+
ItemIdiid=PageGetItemId(page,offnum);
1540+
IndexTupleitup;
1541+
booltuple_alive;
1542+
boolpasses_quals;
1543+
1544+
/*
1545+
* If the scan specifies not to return killed tuples, then we
1546+
* treat a killed tuple as not passing the qual. Most of the
1547+
* time, it's a win to not bother examining the tuple's index
1548+
* keys, but just skip to the next tuple (previous, actually,
1549+
* since we're scanning backwards). However, if this is the first
1550+
* tuple on the page, we do check the index keys, to prevent
1551+
* uselessly advancing to the page to the left. This is similar
1552+
* to the high key optimization used by forward scans.
1553+
*/
1554+
if (scan->ignore_killed_tuples&&ItemIdIsDead(iid))
1555+
{
1556+
Assert(offnum >=P_FIRSTDATAKEY(opaque));
1557+
if (offnum>P_FIRSTDATAKEY(opaque))
1558+
{
1559+
offnum=OffsetNumberPrev(offnum);
1560+
continue;
1561+
}
1562+
1563+
tuple_alive= false;
1564+
}
1565+
else
1566+
tuple_alive= true;
1567+
1568+
itup= (IndexTuple)PageGetItem(page,iid);
1569+
1570+
passes_quals=_bt_checkkeys(scan,itup,indnatts,dir,
1571+
&continuescan);
1572+
if (passes_quals&&tuple_alive)
15041573
{
15051574
/* tuple passes all scan key conditions, so remember it */
15061575
itemIndex--;

‎src/backend/access/nbtree/nbtutils.c

Lines changed: 48 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
4848
staticbool_bt_fix_scankey_strategy(ScanKeyskey,int16*indoption);
4949
staticvoid_bt_mark_scankey_required(ScanKeyskey);
5050
staticbool_bt_check_rowcompare(ScanKeyskey,
51-
IndexTupletuple,TupleDesctupdesc,
51+
IndexTupletuple,inttupnatts,TupleDesctupdesc,
5252
ScanDirectiondir,bool*continuescan);
5353
staticint_bt_keep_natts(Relationrel,IndexTuplelastleft,
5454
IndexTuplefirstright,BTScanInsertitup_key);
@@ -1333,72 +1333,34 @@ _bt_mark_scankey_required(ScanKey skey)
13331333
/*
13341334
* Test whether an indextuple satisfies all the scankey conditions.
13351335
*
1336-
* If so, return the address of the index tuple on the index page.
1337-
* If not, return NULL.
1336+
* Return true if so, false if not. If the tuple fails to pass the qual,
1337+
* we also determine whether there's any need to continue the scan beyond
1338+
* this tuple, and set *continuescan accordingly. See comments for
1339+
* _bt_preprocess_keys(), above, about how this is done.
13381340
*
1339-
* If the tuple fails to pass the qual, we also determine whether there's
1340-
* any need to continue the scan beyond this tuple, and set *continuescan
1341-
* accordingly. See comments for _bt_preprocess_keys(), above, about how
1342-
* this is done.
1341+
* Forward scan callers can pass a high key tuple in the hopes of having
1342+
* us set *continuescanthat to false, and avoiding an unnecessary visit to
1343+
* the page to the right.
13431344
*
13441345
* scan: index scan descriptor (containing a search-type scankey)
1345-
*page: buffer page containingindex tuple
1346-
*offnum: offsetnumber ofindex tuple (must be a valid item!)
1346+
*tuple:index tuple to test
1347+
*tupnatts:number ofattributes in tupnatts (high key may be truncated)
13471348
* dir: direction we are scanning in
13481349
* continuescan: output parameter (will be set correctly in all cases)
1349-
*
1350-
* Caller must hold pin and lock on the index page.
13511350
*/
1352-
IndexTuple
1353-
_bt_checkkeys(IndexScanDescscan,
1354-
Pagepage,OffsetNumberoffnum,
1351+
bool
1352+
_bt_checkkeys(IndexScanDescscan,IndexTupletuple,inttupnatts,
13551353
ScanDirectiondir,bool*continuescan)
13561354
{
1357-
ItemIdiid=PageGetItemId(page,offnum);
1358-
booltuple_alive;
1359-
IndexTupletuple;
13601355
TupleDesctupdesc;
13611356
BTScanOpaqueso;
13621357
intkeysz;
13631358
intikey;
13641359
ScanKeykey;
13651360

1366-
*continuescan= true;/* default assumption */
1367-
1368-
/*
1369-
* If the scan specifies not to return killed tuples, then we treat a
1370-
* killed tuple as not passing the qual. Most of the time, it's a win to
1371-
* not bother examining the tuple's index keys, but just return
1372-
* immediately with continuescan = true to proceed to the next tuple.
1373-
* However, if this is the last tuple on the page, we should check the
1374-
* index keys to prevent uselessly advancing to the next page.
1375-
*/
1376-
if (scan->ignore_killed_tuples&&ItemIdIsDead(iid))
1377-
{
1378-
/* return immediately if there are more tuples on the page */
1379-
if (ScanDirectionIsForward(dir))
1380-
{
1381-
if (offnum<PageGetMaxOffsetNumber(page))
1382-
returnNULL;
1383-
}
1384-
else
1385-
{
1386-
BTPageOpaqueopaque= (BTPageOpaque)PageGetSpecialPointer(page);
1387-
1388-
if (offnum>P_FIRSTDATAKEY(opaque))
1389-
returnNULL;
1390-
}
1391-
1392-
/*
1393-
* OK, we want to check the keys so we can set continuescan correctly,
1394-
* but we'll return NULL even if the tuple passes the key tests.
1395-
*/
1396-
tuple_alive= false;
1397-
}
1398-
else
1399-
tuple_alive= true;
1361+
Assert(BTreeTupleGetNAtts(tuple,scan->indexRelation)==tupnatts);
14001362

1401-
tuple=(IndexTuple)PageGetItem(page,iid);
1363+
*continuescan=true;/* default assumption */
14021364

14031365
tupdesc=RelationGetDescr(scan->indexRelation);
14041366
so= (BTScanOpaque)scan->opaque;
@@ -1410,13 +1372,25 @@ _bt_checkkeys(IndexScanDesc scan,
14101372
boolisNull;
14111373
Datumtest;
14121374

1413-
Assert(key->sk_attno <=BTreeTupleGetNAtts(tuple,scan->indexRelation));
1375+
if (key->sk_attno>tupnatts)
1376+
{
1377+
/*
1378+
* This attribute is truncated (must be high key). The value for
1379+
* this attribute in the first non-pivot tuple on the page to the
1380+
* right could be any possible value. Assume that truncated
1381+
* attribute passes the qual.
1382+
*/
1383+
Assert(ScanDirectionIsForward(dir));
1384+
continue;
1385+
}
1386+
14141387
/* row-comparison keys need special processing */
14151388
if (key->sk_flags&SK_ROW_HEADER)
14161389
{
1417-
if (_bt_check_rowcompare(key,tuple,tupdesc,dir,continuescan))
1390+
if (_bt_check_rowcompare(key,tuple,tupnatts,tupdesc,dir,
1391+
continuescan))
14181392
continue;
1419-
returnNULL;
1393+
returnfalse;
14201394
}
14211395

14221396
datum=index_getattr(tuple,
@@ -1454,7 +1428,7 @@ _bt_checkkeys(IndexScanDesc scan,
14541428
/*
14551429
* In any case, this indextuple doesn't match the qual.
14561430
*/
1457-
returnNULL;
1431+
returnfalse;
14581432
}
14591433

14601434
if (isNull)
@@ -1495,7 +1469,7 @@ _bt_checkkeys(IndexScanDesc scan,
14951469
/*
14961470
* In any case, this indextuple doesn't match the qual.
14971471
*/
1498-
returnNULL;
1472+
returnfalse;
14991473
}
15001474

15011475
test=FunctionCall2Coll(&key->sk_func,key->sk_collation,
@@ -1523,16 +1497,12 @@ _bt_checkkeys(IndexScanDesc scan,
15231497
/*
15241498
* In any case, this indextuple doesn't match the qual.
15251499
*/
1526-
returnNULL;
1500+
returnfalse;
15271501
}
15281502
}
15291503

1530-
/* Check for failure due to it being a killed tuple. */
1531-
if (!tuple_alive)
1532-
returnNULL;
1533-
15341504
/* If we get here, the tuple passes all index quals. */
1535-
returntuple;
1505+
returntrue;
15361506
}
15371507

15381508
/*
@@ -1545,8 +1515,8 @@ _bt_checkkeys(IndexScanDesc scan,
15451515
* This is a subroutine for _bt_checkkeys, which see for more info.
15461516
*/
15471517
staticbool
1548-
_bt_check_rowcompare(ScanKeyskey,IndexTupletuple,TupleDesctupdesc,
1549-
ScanDirectiondir,bool*continuescan)
1518+
_bt_check_rowcompare(ScanKeyskey,IndexTupletuple,inttupnatts,
1519+
TupleDesctupdesc,ScanDirectiondir,bool*continuescan)
15501520
{
15511521
ScanKeysubkey= (ScanKey)DatumGetPointer(skey->sk_argument);
15521522
int32cmpresult=0;
@@ -1563,6 +1533,19 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
15631533

15641534
Assert(subkey->sk_flags&SK_ROW_MEMBER);
15651535

1536+
if (subkey->sk_attno>tupnatts)
1537+
{
1538+
/*
1539+
* This attribute is truncated (must be high key). The value for
1540+
* this attribute in the first non-pivot tuple on the page to the
1541+
* right could be any possible value. Assume that truncated
1542+
* attribute passes the qual.
1543+
*/
1544+
Assert(ScanDirectionIsForward(dir));
1545+
cmpresult=0;
1546+
continue;
1547+
}
1548+
15661549
datum=index_getattr(tuple,
15671550
subkey->sk_attno,
15681551
tupdesc,

‎src/include/access/nbtree.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -772,9 +772,8 @@ extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
772772
externvoid_bt_mark_array_keys(IndexScanDescscan);
773773
externvoid_bt_restore_array_keys(IndexScanDescscan);
774774
externvoid_bt_preprocess_keys(IndexScanDescscan);
775-
externIndexTuple_bt_checkkeys(IndexScanDescscan,
776-
Pagepage,OffsetNumberoffnum,
777-
ScanDirectiondir,bool*continuescan);
775+
externbool_bt_checkkeys(IndexScanDescscan,IndexTupletuple,
776+
inttupnatts,ScanDirectiondir,bool*continuescan);
778777
externvoid_bt_killitems(IndexScanDescscan);
779778
externBTCycleId_bt_vacuum_cycleid(Relationrel);
780779
externBTCycleId_bt_start_vacuum(Relationrel);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp