Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit21a152b

Browse files
Improve nbtree skip scan primitive scan scheduling.
Don't allow nbtree scans with skip arrays to end any primitive scan onits first leaf page without giving some consideration to how many timesthe scan's arrays advanced while changing at least one skip array(though continue not caring about the number of array advancements thatonly affected SAOP arrays, even during skip scans with SAOP arrays).Now when a scan performs more than 3 such array advancements in thecourse of reading a single leaf page, it is taken as a signal that thenext page is unlikely to be skippable. We'll therefore continue theongoing primitive index scan, at least until we can perform a recheckagainst the next page's finaltup.Testing has shown that this new heuristic occasionally makes all thedifference with skip scans that were expected to rely on the "passedfirst page" heuristic added by commit9a2e2a2. Without it, there is aremaining risk that certain kinds of skip scans will never quite manageto clear the initial hurdle of performing a primitive scan that lastsbeyond its first leaf page (or that such a skip scan will only clearthat initial hurdle when it has already wasted noticeably-many cyclesdue to inefficient primitive scan scheduling).Follow-up to commits92fe23d and9a2e2a2.Author: Peter Geoghegan <pg@bowt.ie>Reviewed-By: Matthias van de Meent <boekewurm+postgres@gmail.com>Discussion:https://postgr.es/m/CAH2-Wz=RVdG3zWytFWBsyW7fWH7zveFvTHed5JKEsuTT0RCO_A@mail.gmail.com
1 parentcf2655a commit21a152b

File tree

3 files changed

+78
-31
lines changed

3 files changed

+78
-31
lines changed

‎src/backend/access/nbtree/nbtsearch.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,6 +1655,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
16551655
pstate.continuescan= true;/* default assumption */
16561656
pstate.rechecks=0;
16571657
pstate.targetdistance=0;
1658+
pstate.nskipadvances=0;
16581659

16591660
if (ScanDirectionIsForward(dir))
16601661
{
@@ -1884,6 +1885,21 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
18841885
passes_quals=_bt_checkkeys(scan,&pstate,arrayKeys,
18851886
itup,indnatts);
18861887

1888+
if (arrayKeys&&so->scanBehind)
1889+
{
1890+
/*
1891+
* Done scanning this page, but not done with the current
1892+
* primscan.
1893+
*
1894+
* Note: Forward scans don't check this explicitly, since they
1895+
* prefer to reuse pstate.skip for this instead.
1896+
*/
1897+
Assert(!passes_quals&&pstate.continuescan);
1898+
Assert(!pstate.forcenonrequired);
1899+
1900+
break;
1901+
}
1902+
18871903
/*
18881904
* Check if we need to skip ahead to a later tuple (only possible
18891905
* when the scan uses array keys)

‎src/backend/access/nbtree/nbtutils.c

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#defineLOOK_AHEAD_REQUIRED_RECHECKS 3
2828
#defineLOOK_AHEAD_DEFAULT_DISTANCE 5
29+
#defineNSKIPADVANCES_THRESHOLD3
2930

3031
staticinlineint32_bt_compare_array_skey(FmgrInfo*orderproc,
3132
Datumtupdatum,booltupnull,
@@ -41,7 +42,8 @@ static void _bt_array_set_low_or_high(Relation rel, ScanKey skey,
4142
BTArrayKeyInfo*array,boollow_not_high);
4243
staticbool_bt_array_decrement(Relationrel,ScanKeyskey,BTArrayKeyInfo*array);
4344
staticbool_bt_array_increment(Relationrel,ScanKeyskey,BTArrayKeyInfo*array);
44-
staticbool_bt_advance_array_keys_increment(IndexScanDescscan,ScanDirectiondir);
45+
staticbool_bt_advance_array_keys_increment(IndexScanDescscan,ScanDirectiondir,
46+
bool*skip_array_set);
4547
staticvoid_bt_rewind_nonrequired_arrays(IndexScanDescscan,ScanDirectiondir);
4648
staticbool_bt_tuple_before_array_skeys(IndexScanDescscan,ScanDirectiondir,
4749
IndexTupletuple,TupleDesctupdesc,inttupnatts,
@@ -970,7 +972,8 @@ _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array)
970972
* advanced (every array remains at its final element for scan direction).
971973
*/
972974
staticbool
973-
_bt_advance_array_keys_increment(IndexScanDescscan,ScanDirectiondir)
975+
_bt_advance_array_keys_increment(IndexScanDescscan,ScanDirectiondir,
976+
bool*skip_array_set)
974977
{
975978
Relationrel=scan->indexRelation;
976979
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
@@ -985,6 +988,9 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir)
985988
BTArrayKeyInfo*array=&so->arrayKeys[i];
986989
ScanKeyskey=&so->keyData[array->scan_key];
987990

991+
if (array->num_elems==-1)
992+
*skip_array_set= true;
993+
988994
if (ScanDirectionIsForward(dir))
989995
{
990996
if (_bt_array_increment(rel,skey,array))
@@ -1460,6 +1466,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
14601466
ScanDirectiondir=so->currPos.dir;
14611467
intarrayidx=0;
14621468
boolbeyond_end_advance= false,
1469+
skip_array_advanced= false,
14631470
has_required_opposite_direction_only= false,
14641471
all_required_satisfied= true,
14651472
all_satisfied= true;
@@ -1756,6 +1763,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
17561763
/* Skip array's new element is tupdatum (or MINVAL/MAXVAL) */
17571764
_bt_skiparray_set_element(rel,cur,array,result,
17581765
tupdatum,tupnull);
1766+
skip_array_advanced= true;
17591767
}
17601768
elseif (array->cur_elem!=set_elem)
17611769
{
@@ -1772,11 +1780,19 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
17721780
* higher-order arrays (might exhaust all the scan's arrays instead, which
17731781
* ends the top-level scan).
17741782
*/
1775-
if (beyond_end_advance&& !_bt_advance_array_keys_increment(scan,dir))
1783+
if (beyond_end_advance&&
1784+
!_bt_advance_array_keys_increment(scan,dir,&skip_array_advanced))
17761785
gotoend_toplevel_scan;
17771786

17781787
Assert(_bt_verify_keys_with_arraykeys(scan));
17791788

1789+
/*
1790+
* Maintain a page-level count of the number of times the scan's array
1791+
* keys advanced in a way that affected at least one skip array
1792+
*/
1793+
if (sktrig_required&&skip_array_advanced)
1794+
pstate->nskipadvances++;
1795+
17801796
/*
17811797
* Does tuple now satisfy our new qual? Recheck with _bt_check_compare.
17821798
*
@@ -1946,26 +1962,12 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
19461962
* Being pessimistic would also give some scans with non-required arrays a
19471963
* perverse advantage over similar scans that use required arrays instead.
19481964
*
1949-
* You can think of this as a speculative bet on what the scan is likely
1950-
* to find on the next page. It's not much of a gamble, though, since the
1951-
* untruncated prefix of attributes must strictly satisfy the new qual.
1965+
* This is similar to our scan-level heuristics, below. They also set
1966+
* scanBehind to speculatively continue the primscan onto the next page.
19521967
*/
19531968
if (so->scanBehind)
19541969
{
1955-
/*
1956-
* Truncated high key -- _bt_scanbehind_checkkeys recheck scheduled.
1957-
*
1958-
* Remember if recheck needs to call _bt_oppodir_checkkeys for next
1959-
* page's finaltup (see below comments about "Handle inequalities
1960-
* marked required in the opposite scan direction" for why).
1961-
*/
1962-
so->oppositeDirCheck=has_required_opposite_direction_only;
1963-
1964-
/*
1965-
* Make sure that any SAOP arrays that were not marked required by
1966-
* preprocessing are reset to their first element for this direction
1967-
*/
1968-
_bt_rewind_nonrequired_arrays(scan,dir);
1970+
/* Truncated high key -- _bt_scanbehind_checkkeys recheck scheduled */
19691971
}
19701972

19711973
/*
@@ -2006,6 +2008,10 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
20062008
elseif (has_required_opposite_direction_only&&pstate->finaltup&&
20072009
unlikely(!_bt_oppodir_checkkeys(scan,dir,pstate->finaltup)))
20082010
{
2011+
/*
2012+
* Make sure that any SAOP arrays that were not marked required by
2013+
* preprocessing are reset to their first element for this direction
2014+
*/
20092015
_bt_rewind_nonrequired_arrays(scan,dir);
20102016
gotonew_prim_scan;
20112017
}
@@ -2032,11 +2038,21 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
20322038

20332039
if (so->scanBehind)
20342040
{
2035-
/* Optimization: skip by setting "look ahead" mechanism's offnum */
2041+
/*
2042+
* Remember if recheck needs to call _bt_oppodir_checkkeys for next
2043+
* page's finaltup (see above comments about "Handle inequalities
2044+
* marked required in the opposite scan direction" for why).
2045+
*/
2046+
so->oppositeDirCheck=has_required_opposite_direction_only;
2047+
2048+
_bt_rewind_nonrequired_arrays(scan,dir);
2049+
2050+
/*
2051+
* skip by setting "look ahead" mechanism's offnum for forwards scans
2052+
* (backwards scans check scanBehind flag directly instead)
2053+
*/
20362054
if (ScanDirectionIsForward(dir))
20372055
pstate->skip=pstate->maxoff+1;
2038-
else
2039-
pstate->skip=pstate->minoff-1;
20402056
}
20412057

20422058
/* Caller's tuple doesn't match the new qual */
@@ -2059,19 +2075,31 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
20592075
* This will in turn encourage _bt_readpage to apply the pstate.startikey
20602076
* optimization more often.
20612077
*
2062-
* Note: This heuristic isn't as aggressive as you might think. We're
2078+
* Also continue the ongoing primitive index scan when it is still on the
2079+
* first page if there have been more than NSKIPADVANCES_THRESHOLD calls
2080+
* here that each advanced at least one of the scan's skip arrays
2081+
* (deliberately ignore advancements that only affected SAOP arrays here).
2082+
* A page that cycles through this many skip array elements is quite
2083+
* likely to neighbor similar pages, that we'll also need to read.
2084+
*
2085+
* Note: These heuristics aren't as aggressive as you might think. We're
20632086
* conservative about allowing a primitive scan to step from the first
20642087
* leaf page it reads to the page's sibling page (we only allow it on
2065-
* first pages whose finaltup strongly suggests that it'll work out).
2088+
* first pages whose finaltup strongly suggests that it'll work out, as
2089+
* well as first pages that have a large number of skip array advances).
20662090
* Clearing this first page finaltup hurdle is a strong signal in itself.
2091+
*
2092+
* Note: The NSKIPADVANCES_THRESHOLD heuristic exists only to avoid
2093+
* pathological cases. Specifically, cases where a skip scan should just
2094+
* behave like a traditional full index scan, but ends up "skipping" again
2095+
* and again, descending to the prior leaf page's direct sibling leaf page
2096+
* each time. This misbehavior would otherwise be possible during scans
2097+
* that never quite manage to "clear the first page finaltup hurdle".
20672098
*/
2068-
if (!pstate->firstpage)
2099+
if (!pstate->firstpage||pstate->nskipadvances>NSKIPADVANCES_THRESHOLD)
20692100
{
20702101
/* Schedule a recheck once on the next (or previous) page */
20712102
so->scanBehind= true;
2072-
so->oppositeDirCheck=has_required_opposite_direction_only;
2073-
2074-
_bt_rewind_nonrequired_arrays(scan,dir);
20752103

20762104
/* Continue the current primitive scan after all */
20772105
gotocontinue_scan;
@@ -2441,7 +2469,9 @@ _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
24412469
* the first page (first for the current primitive scan) avoids wasting cycles
24422470
* during selective point queries. They typically don't stand to gain as much
24432471
* when we can set pstate.startikey, and are likely to notice the overhead of
2444-
* calling here.
2472+
* calling here. (Also, allowing pstate.forcenonrequired to be set on a
2473+
* primscan's first page would mislead _bt_advance_array_keys, which expects
2474+
* pstate.nskipadvances to be representative of every first page's key space.)
24452475
*
24462476
* Caller must reset startikey and forcenonrequired ahead of the _bt_checkkeys
24472477
* call for pstate.finaltup iff we set forcenonrequired=true. This will give

‎src/include/access/nbtree.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1118,10 +1118,11 @@ typedef struct BTReadPageState
11181118

11191119
/*
11201120
* Private _bt_checkkeys state used to manage "look ahead" optimization
1121-
* (only used during scans with array keys)
1121+
*and primscan scheduling(only used during scans with array keys)
11221122
*/
11231123
int16rechecks;
11241124
int16targetdistance;
1125+
int16nskipadvances;
11251126

11261127
}BTReadPageState;
11271128

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp