Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit cbfa92c

Browse files
committed
Improve index-only scans to avoid repeated access to the index page.
We copy all the matched tuples off the page during _bt_readpage, instead of expensively re-locking the page during each subsequent tuple fetch. This costs a bit more local storage, but not more than 2*BLCKSZ worth, and the reduction in LWLock traffic is certainly worth that. What's more, this lets us get rid of the API wart in the original patch that said an index AM could randomly decline to supply an index tuple despite having asserted pg_am.amcanreturn. That will be important for future improvements in the index-only-scan feature, since the executor will now be able to rely on having the index data available.
1 parent 45401c1 commit cbfa92c

File tree

8 files changed

+136
-135
lines changed

8 files changed

+136
-135
lines changed

‎doc/src/sgml/indexam.sgml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -394,12 +394,13 @@ amgettuple (IndexScanDesc scan,
394394
If the access method supports index-only scans (i.e.,
395395
<structfield>amcanreturn</structfield> is TRUE in its <structname>pg_am</>
396396
row), then on success it must also check
397-
<literal>scan-&gt;xs_want_itup</>, and if that is true it should return
397+
<literal>scan-&gt;xs_want_itup</>, and if that is true it must return
398398
the original indexed data for the index entry, in the form of an
399-
<structname>IndexTuple</> stored at <literal>scan-&gt;xs_itup</>. However,
400-
it is permissible for the access method to sometimes fail to provide this
401-
data, in which case it must set <literal>scan-&gt;xs_itup</> to NULL. That
402-
will result in a regular heap fetch occurring.
399+
<structname>IndexTuple</> pointer stored at <literal>scan-&gt;xs_itup</>.
400+
(Management of the data referenced by the pointer is the access method's
401+
responsibility. The data must remain good at least until the next
402+
<function>amgettuple</>, <function>amrescan</>, or <function>amendscan</>
403+
call for the scan.)
403404
</para>
404405

405406
<para>

‎src/backend/access/index/indexam.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,9 +443,10 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
443443
Assert(TransactionIdIsValid(RecentGlobalXmin));
444444

445445
/*
446-
* The AM's gettuple proc finds the next index entry matching the scan
447-
* keys, and puts the TID in xs_ctup.t_self. It should also set
448-
* scan->xs_recheck, though we pay no attention to that here.
446+
* The AM's amgettuple proc finds the next index entry matching the scan
447+
* keys, and puts the TID into scan->xs_ctup.t_self. It should also set
448+
* scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
449+
* to those fields here.
449450
*/
450451
found=DatumGetBool(FunctionCall2(procedure,
451452
PointerGetDatum(scan),

‎src/backend/access/nbtree/nbtree.c

Lines changed: 28 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
7373
BTCycleIdcycleid);
7474
staticvoidbtvacuumpage(BTVacState*vstate,BlockNumberblkno,
7575
BlockNumberorig_blkno);
76-
staticIndexTuplebt_getindextuple(IndexScanDescscan);
7776

7877

7978
/*
@@ -311,94 +310,9 @@ btgettuple(PG_FUNCTION_ARGS)
311310
else
312311
res=_bt_first(scan,dir);
313312

314-
/* Return the whole index tuple if requested */
315-
if (scan->xs_want_itup)
316-
{
317-
/* First, free the last one ... */
318-
if (scan->xs_itup!=NULL)
319-
{
320-
pfree(scan->xs_itup);
321-
scan->xs_itup=NULL;
322-
}
323-
324-
if (res)
325-
scan->xs_itup=bt_getindextuple(scan);
326-
}
327-
328313
PG_RETURN_BOOL(res);
329314
}
330315

331-
/*
332-
* bt_getindextuple - fetch index tuple at current position.
333-
*
334-
* This can fail to find the tuple if new tuples have been inserted on the
335-
* index page since we stepped onto the page. NULL is returned in that case.
336-
* (We could try a bit harder by searching for the TID; but if insertions
337-
* are happening, it's reasonably likely that an index-only scan will fail
338-
* anyway because of visibility. So probably not worth the trouble.)
339-
*
340-
* The tuple returned is a palloc'd copy, so that we don't need to keep a
341-
* lock on the index page.
342-
*
343-
* The caller must have pin on so->currPos.buf.
344-
*/
345-
staticIndexTuple
346-
bt_getindextuple(IndexScanDescscan)
347-
{
348-
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
349-
Pagepage;
350-
BTPageOpaqueopaque;
351-
OffsetNumberminoff;
352-
OffsetNumbermaxoff;
353-
intitemIndex;
354-
OffsetNumberoffnum;
355-
IndexTupleituple,
356-
result;
357-
358-
Assert(BufferIsValid(so->currPos.buf));
359-
360-
LockBuffer(so->currPos.buf,BT_READ);
361-
362-
/* Locate the tuple, being paranoid about possibility the page changed */
363-
page=BufferGetPage(so->currPos.buf);
364-
opaque= (BTPageOpaque)PageGetSpecialPointer(page);
365-
minoff=P_FIRSTDATAKEY(opaque);
366-
maxoff=PageGetMaxOffsetNumber(page);
367-
368-
itemIndex=so->currPos.itemIndex;
369-
/* pure paranoia */
370-
Assert(itemIndex >=so->currPos.firstItem&&
371-
itemIndex <=so->currPos.lastItem);
372-
373-
offnum=so->currPos.items[itemIndex].indexOffset;
374-
if (offnum<minoff||offnum>maxoff)
375-
{
376-
/* should never happen, since we have pin on page, but be careful */
377-
LockBuffer(so->currPos.buf,BUFFER_LOCK_UNLOCK);
378-
returnNULL;
379-
}
380-
381-
ituple= (IndexTuple)PageGetItem(page,PageGetItemId(page,offnum));
382-
383-
if (ItemPointerEquals(&ituple->t_tid,&scan->xs_ctup.t_self))
384-
{
385-
/* yup, it's the desired tuple, so make a copy */
386-
Sizeitupsz=IndexTupleSize(ituple);
387-
388-
result=palloc(itupsz);
389-
memcpy(result,ituple,itupsz);
390-
}
391-
else
392-
{
393-
/* oops, it got moved */
394-
result=NULL;
395-
}
396-
397-
LockBuffer(so->currPos.buf,BUFFER_LOCK_UNLOCK);
398-
399-
returnresult;
400-
}
401-
402316
/*
403317
* btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
404318
*/
@@ -471,6 +385,15 @@ btbeginscan(PG_FUNCTION_ARGS)
471385
so->keyData=NULL;
472386
so->killedItems=NULL;/* until needed */
473387
so->numKilled=0;
388+
389+
/*
390+
* We don't know yet whether the scan will be index-only, so we do not
391+
* allocate the tuple workspace arrays until btrescan.
392+
*/
393+
so->currTuples=so->markTuples=NULL;
394+
so->currPos.nextTupleOffset=0;
395+
so->markPos.nextTupleOffset=0;
396+
474397
scan->opaque=so;
475398

476399
PG_RETURN_POINTER(scan);
@@ -505,6 +428,18 @@ btrescan(PG_FUNCTION_ARGS)
505428
}
506429
so->markItemIndex=-1;
507430

431+
/*
432+
* Allocate tuple workspace arrays, if needed for an index-only scan and
433+
* not already done in a previous rescan call. To save on palloc
434+
* overhead, both workspaces are allocated as one palloc block; only this
435+
* function and btendscan know that.
436+
*/
437+
if (scan->xs_want_itup&&so->currTuples==NULL)
438+
{
439+
so->currTuples= (char*)palloc(BLCKSZ*2);
440+
so->markTuples=so->currTuples+BLCKSZ;
441+
}
442+
508443
/*
509444
* Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
510445
* - vadim 05/05/97
@@ -544,18 +479,16 @@ btendscan(PG_FUNCTION_ARGS)
544479
}
545480
so->markItemIndex=-1;
546481

482+
/* Release storage */
547483
if (so->killedItems!=NULL)
548484
pfree(so->killedItems);
549485
if (so->keyData!=NULL)
550486
pfree(so->keyData);
487+
if (so->currTuples!=NULL)
488+
pfree(so->currTuples);
489+
/* so->markTuples should not be pfree'd, see btrescan */
551490
pfree(so);
552491

553-
if (scan->xs_itup!=NULL)
554-
{
555-
pfree(scan->xs_itup);
556-
scan->xs_itup=NULL;
557-
}
558-
559492
PG_RETURN_VOID();
560493
}
561494

@@ -626,6 +559,9 @@ btrestrpos(PG_FUNCTION_ARGS)
626559
memcpy(&so->currPos,&so->markPos,
627560
offsetof(BTScanPosData,items[1])+
628561
so->markPos.lastItem*sizeof(BTScanPosItem));
562+
if (so->currTuples)
563+
memcpy(so->currTuples,so->markTuples,
564+
so->markPos.nextTupleOffset);
629565
}
630566
}
631567

‎src/backend/access/nbtree/nbtsearch.c

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626

2727
staticbool_bt_readpage(IndexScanDescscan,ScanDirectiondir,
2828
OffsetNumberoffnum);
29+
staticvoid_bt_saveitem(BTScanOpaqueso,intitemIndex,
30+
OffsetNumberoffnum,IndexTupleitup);
2931
staticbool_bt_steppage(IndexScanDescscan,ScanDirectiondir);
3032
staticBuffer_bt_walk_left(Relationrel,Bufferbuf);
3133
staticbool_bt_endpoint(IndexScanDescscan,ScanDirectiondir);
@@ -429,8 +431,9 @@ _bt_compare(Relation rel,
429431
*if backwards scan, the last item) in the tree that satisfies the
430432
*qualifications in the scan key. On success exit, the page containing
431433
*the current index tuple is pinned but not locked, and data about
432-
*the matching tuple(s) on the page has been loaded into so->currPos,
433-
*and scan->xs_ctup.t_self is set to the heap TID of the current tuple.
434+
*the matching tuple(s) on the page has been loaded into so->currPos.
435+
*scan->xs_ctup.t_self is set to the heap TID of the current tuple,
436+
*and if requested, scan->xs_itup points to a copy of the index tuple.
434437
*
435438
* If there are no matching items in the index, we return FALSE, with no
436439
* pins or locks held.
@@ -456,6 +459,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
456459
intkeysCount=0;
457460
inti;
458461
StrategyNumberstrat_total;
462+
BTScanPosItem*currItem;
459463

460464
pgstat_count_index_scan(rel);
461465

@@ -912,7 +916,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
912916
LockBuffer(so->currPos.buf,BUFFER_LOCK_UNLOCK);
913917

914918
/* OK, itemIndex says what to return */
915-
scan->xs_ctup.t_self=so->currPos.items[so->currPos.itemIndex].heapTid;
919+
currItem=&so->currPos.items[so->currPos.itemIndex];
920+
scan->xs_ctup.t_self=currItem->heapTid;
921+
if (scan->xs_want_itup)
922+
scan->xs_itup= (IndexTuple) (so->currTuples+currItem->tupleOffset);
916923

917924
return true;
918925
}
@@ -925,7 +932,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
925932
*previously returned.
926933
*
927934
*On successful exit, scan->xs_ctup.t_self is set to the TID of the
928-
*next heap tuple, and so->currPos is updated as needed.
935+
*next heap tuple, and if requested, scan->xs_itup points to a copy of
936+
*the index tuple. so->currPos is updated as needed.
929937
*
930938
*On failure exit (no more tuples), we release pin and set
931939
*so->currPos.buf to InvalidBuffer.
@@ -934,6 +942,7 @@ bool
934942
_bt_next(IndexScanDescscan,ScanDirectiondir)
935943
{
936944
BTScanOpaqueso= (BTScanOpaque)scan->opaque;
945+
BTScanPosItem*currItem;
937946

938947
/*
939948
* Advance to next tuple on current page; or if there's no more, try to
@@ -967,7 +976,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
967976
}
968977

969978
/* OK, itemIndex says what to return */
970-
scan->xs_ctup.t_self=so->currPos.items[so->currPos.itemIndex].heapTid;
979+
currItem=&so->currPos.items[so->currPos.itemIndex];
980+
scan->xs_ctup.t_self=currItem->heapTid;
981+
if (scan->xs_want_itup)
982+
scan->xs_itup= (IndexTuple) (so->currTuples+currItem->tupleOffset);
971983

972984
return true;
973985
}
@@ -996,6 +1008,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
9961008
OffsetNumberminoff;
9971009
OffsetNumbermaxoff;
9981010
intitemIndex;
1011+
IndexTupleitup;
9991012
boolcontinuescan;
10001013

10011014
/* we must have the buffer pinned and locked */
@@ -1013,6 +1026,9 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
10131026
*/
10141027
so->currPos.nextPage=opaque->btpo_next;
10151028

1029+
/* initialize tuple workspace to empty */
1030+
so->currPos.nextTupleOffset=0;
1031+
10161032
if (ScanDirectionIsForward(dir))
10171033
{
10181034
/* load items[] in ascending order */
@@ -1022,12 +1038,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
10221038

10231039
while (offnum <=maxoff)
10241040
{
1025-
if (_bt_checkkeys(scan,page,offnum,dir,&continuescan))
1041+
itup=_bt_checkkeys(scan,page,offnum,dir,&continuescan);
1042+
if (itup!=NULL)
10261043
{
10271044
/* tuple passes all scan key conditions, so remember it */
1028-
/* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */
1029-
so->currPos.items[itemIndex].heapTid=scan->xs_ctup.t_self;
1030-
so->currPos.items[itemIndex].indexOffset=offnum;
1045+
_bt_saveitem(so,itemIndex,offnum,itup);
10311046
itemIndex++;
10321047
}
10331048
if (!continuescan)
@@ -1054,13 +1069,12 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
10541069

10551070
while (offnum >=minoff)
10561071
{
1057-
if (_bt_checkkeys(scan,page,offnum,dir,&continuescan))
1072+
itup=_bt_checkkeys(scan,page,offnum,dir,&continuescan);
1073+
if (itup!=NULL)
10581074
{
10591075
/* tuple passes all scan key conditions, so remember it */
1060-
/* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */
10611076
itemIndex--;
1062-
so->currPos.items[itemIndex].heapTid=scan->xs_ctup.t_self;
1063-
so->currPos.items[itemIndex].indexOffset=offnum;
1077+
_bt_saveitem(so,itemIndex,offnum,itup);
10641078
}
10651079
if (!continuescan)
10661080
{
@@ -1081,6 +1095,25 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
10811095
return (so->currPos.firstItem <=so->currPos.lastItem);
10821096
}
10831097

1098+
/* Save an index item into so->currPos.items[itemIndex] */
1099+
staticvoid
1100+
_bt_saveitem(BTScanOpaqueso,intitemIndex,
1101+
OffsetNumberoffnum,IndexTupleitup)
1102+
{
1103+
BTScanPosItem*currItem=&so->currPos.items[itemIndex];
1104+
1105+
currItem->heapTid=itup->t_tid;
1106+
currItem->indexOffset=offnum;
1107+
if (so->currTuples)
1108+
{
1109+
Sizeitupsz=IndexTupleSize(itup);
1110+
1111+
currItem->tupleOffset=so->currPos.nextTupleOffset;
1112+
memcpy(so->currTuples+so->currPos.nextTupleOffset,itup,itupsz);
1113+
so->currPos.nextTupleOffset+=MAXALIGN(itupsz);
1114+
}
1115+
}
1116+
10841117
/*
10851118
*_bt_steppage() -- Step to next page containing valid data for scan
10861119
*
@@ -1119,6 +1152,9 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
11191152
memcpy(&so->markPos,&so->currPos,
11201153
offsetof(BTScanPosData,items[1])+
11211154
so->currPos.lastItem*sizeof(BTScanPosItem));
1155+
if (so->markTuples)
1156+
memcpy(so->markTuples,so->currTuples,
1157+
so->currPos.nextTupleOffset);
11221158
so->markPos.itemIndex=so->markItemIndex;
11231159
so->markItemIndex=-1;
11241160
}
@@ -1428,6 +1464,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
14281464
Pagepage;
14291465
BTPageOpaqueopaque;
14301466
OffsetNumberstart;
1467+
BTScanPosItem*currItem;
14311468

14321469
/*
14331470
* Scan down to the leftmost or rightmost leaf page. This is a simplified
@@ -1505,7 +1542,10 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
15051542
LockBuffer(so->currPos.buf,BUFFER_LOCK_UNLOCK);
15061543

15071544
/* OK, itemIndex says what to return */
1508-
scan->xs_ctup.t_self=so->currPos.items[so->currPos.itemIndex].heapTid;
1545+
currItem=&so->currPos.items[so->currPos.itemIndex];
1546+
scan->xs_ctup.t_self=currItem->heapTid;
1547+
if (scan->xs_want_itup)
1548+
scan->xs_itup= (IndexTuple) (so->currTuples+currItem->tupleOffset);
15091549

15101550
return true;
15111551
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp