@@ -99,12 +99,13 @@ static void
9999scanPostingTree (Relation index ,GinScanEntry scanEntry ,
100100BlockNumber rootPostingTree )
101101{
102+ GinBtreeData btree ;
102103GinBtreeStack * stack ;
103104Buffer buffer ;
104105Page page ;
105106
106107/* Descend to the leftmost leaf page */
107- stack = ginScanBeginPostingTree (index ,rootPostingTree );
108+ stack = ginScanBeginPostingTree (& btree , index ,rootPostingTree );
108109buffer = stack -> buffer ;
109110IncrBufferRefCount (buffer );/* prevent unpin in freeGinBtreeStack */
110111
@@ -412,7 +413,8 @@ startScanEntry(GinState *ginstate, GinScanEntry entry)
412413LockBuffer (stackEntry -> buffer ,GIN_UNLOCK );
413414needUnlock = FALSE;
414415
415- stack = ginScanBeginPostingTree (ginstate -> index ,rootPostingTree );
416+ stack = ginScanBeginPostingTree (& entry -> btree ,ginstate -> index ,
417+ rootPostingTree );
416418entry -> buffer = stack -> buffer ;
417419
418420/*
@@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
506508{
507509Page page ;
508510int i ;
511+ bool stepright ;
512+
513+ if (!BufferIsValid (entry -> buffer ))
514+ {
515+ entry -> isFinished = true;
516+ return ;
517+ }
518+
519+ /*
520+ * We have two strategies for finding the correct page: step right from
521+ * the current page, or descend the tree again from the root. If
522+ * advancePast equals the current item, the next matching item should be
523+ * on the next page, so we step right. Otherwise, descend from root.
524+ */
525+ if (ginCompareItemPointers (& entry -> curItem ,& advancePast )== 0 )
526+ {
527+ stepright = true;
528+ LockBuffer (entry -> buffer ,GIN_SHARE );
529+ }
530+ else
531+ {
532+ GinBtreeStack * stack ;
533+
534+ ReleaseBuffer (entry -> buffer );
535+
536+ /*
537+ * Set the search key, and find the correct leaf page.
538+ */
539+ if (ItemPointerIsLossyPage (& advancePast ))
540+ {
541+ ItemPointerSet (& entry -> btree .itemptr ,
542+ GinItemPointerGetBlockNumber (& advancePast )+ 1 ,
543+ FirstOffsetNumber );
544+ }
545+ else
546+ {
547+ entry -> btree .itemptr = advancePast ;
548+ entry -> btree .itemptr .ip_posid ++ ;
549+ }
550+ entry -> btree .fullScan = false;
551+ stack = ginFindLeafPage (& entry -> btree , true);
552+
553+ /* we don't need the stack, just the buffer. */
554+ entry -> buffer = stack -> buffer ;
555+ IncrBufferRefCount (entry -> buffer );
556+ freeGinBtreeStack (stack );
557+ stepright = false;
558+ }
559+
560+ elog (DEBUG2 ,"entryLoadMoreItems, %u/%u, skip: %d" ,
561+ GinItemPointerGetBlockNumber (& advancePast ),
562+ GinItemPointerGetOffsetNumber (& advancePast ),
563+ !stepright );
509564
510- LockBuffer (entry -> buffer ,GIN_SHARE );
511565page = BufferGetPage (entry -> buffer );
512566for (;;)
513567{
@@ -519,30 +573,34 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
519573entry -> nlist = 0 ;
520574}
521575
522- /*
523- * We've processed all the entries on this page. If it was the last
524- * page in the tree, we're done.
525- */
526- if (GinPageRightMost (page ))
576+ if (stepright )
527577{
528- UnlockReleaseBuffer (entry -> buffer );
529- entry -> buffer = InvalidBuffer ;
530- entry -> isFinished = TRUE;
531- return ;
578+ /*
579+ * We've processed all the entries on this page. If it was the last
580+ * page in the tree, we're done.
581+ */
582+ if (GinPageRightMost (page ))
583+ {
584+ UnlockReleaseBuffer (entry -> buffer );
585+ entry -> buffer = InvalidBuffer ;
586+ entry -> isFinished = TRUE;
587+ return ;
588+ }
589+
590+ /*
591+ * Step to next page, following the right link. then find the first
592+ * ItemPointer greater than advancePast.
593+ */
594+ entry -> buffer = ginStepRight (entry -> buffer ,
595+ ginstate -> index ,
596+ GIN_SHARE );
597+ page = BufferGetPage (entry -> buffer );
532598}
599+ stepright = true;
533600
534601if (GinPageGetOpaque (page )-> flags & GIN_DELETED )
535602continue ;/* page was deleted by concurrent vacuum */
536603
537- /*
538- * Step to next page, following the right link. then find the first
539- * ItemPointer greater than advancePast.
540- */
541- entry -> buffer = ginStepRight (entry -> buffer ,
542- ginstate -> index ,
543- GIN_SHARE );
544- page = BufferGetPage (entry -> buffer );
545-
546604/*
547605 * The first item > advancePast might not be on this page, but
548606 * somewhere to the right, if the page was split, or a non-match from
@@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
566624{
567625if (ginCompareItemPointers (& advancePast ,& entry -> list [i ])< 0 )
568626{
569- LockBuffer (entry -> buffer ,GIN_UNLOCK );
570627entry -> offset = i ;
628+
629+ if (GinPageRightMost (page ))
630+ {
631+ /* after processing the copied items, we're done. */
632+ UnlockReleaseBuffer (entry -> buffer );
633+ entry -> buffer = InvalidBuffer ;
634+ }
635+ else
636+ LockBuffer (entry -> buffer ,GIN_UNLOCK );
571637return ;
572638}
573639}
@@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
677743}
678744else if (!BufferIsValid (entry -> buffer ))
679745{
680- /* A posting list from an entry tuple */
746+ /*
747+ * A posting list from an entry tuple, or the last page of a posting
748+ * tree.
749+ */
681750do
682751{
683752if (entry -> offset >=entry -> nlist )