88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
1010 * IDENTIFICATION
11- *$PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.24 2009/03/25 22:19 :01tgl Exp $
11+ *$PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.25 2009/04/05 11:32 :01teodor Exp $
1212 *-------------------------------------------------------------------------
1313 */
1414
@@ -618,6 +618,7 @@ entryGetItem(Relation index, GinScanEntry entry)
618618 * Sets key->curItem to new found heap item pointer for one scan key
619619 * Returns isFinished, ie TRUE means we did NOT get a new item pointer!
620620 * Also, *keyrecheck is set true if recheck is needed for this scan key.
621+ * Note: lossy page could be returned after items from the same page.
621622 */
622623static bool
623624keyGetItem (Relation index ,GinState * ginstate ,MemoryContext tempCtx ,
@@ -635,7 +636,10 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
635636{
636637/*
637638 * move forward from previously value and set new curItem, which is
638- * minimal from entries->curItems
639+ * minimal from entries->curItems. Lossy page is encoded by ItemPointer
640+ * with max value for offset (0xffff), so if there is an non-lossy entries
641+ * on lossy page they will returned too and after that the whole page.
642+ * That's not a problem for resulting tidbitmap.
639643 */
640644ItemPointerSetMax (& key -> curItem );
641645for (i = 0 ;i < key -> nentries ;i ++ )
@@ -646,7 +650,8 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
646650{
647651/*
648652 * Move forward only entries which was the least
649- * on previous call
653+ * on previous call, key->entryRes[i] points that
654+ * current entry was a result of loop/call.
650655 */
651656if (entry -> isFinished == FALSE&& entryGetItem (index ,entry )== FALSE)
652657{
@@ -671,11 +676,21 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
671676}
672677
673678/*
679+ * Now key->curItem contains closest ItemPointer to previous result.
680+ *
674681 * if key->nentries == 1 then the consistentFn should always succeed,
675682 * but we must call it anyway to find out the recheck status.
676683 */
677684
678- /* setting up array for consistentFn */
685+ /*----------
686+ * entryRes array is used for:
687+ * - as an argument for consistentFn
688+ * - entry->curItem with corresponding key->entryRes[i] == false are
689+ * greater than key->curItem, so next loop/call they should be
690+ * renewed by entryGetItem(). So, we need to set up an array before
691+ * checking of lossy page.
692+ *----------
693+ */
679694for (i = 0 ;i < key -> nentries ;i ++ )
680695{
681696entry = key -> scanEntry + i ;
@@ -719,7 +734,10 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
719734
720735/*
721736 * Get ItemPointer of next heap row to be checked from pending list.
722- * Returns false if there are no more.
737+ * Returns false if there are no more. On pages with several rows
738+ * it returns each row separately, on page with part of heap row returns
739+ * per page data. pos->firstOffset and pos->lastOffset points
740+ * fraction of tuples for current heap row.
723741 *
724742 * The pendingBuffer is presumed pinned and share-locked on entry, and is
725743 * pinned and share-locked on success exit. On failure exit it's released.
@@ -787,13 +805,26 @@ scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
787805 */
788806pos -> lastOffset = maxoff + 1 ;
789807}
808+
809+ /*
810+ * Now pos->firstOffset points to the first tuple of current heap row,
811+ * pos->lastOffset points to the first tuple of second heap row (or
812+ * to the end of page)
813+ */
814+
790815break ;
791816}
792817}
793818
794819return true;
795820}
796821
822+ /*
823+ * Scan page from current tuple (off) up to the first event:
824+ * - tuple's attribute number is not equal to entry's attrnum
825+ * - reach of last tuple
826+ * - match is found (then returns true)
827+ */
797828static bool
798829matchPartialInPendingList (GinState * ginstate ,Page page ,
799830OffsetNumber off ,OffsetNumber maxoff ,
@@ -817,6 +848,13 @@ matchPartialInPendingList(GinState *ginstate, Page page,
817848datumExtracted [off - 1 ]= true;
818849}
819850
851+ /*----------
852+ * Check of partial match.
853+ * case cmp == 0 => match
854+ * case cmp > 0 => not match and finish scan
855+ * case cmp < 0 => not match and continue scan
856+ *----------
857+ */
820858cmp = DatumGetInt32 (FunctionCall4 (& ginstate -> comparePartialFn [attrnum ],
821859value ,
822860datum [off - 1 ],
@@ -826,14 +864,16 @@ matchPartialInPendingList(GinState *ginstate, Page page,
826864return true;
827865else if (cmp > 0 )
828866return false;
867+
868+ off ++ ;
829869}
830870
831871return false;
832872}
833873
834874/*
835875 * Sets entryRes array for each key by looking at
836- * every entry per indexed value (row) in pending list.
876+ * every entry per indexed value (heap's row) in pending list.
837877 * returns true if at least one of datum was matched by key's entry
838878 *
839879 * The pendingBuffer is presumed pinned and share-locked on entry.
@@ -878,9 +918,15 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
878918StopMiddle ;
879919GinScanEntry entry = key -> scanEntry + j ;
880920
921+ /* already true - do not extra work */
881922if (key -> entryRes [j ] )
882923continue ;
883924
925+ /*
926+ * Interested tuples are from pos->firstOffset to pos->lastOffset
927+ * and they are ordered by (attnum, Datum) as it's done in entry tree
928+ * So we could use binary search to prevent linear scanning
929+ */
884930while (StopLow < StopHigh )
885931{
886932StopMiddle = StopLow + ((StopHigh - StopLow ) >>1 );
@@ -908,6 +954,11 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
908954
909955if (res == 0 )
910956{
957+ /*
958+ * The exact match causes, so we just scan from
959+ * current position to find a partial match.
960+ * See comment above about tuple's ordering.
961+ */
911962if (entry -> isPartialMatch )
912963key -> entryRes [j ]=
913964matchPartialInPendingList (& so -> ginstate ,
@@ -931,6 +982,12 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
931982}
932983
933984if (StopLow >=StopHigh && entry -> isPartialMatch )
985+ {
986+ /*
987+ * The exact match wasn't found, so we need to start
988+ * scan from first tuple greater then current entry
989+ * See comment above about tuple's ordering.
990+ */
934991key -> entryRes [j ]=
935992matchPartialInPendingList (& so -> ginstate ,
936993page ,StopHigh ,
@@ -941,6 +998,7 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
941998datumExtracted ,
942999entry -> strategy ,
9431000entry -> extra_data );
1001+ }
9441002
9451003hasMatch |=key -> entryRes [j ];
9461004}
@@ -960,6 +1018,11 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
9601018{
9611019ItemPointerData item = pos -> item ;
9621020
1021+ /*
1022+ * need to get next portion of tuples of row containing
1023+ * on several pages
1024+ */
1025+
9631026if (scanGetCandidate (scan ,pos )== false|| !ItemPointerEquals (& pos -> item ,& item ) )
9641027elog (ERROR ,"Could not process tuple" );/* XXX should not be here ! */
9651028}
@@ -1004,19 +1067,23 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
10041067UnlockReleaseBuffer (metabuffer );
10051068
10061069/*
1007- * loop for each heap row
1070+ * loop for each heap row. scanGetCandidate returns full row
1071+ * or row's tuples from first page.
10081072 */
10091073while (scanGetCandidate (scan ,& pos ) )
10101074{
10111075
10121076/*
1013- * Check entries in rows and setup entryRes array
1077+ * Check entries in tuple and setup entryRes array
1078+ * If tuples of heap's row are placed on several pages
1079+ * collectDatumForItem will read all of that pages.
10141080 */
10151081if (!collectDatumForItem (scan ,& pos ))
10161082continue ;
10171083
10181084/*
1019- * check for consistent
1085+ * Matching of entries of one row is finished,
1086+ * so check row by consistent function.
10201087 */
10211088oldCtx = MemoryContextSwitchTo (so -> tempCtx );
10221089recheck = false;