Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit92f6b49

Browse files
committed
Fix handling of NULL distances in KNN-GiST
In order to implement NULL LAST semantic GiST previously assumed distance tothe NULL value to be Inf. However, our distance functions can return Inf andNaN for non-null values. In such cases, NULL LAST semantic appears to bebroken. This commit fixes that by introducing separate array of null flags fordistances.Backpatch to all supported versions.Discussion:https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.comAuthor: Alexander KorotkovBackpatch-through: 9.4
1 parent8f72400 commit92f6b49

File tree

3 files changed

+95
-33
lines changed

3 files changed

+95
-33
lines changed

‎src/backend/access/gist/gistget.c

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,9 @@ gistkillitems(IndexScanDesc scan)
110110
* Similarly, *recheck_distances_p is set to indicate whether the distances
111111
* need to be rechecked, and it is also ignored for non-leaf entries.
112112
*
113-
* If we are doing an ordered scan, so->distances[] is filled with distance
114-
* data from the distance() functions before returning success.
113+
* If we are doing an ordered scan, so->distancesValues[] and
114+
* so->distancesNulls[] is filled with distance data from the distance()
115+
* functions before returning success.
115116
*
116117
* We must decompress the key in the IndexTuple before passing it to the
117118
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -132,7 +133,8 @@ gistindex_keytest(IndexScanDesc scan,
132133
GISTSTATE*giststate=so->giststate;
133134
ScanKeykey=scan->keyData;
134135
intkeySize=scan->numberOfKeys;
135-
double*distance_p;
136+
double*distance_value_p;
137+
bool*distance_null_p;
136138
Relationr=scan->indexRelation;
137139

138140
*recheck_p= false;
@@ -150,7 +152,10 @@ gistindex_keytest(IndexScanDesc scan,
150152
if (GistPageIsLeaf(page))/* shouldn't happen */
151153
elog(ERROR,"invalid GiST tuple found on leaf page");
152154
for (i=0;i<scan->numberOfOrderBys;i++)
153-
so->distances[i]=-get_float8_infinity();
155+
{
156+
so->distanceValues[i]=-get_float8_infinity();
157+
so->distanceNulls[i]= false;
158+
}
154159
return true;
155160
}
156161

@@ -233,7 +238,8 @@ gistindex_keytest(IndexScanDesc scan,
233238

234239
/* OK, it passes --- now let's compute the distances */
235240
key=scan->orderByData;
236-
distance_p=so->distances;
241+
distance_value_p=so->distanceValues;
242+
distance_null_p=so->distanceNulls;
237243
keySize=scan->numberOfOrderBys;
238244
while (keySize>0)
239245
{
@@ -247,8 +253,9 @@ gistindex_keytest(IndexScanDesc scan,
247253

248254
if ((key->sk_flags&SK_ISNULL)||isNull)
249255
{
250-
/* Assume distance computes as null and sorts to the end */
251-
*distance_p=get_float8_infinity();
256+
/* Assume distance computes as null */
257+
*distance_value_p=0.0;
258+
*distance_null_p= true;
252259
}
253260
else
254261
{
@@ -285,11 +292,13 @@ gistindex_keytest(IndexScanDesc scan,
285292
ObjectIdGetDatum(key->sk_subtype),
286293
PointerGetDatum(&recheck));
287294
*recheck_distances_p |=recheck;
288-
*distance_p=DatumGetFloat8(dist);
295+
*distance_value_p=DatumGetFloat8(dist);
296+
*distance_null_p= false;
289297
}
290298

291299
key++;
292-
distance_p++;
300+
distance_value_p++;
301+
distance_null_p++;
293302
keySize--;
294303
}
295304

@@ -302,7 +311,8 @@ gistindex_keytest(IndexScanDesc scan,
302311
*
303312
* scan: index scan we are executing
304313
* pageItem: search queue item identifying an index page to scan
305-
* myDistances: distances array associated with pageItem, or NULL at the root
314+
* myDistanceValues: distances array associated with pageItem, or NULL at the root
315+
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
306316
* tbm: if not NULL, gistgetbitmap's output bitmap
307317
* ntids: if not NULL, gistgetbitmap's output tuple counter
308318
*
@@ -319,7 +329,8 @@ gistindex_keytest(IndexScanDesc scan,
319329
* sibling will be processed next.
320330
*/
321331
staticvoid
322-
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,double*myDistances,
332+
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,
333+
double*myDistanceValues,bool*myDistanceNulls,
323334
TIDBitmap*tbm,int64*ntids)
324335
{
325336
GISTScanOpaqueso= (GISTScanOpaque)scan->opaque;
@@ -356,7 +367,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
356367
GISTSearchItem*item;
357368

358369
/* This can't happen when starting at the root */
359-
Assert(myDistances!=NULL);
370+
Assert(myDistanceValues!=NULL&&myDistanceNulls!=NULL);
360371

361372
oldcxt=MemoryContextSwitchTo(so->queueCxt);
362373

@@ -366,8 +377,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
366377
item->data.parentlsn=pageItem->data.parentlsn;
367378

368379
/* Insert it into the queue using same distances as for this page */
369-
memcpy(item->distances,myDistances,
370-
sizeof(double)*scan->numberOfOrderBys);
380+
memcpy(GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
381+
myDistanceValues,sizeof(double)*scan->numberOfOrderBys);
382+
memcpy(GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
383+
myDistanceNulls,sizeof(bool)*scan->numberOfOrderBys);
371384

372385
pairingheap_add(so->queue,&item->phNode);
373386

@@ -462,6 +475,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
462475
* search.
463476
*/
464477
GISTSearchItem*item;
478+
intnOrderBys=scan->numberOfOrderBys;
465479

466480
oldcxt=MemoryContextSwitchTo(so->queueCxt);
467481

@@ -496,8 +510,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
496510
}
497511

498512
/* Insert it into the queue using new distance data */
499-
memcpy(item->distances,so->distances,
500-
sizeof(double)*scan->numberOfOrderBys);
513+
memcpy(GISTSearchItemDistanceValues(item,nOrderBys),
514+
so->distanceValues,sizeof(double)*nOrderBys);
515+
memcpy(GISTSearchItemDistanceNulls(item,nOrderBys),
516+
so->distanceNulls,sizeof(bool)*nOrderBys);
501517

502518
pairingheap_add(so->queue,&item->phNode);
503519

@@ -552,6 +568,8 @@ getNextNearest(IndexScanDesc scan)
552568
do
553569
{
554570
GISTSearchItem*item=getNextGISTSearchItem(so);
571+
float8*distanceValues=GISTSearchItemDistanceValues(item,scan->numberOfOrderBys);
572+
bool*distanceNulls=GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys);
555573

556574
if (!item)
557575
break;
@@ -571,8 +589,8 @@ getNextNearest(IndexScanDesc scan)
571589
if (!scan->xs_orderbynulls[i])
572590
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
573591
#endif
574-
scan->xs_orderbyvals[i]=Float8GetDatum(item->distances[i]);
575-
scan->xs_orderbynulls[i]=false;
592+
scan->xs_orderbyvals[i]=Float8GetDatum(distanceValues[i]);
593+
scan->xs_orderbynulls[i]=distanceNulls[i];
576594
}
577595
elseif (so->orderByTypes[i]==FLOAT4OID)
578596
{
@@ -582,8 +600,8 @@ getNextNearest(IndexScanDesc scan)
582600
if (!scan->xs_orderbynulls[i])
583601
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
584602
#endif
585-
scan->xs_orderbyvals[i]=Float4GetDatum((float4)item->distances[i]);
586-
scan->xs_orderbynulls[i]=false;
603+
scan->xs_orderbyvals[i]=Float4GetDatum(distanceValues[i]);
604+
scan->xs_orderbynulls[i]=distanceNulls[i];
587605
}
588606
else
589607
{
@@ -611,7 +629,10 @@ getNextNearest(IndexScanDesc scan)
611629
/* visit an index page, extract its items into queue */
612630
CHECK_FOR_INTERRUPTS();
613631

614-
gistScanPage(scan,item,item->distances,NULL,NULL);
632+
gistScanPage(scan,item,
633+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
634+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
635+
NULL,NULL);
615636
}
616637

617638
pfree(item);
@@ -649,7 +670,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
649670

650671
fakeItem.blkno=GIST_ROOT_BLKNO;
651672
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
652-
gistScanPage(scan,&fakeItem,NULL,NULL,NULL);
673+
gistScanPage(scan,&fakeItem,NULL,NULL,NULL,NULL);
653674
}
654675

655676
if (scan->numberOfOrderBys>0)
@@ -743,7 +764,10 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
743764
* this page, we fall out of the inner "do" and loop around to
744765
* return them.
745766
*/
746-
gistScanPage(scan,item,item->distances,NULL,NULL);
767+
gistScanPage(scan,item,
768+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
769+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
770+
NULL,NULL);
747771

748772
pfree(item);
749773
}while (so->nPageData==0);
@@ -774,7 +798,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
774798

775799
fakeItem.blkno=GIST_ROOT_BLKNO;
776800
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
777-
gistScanPage(scan,&fakeItem,NULL,tbm,&ntids);
801+
gistScanPage(scan,&fakeItem,NULL,NULL,tbm,&ntids);
778802

779803
/*
780804
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -789,7 +813,10 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
789813

790814
CHECK_FOR_INTERRUPTS();
791815

792-
gistScanPage(scan,item,item->distances,tbm,&ntids);
816+
gistScanPage(scan,item,
817+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
818+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
819+
tbm,&ntids);
793820

794821
pfree(item);
795822
}

‎src/backend/access/gist/gistscan.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,30 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
3333
constGISTSearchItem*sb= (constGISTSearchItem*)b;
3434
IndexScanDescscan= (IndexScanDesc)arg;
3535
inti;
36+
double*da=GISTSearchItemDistanceValues(sa,scan->numberOfOrderBys),
37+
*db=GISTSearchItemDistanceValues(sb,scan->numberOfOrderBys);
38+
bool*na=GISTSearchItemDistanceNulls(sa,scan->numberOfOrderBys),
39+
*nb=GISTSearchItemDistanceNulls(sb,scan->numberOfOrderBys);
3640

3741
/* Order according to distance comparison */
3842
for (i=0;i<scan->numberOfOrderBys;i++)
3943
{
40-
intcmp=-float8_cmp_internal(sa->distances[i],sb->distances[i]);
44+
if (na[i])
45+
{
46+
if (!nb[i])
47+
return-1;
48+
}
49+
elseif (nb[i])
50+
{
51+
return1;
52+
}
53+
else
54+
{
55+
intcmp=-float8_cmp_internal(da[i],db[i]);
4156

42-
if (cmp!=0)
43-
returncmp;
57+
if (cmp!=0)
58+
returncmp;
59+
}
4460
}
4561

4662
/* Heap items go before inner pages, to ensure a depth-first search */
@@ -84,7 +100,8 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
84100
so->queueCxt=giststate->scanCxt;/* see gistrescan */
85101

86102
/* workspaces with size dependent on numberOfOrderBys: */
87-
so->distances=palloc(sizeof(double)*scan->numberOfOrderBys);
103+
so->distanceValues=palloc(sizeof(double)*scan->numberOfOrderBys);
104+
so->distanceNulls=palloc(sizeof(bool)*scan->numberOfOrderBys);
88105
so->qual_ok= true;/* in case there are zero keys */
89106
if (scan->numberOfOrderBys>0)
90107
{

‎src/include/access/gist_private.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,30 @@ typedef struct GISTSearchItem
136136
/* we must store parentlsn to detect whether a split occurred */
137137
GISTSearchHeapItemheap;/* heap info, if heap tuple */
138138
}data;
139-
doubledistances[FLEXIBLE_ARRAY_MEMBER];/* numberOfOrderBys
140-
* entries */
139+
140+
/*
141+
* This data structure is followed by arrays of distance values and
142+
* distance null flags. Size of both arrays is
143+
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
144+
* arrays.
145+
*/
141146
}GISTSearchItem;
142147

143148
#defineGISTSearchItemIsHeap(item)((item).blkno == InvalidBlockNumber)
144149

145-
#defineSizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
150+
#defineSizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
151+
(sizeof(double) + sizeof(bool)) * (n_distances))
152+
153+
/*
154+
* We actually don't need n_distances compute pointer to distance values.
155+
* Nevertheless take n_distances as argument to have same arguments list for
156+
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
157+
*/
158+
#defineGISTSearchItemDistanceValues(item,n_distances) \
159+
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
160+
161+
#defineGISTSearchItemDistanceNulls(item,n_distances) \
162+
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
146163

147164
/*
148165
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -158,7 +175,8 @@ typedef struct GISTScanOpaqueData
158175
boolfirstCall;/* true until first gistgettuple call */
159176

160177
/* pre-allocated workspace arrays */
161-
double*distances;/* output area for gistindex_keytest */
178+
double*distanceValues;/* output area for gistindex_keytest */
179+
bool*distanceNulls;
162180

163181
/* info about killed items if any (killedItems is NULL if never used) */
164182
OffsetNumber*killedItems;/* offset numbers of killed items */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp