Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitd807200

Browse files
committed
Fix handling of NULL distances in KNN-GiST
In order to implement NULL LAST semantic GiST previously assumed distance tothe NULL value to be Inf. However, our distance functions can return Inf andNaN for non-null values. In such cases, NULL LAST semantic appears to bebroken. This commit fixes that by introducing separate array of null flags fordistances.Backpatch to all supported versions.Discussion:https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.comAuthor: Alexander KorotkovBackpatch-through: 9.4
1 parent749b04d commitd807200

File tree

3 files changed

+95
-33
lines changed

3 files changed

+95
-33
lines changed

‎src/backend/access/gist/gistget.c

Lines changed: 52 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,9 @@ gistkillitems(IndexScanDesc scan)
112112
* Similarly, *recheck_distances_p is set to indicate whether the distances
113113
* need to be rechecked, and it is also ignored for non-leaf entries.
114114
*
115-
* If we are doing an ordered scan, so->distances[] is filled with distance
116-
* data from the distance() functions before returning success.
115+
* If we are doing an ordered scan, so->distancesValues[] and
116+
* so->distancesNulls[] is filled with distance data from the distance()
117+
* functions before returning success.
117118
*
118119
* We must decompress the key in the IndexTuple before passing it to the
119120
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -134,7 +135,8 @@ gistindex_keytest(IndexScanDesc scan,
134135
GISTSTATE*giststate=so->giststate;
135136
ScanKeykey=scan->keyData;
136137
intkeySize=scan->numberOfKeys;
137-
double*distance_p;
138+
double*distance_value_p;
139+
bool*distance_null_p;
138140
Relationr=scan->indexRelation;
139141

140142
*recheck_p= false;
@@ -152,7 +154,10 @@ gistindex_keytest(IndexScanDesc scan,
152154
if (GistPageIsLeaf(page))/* shouldn't happen */
153155
elog(ERROR,"invalid GiST tuple found on leaf page");
154156
for (i=0;i<scan->numberOfOrderBys;i++)
155-
so->distances[i]=-get_float8_infinity();
157+
{
158+
so->distanceValues[i]=-get_float8_infinity();
159+
so->distanceNulls[i]= false;
160+
}
156161
return true;
157162
}
158163

@@ -235,7 +240,8 @@ gistindex_keytest(IndexScanDesc scan,
235240

236241
/* OK, it passes --- now let's compute the distances */
237242
key=scan->orderByData;
238-
distance_p=so->distances;
243+
distance_value_p=so->distanceValues;
244+
distance_null_p=so->distanceNulls;
239245
keySize=scan->numberOfOrderBys;
240246
while (keySize>0)
241247
{
@@ -249,8 +255,9 @@ gistindex_keytest(IndexScanDesc scan,
249255

250256
if ((key->sk_flags&SK_ISNULL)||isNull)
251257
{
252-
/* Assume distance computes as null and sorts to the end */
253-
*distance_p=get_float8_infinity();
258+
/* Assume distance computes as null */
259+
*distance_value_p=0.0;
260+
*distance_null_p= true;
254261
}
255262
else
256263
{
@@ -287,11 +294,13 @@ gistindex_keytest(IndexScanDesc scan,
287294
ObjectIdGetDatum(key->sk_subtype),
288295
PointerGetDatum(&recheck));
289296
*recheck_distances_p |=recheck;
290-
*distance_p=DatumGetFloat8(dist);
297+
*distance_value_p=DatumGetFloat8(dist);
298+
*distance_null_p= false;
291299
}
292300

293301
key++;
294-
distance_p++;
302+
distance_value_p++;
303+
distance_null_p++;
295304
keySize--;
296305
}
297306

@@ -304,7 +313,8 @@ gistindex_keytest(IndexScanDesc scan,
304313
*
305314
* scan: index scan we are executing
306315
* pageItem: search queue item identifying an index page to scan
307-
* myDistances: distances array associated with pageItem, or NULL at the root
316+
* myDistanceValues: distances array associated with pageItem, or NULL at the root
317+
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
308318
* tbm: if not NULL, gistgetbitmap's output bitmap
309319
* ntids: if not NULL, gistgetbitmap's output tuple counter
310320
*
@@ -321,7 +331,8 @@ gistindex_keytest(IndexScanDesc scan,
321331
* sibling will be processed next.
322332
*/
323333
staticvoid
324-
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,double*myDistances,
334+
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,
335+
double*myDistanceValues,bool*myDistanceNulls,
325336
TIDBitmap*tbm,int64*ntids)
326337
{
327338
GISTScanOpaqueso= (GISTScanOpaque)scan->opaque;
@@ -359,7 +370,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
359370
GISTSearchItem*item;
360371

361372
/* This can't happen when starting at the root */
362-
Assert(myDistances!=NULL);
373+
Assert(myDistanceValues!=NULL&&myDistanceNulls!=NULL);
363374

364375
oldcxt=MemoryContextSwitchTo(so->queueCxt);
365376

@@ -369,8 +380,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
369380
item->data.parentlsn=pageItem->data.parentlsn;
370381

371382
/* Insert it into the queue using same distances as for this page */
372-
memcpy(item->distances,myDistances,
373-
sizeof(double)*scan->numberOfOrderBys);
383+
memcpy(GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
384+
myDistanceValues,sizeof(double)*scan->numberOfOrderBys);
385+
memcpy(GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
386+
myDistanceNulls,sizeof(bool)*scan->numberOfOrderBys);
374387

375388
pairingheap_add(so->queue,&item->phNode);
376389

@@ -465,6 +478,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
465478
* search.
466479
*/
467480
GISTSearchItem*item;
481+
intnOrderBys=scan->numberOfOrderBys;
468482

469483
oldcxt=MemoryContextSwitchTo(so->queueCxt);
470484

@@ -499,8 +513,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
499513
}
500514

501515
/* Insert it into the queue using new distance data */
502-
memcpy(item->distances,so->distances,
503-
sizeof(double)*scan->numberOfOrderBys);
516+
memcpy(GISTSearchItemDistanceValues(item,nOrderBys),
517+
so->distanceValues,sizeof(double)*nOrderBys);
518+
memcpy(GISTSearchItemDistanceNulls(item,nOrderBys),
519+
so->distanceNulls,sizeof(bool)*nOrderBys);
504520

505521
pairingheap_add(so->queue,&item->phNode);
506522

@@ -555,6 +571,8 @@ getNextNearest(IndexScanDesc scan)
555571
do
556572
{
557573
GISTSearchItem*item=getNextGISTSearchItem(so);
574+
float8*distanceValues=GISTSearchItemDistanceValues(item,scan->numberOfOrderBys);
575+
bool*distanceNulls=GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys);
558576

559577
if (!item)
560578
break;
@@ -574,8 +592,8 @@ getNextNearest(IndexScanDesc scan)
574592
if (!scan->xs_orderbynulls[i])
575593
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
576594
#endif
577-
scan->xs_orderbyvals[i]=Float8GetDatum(item->distances[i]);
578-
scan->xs_orderbynulls[i]=false;
595+
scan->xs_orderbyvals[i]=Float8GetDatum(distanceValues[i]);
596+
scan->xs_orderbynulls[i]=distanceNulls[i];
579597
}
580598
elseif (so->orderByTypes[i]==FLOAT4OID)
581599
{
@@ -585,8 +603,8 @@ getNextNearest(IndexScanDesc scan)
585603
if (!scan->xs_orderbynulls[i])
586604
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
587605
#endif
588-
scan->xs_orderbyvals[i]=Float4GetDatum((float4)item->distances[i]);
589-
scan->xs_orderbynulls[i]=false;
606+
scan->xs_orderbyvals[i]=Float4GetDatum(distanceValues[i]);
607+
scan->xs_orderbynulls[i]=distanceNulls[i];
590608
}
591609
else
592610
{
@@ -614,7 +632,10 @@ getNextNearest(IndexScanDesc scan)
614632
/* visit an index page, extract its items into queue */
615633
CHECK_FOR_INTERRUPTS();
616634

617-
gistScanPage(scan,item,item->distances,NULL,NULL);
635+
gistScanPage(scan,item,
636+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
637+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
638+
NULL,NULL);
618639
}
619640

620641
pfree(item);
@@ -652,7 +673,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
652673

653674
fakeItem.blkno=GIST_ROOT_BLKNO;
654675
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
655-
gistScanPage(scan,&fakeItem,NULL,NULL,NULL);
676+
gistScanPage(scan,&fakeItem,NULL,NULL,NULL,NULL);
656677
}
657678

658679
if (scan->numberOfOrderBys>0)
@@ -746,7 +767,10 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
746767
* this page, we fall out of the inner "do" and loop around to
747768
* return them.
748769
*/
749-
gistScanPage(scan,item,item->distances,NULL,NULL);
770+
gistScanPage(scan,item,
771+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
772+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
773+
NULL,NULL);
750774

751775
pfree(item);
752776
}while (so->nPageData==0);
@@ -777,7 +801,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
777801

778802
fakeItem.blkno=GIST_ROOT_BLKNO;
779803
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
780-
gistScanPage(scan,&fakeItem,NULL,tbm,&ntids);
804+
gistScanPage(scan,&fakeItem,NULL,NULL,tbm,&ntids);
781805

782806
/*
783807
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -792,7 +816,10 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
792816

793817
CHECK_FOR_INTERRUPTS();
794818

795-
gistScanPage(scan,item,item->distances,tbm,&ntids);
819+
gistScanPage(scan,item,
820+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
821+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
822+
tbm,&ntids);
796823

797824
pfree(item);
798825
}

‎src/backend/access/gist/gistscan.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,30 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
3333
constGISTSearchItem*sb= (constGISTSearchItem*)b;
3434
IndexScanDescscan= (IndexScanDesc)arg;
3535
inti;
36+
double*da=GISTSearchItemDistanceValues(sa,scan->numberOfOrderBys),
37+
*db=GISTSearchItemDistanceValues(sb,scan->numberOfOrderBys);
38+
bool*na=GISTSearchItemDistanceNulls(sa,scan->numberOfOrderBys),
39+
*nb=GISTSearchItemDistanceNulls(sb,scan->numberOfOrderBys);
3640

3741
/* Order according to distance comparison */
3842
for (i=0;i<scan->numberOfOrderBys;i++)
3943
{
40-
intcmp=-float8_cmp_internal(sa->distances[i],sb->distances[i]);
44+
if (na[i])
45+
{
46+
if (!nb[i])
47+
return-1;
48+
}
49+
elseif (nb[i])
50+
{
51+
return1;
52+
}
53+
else
54+
{
55+
intcmp=-float8_cmp_internal(da[i],db[i]);
4156

42-
if (cmp!=0)
43-
returncmp;
57+
if (cmp!=0)
58+
returncmp;
59+
}
4460
}
4561

4662
/* Heap items go before inner pages, to ensure a depth-first search */
@@ -84,7 +100,8 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
84100
so->queueCxt=giststate->scanCxt;/* see gistrescan */
85101

86102
/* workspaces with size dependent on numberOfOrderBys: */
87-
so->distances=palloc(sizeof(double)*scan->numberOfOrderBys);
103+
so->distanceValues=palloc(sizeof(double)*scan->numberOfOrderBys);
104+
so->distanceNulls=palloc(sizeof(bool)*scan->numberOfOrderBys);
88105
so->qual_ok= true;/* in case there are zero keys */
89106
if (scan->numberOfOrderBys>0)
90107
{

‎src/include/access/gist_private.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,13 +136,30 @@ typedef struct GISTSearchItem
136136
/* we must store parentlsn to detect whether a split occurred */
137137
GISTSearchHeapItemheap;/* heap info, if heap tuple */
138138
}data;
139-
doubledistances[FLEXIBLE_ARRAY_MEMBER];/* numberOfOrderBys
140-
* entries */
139+
140+
/*
141+
* This data structure is followed by arrays of distance values and
142+
* distance null flags. Size of both arrays is
143+
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
144+
* arrays.
145+
*/
141146
}GISTSearchItem;
142147

143148
#defineGISTSearchItemIsHeap(item)((item).blkno == InvalidBlockNumber)
144149

145-
#defineSizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
150+
#defineSizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
151+
(sizeof(double) + sizeof(bool)) * (n_distances))
152+
153+
/*
154+
* We actually don't need n_distances compute pointer to distance values.
155+
* Nevertheless take n_distances as argument to have same arguments list for
156+
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
157+
*/
158+
#defineGISTSearchItemDistanceValues(item,n_distances) \
159+
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
160+
161+
#defineGISTSearchItemDistanceNulls(item,n_distances) \
162+
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
146163

147164
/*
148165
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -158,7 +175,8 @@ typedef struct GISTScanOpaqueData
158175
boolfirstCall;/* true until first gistgettuple call */
159176

160177
/* pre-allocated workspace arrays */
161-
double*distances;/* output area for gistindex_keytest */
178+
double*distanceValues;/* output area for gistindex_keytest */
179+
bool*distanceNulls;
162180

163181
/* info about killed items if any (killedItems is NULL if never used) */
164182
OffsetNumber*killedItems;/* offset numbers of killed items */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp