Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commite6af7b3

Browse files
committed
Fix handling of NULL distances in KNN-GiST
In order to implement NULL LAST semantic GiST previously assumed distance tothe NULL value to be Inf. However, our distance functions can return Inf andNaN for non-null values. In such cases, NULL LAST semantic appears to bebroken. This commit fixes that by introducing separate array of null flags fordistances.Backpatch to all supported versions.Discussion:https://postgr.es/m/CAPpHfdsNvNdA0DBS%2BwMpFrgwT6C3-q50sFVGLSiuWnV3FqOJuQ%40mail.gmail.comAuthor: Alexander KorotkovBackpatch-through: 9.4
1 parentbc67f41 commite6af7b3

File tree

7 files changed

+106
-36
lines changed

7 files changed

+106
-36
lines changed

‎src/backend/access/gist/gistget.c

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,9 @@ gistkillitems(IndexScanDesc scan)
112112
* Similarly, *recheck_distances_p is set to indicate whether the distances
113113
* need to be rechecked, and it is also ignored for non-leaf entries.
114114
*
115-
* If we are doing an ordered scan, so->distances[] is filled with distance
116-
* data from the distance() functions before returning success.
115+
* If we are doing an ordered scan, so->distancesValues[] and
116+
* so->distancesNulls[] is filled with distance data from the distance()
117+
* functions before returning success.
117118
*
118119
* We must decompress the key in the IndexTuple before passing it to the
119120
* sk_funcs (which actually are the opclass Consistent or Distance methods).
@@ -134,7 +135,8 @@ gistindex_keytest(IndexScanDesc scan,
134135
GISTSTATE*giststate=so->giststate;
135136
ScanKeykey=scan->keyData;
136137
intkeySize=scan->numberOfKeys;
137-
double*distance_p;
138+
double*distance_value_p;
139+
bool*distance_null_p;
138140
Relationr=scan->indexRelation;
139141

140142
*recheck_p= false;
@@ -152,7 +154,10 @@ gistindex_keytest(IndexScanDesc scan,
152154
if (GistPageIsLeaf(page))/* shouldn't happen */
153155
elog(ERROR,"invalid GiST tuple found on leaf page");
154156
for (i=0;i<scan->numberOfOrderBys;i++)
155-
so->distances[i]=-get_float8_infinity();
157+
{
158+
so->distanceValues[i]=-get_float8_infinity();
159+
so->distanceNulls[i]= false;
160+
}
156161
return true;
157162
}
158163

@@ -235,7 +240,8 @@ gistindex_keytest(IndexScanDesc scan,
235240

236241
/* OK, it passes --- now let's compute the distances */
237242
key=scan->orderByData;
238-
distance_p=so->distances;
243+
distance_value_p=so->distanceValues;
244+
distance_null_p=so->distanceNulls;
239245
keySize=scan->numberOfOrderBys;
240246
while (keySize>0)
241247
{
@@ -249,8 +255,9 @@ gistindex_keytest(IndexScanDesc scan,
249255

250256
if ((key->sk_flags&SK_ISNULL)||isNull)
251257
{
252-
/* Assume distance computes as null and sorts to the end */
253-
*distance_p=get_float8_infinity();
258+
/* Assume distance computes as null */
259+
*distance_value_p=0.0;
260+
*distance_null_p= true;
254261
}
255262
else
256263
{
@@ -287,11 +294,13 @@ gistindex_keytest(IndexScanDesc scan,
287294
ObjectIdGetDatum(key->sk_subtype),
288295
PointerGetDatum(&recheck));
289296
*recheck_distances_p |=recheck;
290-
*distance_p=DatumGetFloat8(dist);
297+
*distance_value_p=DatumGetFloat8(dist);
298+
*distance_null_p= false;
291299
}
292300

293301
key++;
294-
distance_p++;
302+
distance_value_p++;
303+
distance_null_p++;
295304
keySize--;
296305
}
297306

@@ -304,7 +313,8 @@ gistindex_keytest(IndexScanDesc scan,
304313
*
305314
* scan: index scan we are executing
306315
* pageItem: search queue item identifying an index page to scan
307-
* myDistances: distances array associated with pageItem, or NULL at the root
316+
* myDistanceValues: distances array associated with pageItem, or NULL at the root
317+
* myDistanceNulls: null flags for myDistanceValues array, or NULL at the root
308318
* tbm: if not NULL, gistgetbitmap's output bitmap
309319
* ntids: if not NULL, gistgetbitmap's output tuple counter
310320
*
@@ -321,7 +331,8 @@ gistindex_keytest(IndexScanDesc scan,
321331
* sibling will be processed next.
322332
*/
323333
staticvoid
324-
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,double*myDistances,
334+
gistScanPage(IndexScanDescscan,GISTSearchItem*pageItem,
335+
double*myDistanceValues,bool*myDistanceNulls,
325336
TIDBitmap*tbm,int64*ntids)
326337
{
327338
GISTScanOpaqueso= (GISTScanOpaque)scan->opaque;
@@ -359,7 +370,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
359370
GISTSearchItem*item;
360371

361372
/* This can't happen when starting at the root */
362-
Assert(myDistances!=NULL);
373+
Assert(myDistanceValues!=NULL&&myDistanceNulls!=NULL);
363374

364375
oldcxt=MemoryContextSwitchTo(so->queueCxt);
365376

@@ -369,8 +380,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
369380
item->data.parentlsn=pageItem->data.parentlsn;
370381

371382
/* Insert it into the queue using same distances as for this page */
372-
memcpy(item->distances,myDistances,
373-
sizeof(double)*scan->numberOfOrderBys);
383+
memcpy(GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
384+
myDistanceValues,sizeof(double)*scan->numberOfOrderBys);
385+
memcpy(GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
386+
myDistanceNulls,sizeof(bool)*scan->numberOfOrderBys);
374387

375388
pairingheap_add(so->queue,&item->phNode);
376389

@@ -479,6 +492,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
479492
* search.
480493
*/
481494
GISTSearchItem*item;
495+
intnOrderBys=scan->numberOfOrderBys;
482496

483497
oldcxt=MemoryContextSwitchTo(so->queueCxt);
484498

@@ -513,8 +527,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
513527
}
514528

515529
/* Insert it into the queue using new distance data */
516-
memcpy(item->distances,so->distances,
517-
sizeof(double)*scan->numberOfOrderBys);
530+
memcpy(GISTSearchItemDistanceValues(item,nOrderBys),
531+
so->distanceValues,sizeof(double)*nOrderBys);
532+
memcpy(GISTSearchItemDistanceNulls(item,nOrderBys),
533+
so->distanceNulls,sizeof(bool)*nOrderBys);
518534

519535
pairingheap_add(so->queue,&item->phNode);
520536

@@ -579,7 +595,8 @@ getNextNearest(IndexScanDesc scan)
579595
scan->xs_recheck=item->data.heap.recheck;
580596

581597
index_store_float8_orderby_distances(scan,so->orderByTypes,
582-
item->distances,
598+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
599+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
583600
item->data.heap.recheckDistances);
584601

585602
/* in an index-only scan, also return the reconstructed tuple. */
@@ -592,7 +609,10 @@ getNextNearest(IndexScanDesc scan)
592609
/* visit an index page, extract its items into queue */
593610
CHECK_FOR_INTERRUPTS();
594611

595-
gistScanPage(scan,item,item->distances,NULL,NULL);
612+
gistScanPage(scan,item,
613+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
614+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
615+
NULL,NULL);
596616
}
597617

598618
pfree(item);
@@ -630,7 +650,7 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
630650

631651
fakeItem.blkno=GIST_ROOT_BLKNO;
632652
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
633-
gistScanPage(scan,&fakeItem,NULL,NULL,NULL);
653+
gistScanPage(scan,&fakeItem,NULL,NULL,NULL,NULL);
634654
}
635655

636656
if (scan->numberOfOrderBys>0)
@@ -724,7 +744,10 @@ gistgettuple(IndexScanDesc scan, ScanDirection dir)
724744
* this page, we fall out of the inner "do" and loop around to
725745
* return them.
726746
*/
727-
gistScanPage(scan,item,item->distances,NULL,NULL);
747+
gistScanPage(scan,item,
748+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
749+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
750+
NULL,NULL);
728751

729752
pfree(item);
730753
}while (so->nPageData==0);
@@ -755,7 +778,7 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
755778

756779
fakeItem.blkno=GIST_ROOT_BLKNO;
757780
memset(&fakeItem.data.parentlsn,0,sizeof(GistNSN));
758-
gistScanPage(scan,&fakeItem,NULL,tbm,&ntids);
781+
gistScanPage(scan,&fakeItem,NULL,NULL,tbm,&ntids);
759782

760783
/*
761784
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -770,7 +793,10 @@ gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
770793

771794
CHECK_FOR_INTERRUPTS();
772795

773-
gistScanPage(scan,item,item->distances,tbm,&ntids);
796+
gistScanPage(scan,item,
797+
GISTSearchItemDistanceValues(item,scan->numberOfOrderBys),
798+
GISTSearchItemDistanceNulls(item,scan->numberOfOrderBys),
799+
tbm,&ntids);
774800

775801
pfree(item);
776802
}

‎src/backend/access/gist/gistscan.c

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,30 @@ pairingheap_GISTSearchItem_cmp(const pairingheap_node *a, const pairingheap_node
3333
constGISTSearchItem*sb= (constGISTSearchItem*)b;
3434
IndexScanDescscan= (IndexScanDesc)arg;
3535
inti;
36+
double*da=GISTSearchItemDistanceValues(sa,scan->numberOfOrderBys),
37+
*db=GISTSearchItemDistanceValues(sb,scan->numberOfOrderBys);
38+
bool*na=GISTSearchItemDistanceNulls(sa,scan->numberOfOrderBys),
39+
*nb=GISTSearchItemDistanceNulls(sb,scan->numberOfOrderBys);
3640

3741
/* Order according to distance comparison */
3842
for (i=0;i<scan->numberOfOrderBys;i++)
3943
{
40-
intcmp=-float8_cmp_internal(sa->distances[i],sb->distances[i]);
44+
if (na[i])
45+
{
46+
if (!nb[i])
47+
return-1;
48+
}
49+
elseif (nb[i])
50+
{
51+
return1;
52+
}
53+
else
54+
{
55+
intcmp=-float8_cmp_internal(da[i],db[i]);
4156

42-
if (cmp!=0)
43-
returncmp;
57+
if (cmp!=0)
58+
returncmp;
59+
}
4460
}
4561

4662
/* Heap items go before inner pages, to ensure a depth-first search */
@@ -84,7 +100,8 @@ gistbeginscan(Relation r, int nkeys, int norderbys)
84100
so->queueCxt=giststate->scanCxt;/* see gistrescan */
85101

86102
/* workspaces with size dependent on numberOfOrderBys: */
87-
so->distances=palloc(sizeof(double)*scan->numberOfOrderBys);
103+
so->distanceValues=palloc(sizeof(double)*scan->numberOfOrderBys);
104+
so->distanceNulls=palloc(sizeof(bool)*scan->numberOfOrderBys);
88105
so->qual_ok= true;/* in case there are zero keys */
89106
if (scan->numberOfOrderBys>0)
90107
{

‎src/backend/access/index/indexam.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -847,13 +847,14 @@ index_getprocinfo(Relation irel,
847847
*/
848848
void
849849
index_store_float8_orderby_distances(IndexScanDescscan,Oid*orderByTypes,
850-
double*distances,boolrecheckOrderBy)
850+
double*distanceValues,
851+
bool*distanceNulls,boolrecheckOrderBy)
851852
{
852853
inti;
853854

854855
scan->xs_recheckorderby=recheckOrderBy;
855856

856-
if (!distances)
857+
if (!distanceValues)
857858
{
858859
Assert(!scan->xs_recheckorderby);
859860

@@ -868,14 +869,19 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
868869

869870
for (i=0;i<scan->numberOfOrderBys;i++)
870871
{
872+
if (distanceNulls&&distanceNulls[i])
873+
{
874+
scan->xs_orderbyvals[i]= (Datum)0;
875+
scan->xs_orderbynulls[i]= true;
876+
}
871877
if (orderByTypes[i]==FLOAT8OID)
872878
{
873879
#ifndefUSE_FLOAT8_BYVAL
874880
/* must free any old value to avoid memory leakage */
875881
if (!scan->xs_orderbynulls[i])
876882
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
877883
#endif
878-
scan->xs_orderbyvals[i]=Float8GetDatum(distances[i]);
884+
scan->xs_orderbyvals[i]=Float8GetDatum(distanceValues[i]);
879885
scan->xs_orderbynulls[i]= false;
880886
}
881887
elseif (orderByTypes[i]==FLOAT4OID)
@@ -886,7 +892,7 @@ index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes,
886892
if (!scan->xs_orderbynulls[i])
887893
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
888894
#endif
889-
scan->xs_orderbyvals[i]=Float4GetDatum((float4)distances[i]);
895+
scan->xs_orderbyvals[i]=Float4GetDatum((float4)distanceValues[i]);
890896
scan->xs_orderbynulls[i]= false;
891897
}
892898
else

‎src/backend/access/spgist/spgscan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,7 @@ spggettuple(IndexScanDesc scan, ScanDirection dir)
934934
if (so->numberOfOrderBys>0)
935935
index_store_float8_orderby_distances(scan,so->orderByTypes,
936936
so->distances[so->iPtr],
937+
NULL,
937938
so->recheckDistances[so->iPtr]);
938939
so->iPtr++;
939940
return true;

‎src/include/access/genam.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,9 @@ extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
178178
externFmgrInfo*index_getprocinfo(Relationirel,AttrNumberattnum,
179179
uint16procnum);
180180
externvoidindex_store_float8_orderby_distances(IndexScanDescscan,
181-
Oid*orderByTypes,double*distances,
181+
Oid*orderByTypes,
182+
double*distanceValues,
183+
bool*distanceNulls,
182184
boolrecheckOrderBy);
183185

184186
/*

‎src/include/access/gist_private.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,30 @@ typedef struct GISTSearchItem
138138
/* we must store parentlsn to detect whether a split occurred */
139139
GISTSearchHeapItemheap;/* heap info, if heap tuple */
140140
}data;
141-
doubledistances[FLEXIBLE_ARRAY_MEMBER];/* numberOfOrderBys
142-
* entries */
141+
142+
/*
143+
* This data structure is followed by arrays of distance values and
144+
* distance null flags. Size of both arrays is
145+
* IndexScanDesc->numberOfOrderBys. See macros below for accessing those
146+
* arrays.
147+
*/
143148
}GISTSearchItem;
144149

145150
#defineGISTSearchItemIsHeap(item)((item).blkno == InvalidBlockNumber)
146151

147-
#defineSizeOfGISTSearchItem(n_distances) (offsetof(GISTSearchItem, distances) + sizeof(double) * (n_distances))
152+
#defineSizeOfGISTSearchItem(n_distances) (DOUBLEALIGN(sizeof(GISTSearchItem)) + \
153+
(sizeof(double) + sizeof(bool)) * (n_distances))
154+
155+
/*
156+
* We actually don't need n_distances compute pointer to distance values.
157+
* Nevertheless take n_distances as argument to have same arguments list for
158+
* GISTSearchItemDistanceValues() and GISTSearchItemDistanceNulls().
159+
*/
160+
#defineGISTSearchItemDistanceValues(item,n_distances) \
161+
((double *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem))))
162+
163+
#defineGISTSearchItemDistanceNulls(item,n_distances) \
164+
((bool *) ((Pointer) (item) + DOUBLEALIGN(sizeof(GISTSearchItem)) + sizeof(double) * (n_distances)))
148165

149166
/*
150167
* GISTScanOpaqueData: private state for a scan of a GiST index
@@ -160,7 +177,8 @@ typedef struct GISTScanOpaqueData
160177
boolfirstCall;/* true until first gistgettuple call */
161178

162179
/* pre-allocated workspace arrays */
163-
double*distances;/* output area for gistindex_keytest */
180+
double*distanceValues;/* output area for gistindex_keytest */
181+
bool*distanceNulls;
164182

165183
/* info about killed items if any (killedItems is NULL if never used) */
166184
OffsetNumber*killedItems;/* offset numbers of killed items */

‎src/test/regress/expected/create_index.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,8 +531,8 @@ SELECT * FROM point_tbl ORDER BY f1 <-> '0,1';
531531
(-5,-12)
532532
(5.1,34.5)
533533
(1e+300,Infinity)
534-
535534
(NaN,NaN)
535+
536536
(10 rows)
537537

538538
EXPLAIN (COSTS OFF)

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp