Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit2a63683

Browse files
committed
Add support for nearest-neighbor (KNN) searches to SP-GiST
Currently, KNN searches were supported only by GiST. SP-GiST also capable tosupport them. This commit implements that support. SP-GiST scan stack isreplaced with queue, which serves as stack if no ordering is specified. KNNsupport is provided for three SP-GIST opclasses: quad_point_ops, kd_point_opsand poly_ops (catversion is bumped). Some common parts between GiST and SP-GiSTKNNs are extracted into separate functions.Discussion:https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ruAuthor: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad SterzhanovReview: Andrey Borodin, Alexander Korotkov
1 parentd0cfc3d commit2a63683

29 files changed

+1669
-416
lines changed

‎doc/src/sgml/indices.sgml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,13 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10;
281281
For more information see <xref linkend="spgist"/>.
282282
</para>
283283

284+
<para>
285+
Like GiST, SP-GiST supports <quote>nearest-neighbor</quote> searches.
286+
For SP-GiST operator classes that support distance ordering, the
287+
corresponding operator is specified in the <quote>Ordering Operators</quote>
288+
column in <xref linkend="spgist-builtin-opclasses-table"/>.
289+
</para>
290+
284291
<para>
285292
<indexterm>
286293
<primary>index</primary>

‎doc/src/sgml/spgist.sgml

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,13 @@
6464

6565
<table id="spgist-builtin-opclasses-table">
6666
<title>Built-in <acronym>SP-GiST</acronym> Operator Classes</title>
67-
<tgroup cols="3">
67+
<tgroup cols="4">
6868
<thead>
6969
<row>
7070
<entry>Name</entry>
7171
<entry>Indexed Data Type</entry>
7272
<entry>Indexable Operators</entry>
73+
<entry>Ordering Operators</entry>
7374
</row>
7475
</thead>
7576
<tbody>
@@ -84,6 +85,9 @@
8485
<literal>&gt;^</literal>
8586
<literal>~=</literal>
8687
</entry>
88+
<entry>
89+
<literal>&lt;-&gt;</literal>
90+
</entry>
8791
</row>
8892
<row>
8993
<entry><literal>quad_point_ops</literal></entry>
@@ -96,6 +100,9 @@
96100
<literal>&gt;^</literal>
97101
<literal>~=</literal>
98102
</entry>
103+
<entry>
104+
<literal>&lt;-&gt;</literal>
105+
</entry>
99106
</row>
100107
<row>
101108
<entry><literal>range_ops</literal></entry>
@@ -111,6 +118,8 @@
111118
<literal>&gt;&gt;</literal>
112119
<literal>@&gt;</literal>
113120
</entry>
121+
<entry>
122+
</entry>
114123
</row>
115124
<row>
116125
<entry><literal>box_ops</literal></entry>
@@ -129,6 +138,8 @@
129138
<literal>|&gt;&gt;</literal>
130139
<literal>|&amp;&gt;</literal>
131140
</entry>
141+
<entry>
142+
</entry>
132143
</row>
133144
<row>
134145
<entry><literal>poly_ops</literal></entry>
@@ -147,6 +158,9 @@
147158
<literal>|&gt;&gt;</literal>
148159
<literal>|&amp;&gt;</literal>
149160
</entry>
161+
<entry>
162+
<literal>&lt;-&gt;</literal>
163+
</entry>
150164
</row>
151165
<row>
152166
<entry><literal>text_ops</literal></entry>
@@ -163,6 +177,8 @@
163177
<literal>~&gt;~</literal>
164178
<literal>^@</literal>
165179
</entry>
180+
<entry>
181+
</entry>
166182
</row>
167183
<row>
168184
<entry><literal>inet_ops</literal></entry>
@@ -180,6 +196,8 @@
180196
<literal>&lt;=</literal>
181197
<literal>=</literal>
182198
</entry>
199+
<entry>
200+
</entry>
183201
</row>
184202
</tbody>
185203
</tgroup>
@@ -191,6 +209,12 @@
191209
supports the same operators but uses a different index data structure which
192210
may offer better performance in some applications.
193211
</para>
212+
<para>
213+
The <literal>quad_point_ops</literal>, <literal>kd_point_ops</literal> and
214+
<literal>poly_ops</literal> operator classes support the <literal>&lt;-&gt;</literal>
215+
ordering operator, which enables the k-nearest neighbor (<literal>k-NN</literal>)
216+
search over indexed point or polygon datasets.
217+
</para>
194218

195219
</sect1>
196220

@@ -630,7 +654,10 @@ CREATE FUNCTION my_inner_consistent(internal, internal) RETURNS void ...
630654
typedef struct spgInnerConsistentIn
631655
{
632656
ScanKey scankeys; /* array of operators and comparison values */
633-
int nkeys; /* length of array */
657+
ScanKey orderbys; /* array of ordering operators and comparison
658+
* values */
659+
int nkeys; /* length of scankeys array */
660+
int norderbys; /* length of orderbys array */
634661

635662
Datum reconstructedValue; /* value reconstructed at parent */
636663
void *traversalValue; /* opclass-specific traverse value */
@@ -653,6 +680,7 @@ typedef struct spgInnerConsistentOut
653680
int *levelAdds; /* increment level by this much for each */
654681
Datum *reconstructedValues; /* associated reconstructed values */
655682
void **traversalValues; /* opclass-specific traverse values */
683+
double **distances; /* associated distances */
656684
} spgInnerConsistentOut;
657685
</programlisting>
658686

@@ -667,6 +695,8 @@ typedef struct spgInnerConsistentOut
667695
In particular it is not necessary to check <structfield>sk_flags</structfield> to
668696
see if the comparison value is NULL, because the SP-GiST core code
669697
will filter out such conditions.
698+
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
699+
describes ordering operators (if any) in the same manner.
670700
<structfield>reconstructedValue</structfield> is the value reconstructed for the
671701
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
672702
<function>inner_consistent</function> function did not provide a value at the
@@ -709,6 +739,10 @@ typedef struct spgInnerConsistentOut
709739
of <structname>spgConfigOut</structname>.<structfield>leafType</structfield> type
710740
reconstructed for each child node to be visited; otherwise, leave
711741
<structfield>reconstructedValues</structfield> as NULL.
742+
If ordered search is performed, set <structfield>distances</structfield>
743+
to an array of distance values according to <structfield>orderbys</structfield>
744+
array (nodes with lowest distances will be processed first). Leave it
745+
NULL otherwise.
712746
If it is desired to pass down additional out-of-band information
713747
(<quote>traverse values</quote>) to lower levels of the tree search,
714748
set <structfield>traversalValues</structfield> to an array of the appropriate
@@ -717,6 +751,7 @@ typedef struct spgInnerConsistentOut
717751
Note that the <function>inner_consistent</function> function is
718752
responsible for palloc'ing the
719753
<structfield>nodeNumbers</structfield>, <structfield>levelAdds</structfield>,
754+
<structfield>distances</structfield>,
720755
<structfield>reconstructedValues</structfield>, and
721756
<structfield>traversalValues</structfield> arrays in the current memory context.
722757
However, any output traverse values pointed to by
@@ -747,7 +782,10 @@ CREATE FUNCTION my_leaf_consistent(internal, internal) RETURNS bool ...
747782
typedef struct spgLeafConsistentIn
748783
{
749784
ScanKey scankeys; /* array of operators and comparison values */
750-
int nkeys; /* length of array */
785+
ScanKey orderbys; /* array of ordering operators and comparison
786+
* values */
787+
int nkeys; /* length of scankeys array */
788+
int norderbys; /* length of orderbys array */
751789

752790
Datum reconstructedValue; /* value reconstructed at parent */
753791
void *traversalValue; /* opclass-specific traverse value */
@@ -759,8 +797,10 @@ typedef struct spgLeafConsistentIn
759797

760798
typedef struct spgLeafConsistentOut
761799
{
762-
Datum leafValue; /* reconstructed original data, if any */
763-
bool recheck; /* set true if operator must be rechecked */
800+
Datum leafValue; /* reconstructed original data, if any */
801+
bool recheck; /* set true if operator must be rechecked */
802+
bool recheckDistances; /* set true if distances must be rechecked */
803+
double *distances; /* associated distances */
764804
} spgLeafConsistentOut;
765805
</programlisting>
766806

@@ -775,6 +815,8 @@ typedef struct spgLeafConsistentOut
775815
In particular it is not necessary to check <structfield>sk_flags</structfield> to
776816
see if the comparison value is NULL, because the SP-GiST core code
777817
will filter out such conditions.
818+
The array <structfield>orderbys</structfield>, of length <structfield>norderbys</structfield>,
819+
describes the ordering operators in the same manner.
778820
<structfield>reconstructedValue</structfield> is the value reconstructed for the
779821
parent tuple; it is <literal>(Datum) 0</literal> at the root level or if the
780822
<function>inner_consistent</function> function did not provide a value at the
@@ -803,6 +845,12 @@ typedef struct spgLeafConsistentOut
803845
<structfield>recheck</structfield> may be set to <literal>true</literal> if the match
804846
is uncertain and so the operator(s) must be re-applied to the actual
805847
heap tuple to verify the match.
848+
If ordered search is performed, set <structfield>distances</structfield>
849+
to an array of distance values according to <structfield>orderbys</structfield>
850+
array. Leave it NULL otherwise. If at least one of returned distances
851+
is not exact, set <structfield>recheckDistances</structfield> to true.
852+
In this case, the executor will calculate the exact distances after
853+
fetching the tuple from the heap, and will reorder the tuples if needed.
806854
</para>
807855
</listitem>
808856
</varlistentry>

‎doc/src/sgml/xindex.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1242,7 +1242,7 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING)
12421242
<title>Ordering Operators</title>
12431243

12441244
<para>
1245-
Some index access methods (currently, only GiST) support the concept of
1245+
Some index access methods (currently, only GiST and SP-GiST) support the concept of
12461246
<firstterm>ordering operators</firstterm>. What we have been discussing so far
12471247
are <firstterm>search operators</firstterm>. A search operator is one for which
12481248
the index can be searched to find all rows satisfying

‎src/backend/access/gist/gistget.c

Lines changed: 5 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@
1414
*/
1515
#include"postgres.h"
1616

17+
#include"access/genam.h"
1718
#include"access/gist_private.h"
1819
#include"access/relscan.h"
19-
#include"catalog/pg_type.h"
2020
#include"miscadmin.h"
2121
#include"storage/lmgr.h"
2222
#include"storage/predicate.h"
@@ -543,7 +543,6 @@ getNextNearest(IndexScanDesc scan)
543543
{
544544
GISTScanOpaqueso= (GISTScanOpaque)scan->opaque;
545545
boolres= false;
546-
inti;
547546

548547
if (scan->xs_hitup)
549548
{
@@ -564,45 +563,10 @@ getNextNearest(IndexScanDesc scan)
564563
/* found a heap item at currently minimal distance */
565564
scan->xs_ctup.t_self=item->data.heap.heapPtr;
566565
scan->xs_recheck=item->data.heap.recheck;
567-
scan->xs_recheckorderby=item->data.heap.recheckDistances;
568-
for (i=0;i<scan->numberOfOrderBys;i++)
569-
{
570-
if (so->orderByTypes[i]==FLOAT8OID)
571-
{
572-
#ifndefUSE_FLOAT8_BYVAL
573-
/* must free any old value to avoid memory leakage */
574-
if (!scan->xs_orderbynulls[i])
575-
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
576-
#endif
577-
scan->xs_orderbyvals[i]=Float8GetDatum(item->distances[i]);
578-
scan->xs_orderbynulls[i]= false;
579-
}
580-
elseif (so->orderByTypes[i]==FLOAT4OID)
581-
{
582-
/* convert distance function's result to ORDER BY type */
583-
#ifndefUSE_FLOAT4_BYVAL
584-
/* must free any old value to avoid memory leakage */
585-
if (!scan->xs_orderbynulls[i])
586-
pfree(DatumGetPointer(scan->xs_orderbyvals[i]));
587-
#endif
588-
scan->xs_orderbyvals[i]=Float4GetDatum((float4)item->distances[i]);
589-
scan->xs_orderbynulls[i]= false;
590-
}
591-
else
592-
{
593-
/*
594-
* If the ordering operator's return value is anything
595-
* else, we don't know how to convert the float8 bound
596-
* calculated by the distance function to that. The
597-
* executor won't actually need the order by values we
598-
* return here, if there are no lossy results, so only
599-
* insist on converting if the *recheck flag is set.
600-
*/
601-
if (scan->xs_recheckorderby)
602-
elog(ERROR,"GiST operator family's FOR ORDER BY operator must return float8 or float4 if the distance function is lossy");
603-
scan->xs_orderbynulls[i]= true;
604-
}
605-
}
566+
567+
index_store_float8_orderby_distances(scan,so->orderByTypes,
568+
item->distances,
569+
item->data.heap.recheckDistances);
606570

607571
/* in an index-only scan, also return the reconstructed tuple. */
608572
if (scan->xs_want_itup)

‎src/backend/access/gist/gistutil.c

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include"storage/lmgr.h"
2424
#include"utils/float.h"
2525
#include"utils/syscache.h"
26+
#include"utils/lsyscache.h"
2627

2728

2829
/*
@@ -871,12 +872,6 @@ gistproperty(Oid index_oid, int attno,
871872
IndexAMPropertyprop,constchar*propname,
872873
bool*res,bool*isnull)
873874
{
874-
HeapTupletuple;
875-
Form_pg_indexrd_indexPG_USED_FOR_ASSERTS_ONLY;
876-
Form_pg_opclassrd_opclass;
877-
Datumdatum;
878-
booldisnull;
879-
oidvector*indclass;
880875
Oidopclass,
881876
opfamily,
882877
opcintype;
@@ -910,41 +905,19 @@ gistproperty(Oid index_oid, int attno,
910905
}
911906

912907
/* First we need to know the column's opclass. */
913-
914-
tuple=SearchSysCache1(INDEXRELID,ObjectIdGetDatum(index_oid));
915-
if (!HeapTupleIsValid(tuple))
908+
opclass=get_index_column_opclass(index_oid,attno);
909+
if (!OidIsValid(opclass))
916910
{
917911
*isnull= true;
918912
return true;
919913
}
920-
rd_index= (Form_pg_index)GETSTRUCT(tuple);
921-
922-
/* caller is supposed to guarantee this */
923-
Assert(attno>0&&attno <=rd_index->indnatts);
924-
925-
datum=SysCacheGetAttr(INDEXRELID,tuple,
926-
Anum_pg_index_indclass,&disnull);
927-
Assert(!disnull);
928-
929-
indclass= ((oidvector*)DatumGetPointer(datum));
930-
opclass=indclass->values[attno-1];
931-
932-
ReleaseSysCache(tuple);
933914

934915
/* Now look up the opclass family and input datatype. */
935-
936-
tuple=SearchSysCache1(CLAOID,ObjectIdGetDatum(opclass));
937-
if (!HeapTupleIsValid(tuple))
916+
if (!get_opclass_opfamily_and_input_type(opclass,&opfamily,&opcintype))
938917
{
939918
*isnull= true;
940919
return true;
941920
}
942-
rd_opclass= (Form_pg_opclass)GETSTRUCT(tuple);
943-
944-
opfamily=rd_opclass->opcfamily;
945-
opcintype=rd_opclass->opcintype;
946-
947-
ReleaseSysCache(tuple);
948921

949922
/* And now we can check whether the function is provided. */
950923

@@ -967,6 +940,8 @@ gistproperty(Oid index_oid, int attno,
967940
Int16GetDatum(GIST_COMPRESS_PROC));
968941
}
969942

943+
*isnull= false;
944+
970945
return true;
971946
}
972947

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp