Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit b525bf7

Browse files
committed
Add KNNGIST support to contrib/pg_trgm.
Teodor Sigaev, with some revision by Tom
1 parent b576757, commit b525bf7

File tree

9 files changed

+213
-42
lines changed

9 files changed

+213
-42
lines changed

‎contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1187,6 +1187,13 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
11871187
qwertyu0988 | 0.333333
11881188
(1 row)
11891189

1190+
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
1191+
?column? | t
1192+
----------+-------------
1193+
0.411765 | qwertyu0988
1194+
0.5 | qwertyu0987
1195+
(2 rows)
1196+
11901197
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
11911198
set enable_seqscan=off;
11921199
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
@@ -2315,6 +2322,22 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
23152322
qwertyu0988 | 0.333333
23162323
(1 row)
23172324

2325+
explain (costs off)
2326+
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
2327+
QUERY PLAN
2328+
---------------------------------------------------
2329+
Limit
2330+
-> Index Scan using trgm_idx on test_trgm
2331+
Order By: (t <-> 'q0987wertyu0988'::text)
2332+
(3 rows)
2333+
2334+
select t <-> 'q0987wertyu0988', t from test_trgm order by t <-> 'q0987wertyu0988' limit 2;
2335+
?column? | t
2336+
----------+-------------
2337+
0.411765 | qwertyu0988
2338+
0.5 | qwertyu0987
2339+
(2 rows)
2340+
23182341
drop index trgm_idx;
23192342
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);
23202343
set enable_seqscan=off;

‎contrib/pg_trgm/pg_trgm.sql.in

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ LANGUAGE C STRICT IMMUTABLE;
2626
CREATE OR REPLACE FUNCTION similarity_op(text,text)
2727
RETURNS bool
2828
AS 'MODULE_PATHNAME'
29-
LANGUAGE C STRICT STABLE;
29+
LANGUAGE C STRICT STABLE; -- stable because depends on trgm_limit
3030

3131
CREATE OPERATOR % (
3232
LEFTARG = text,
@@ -37,6 +37,18 @@ CREATE OPERATOR % (
3737
JOIN = contjoinsel
3838
);
3939

40+
CREATE OR REPLACE FUNCTION similarity_dist(text,text)
41+
RETURNS float4
42+
AS 'MODULE_PATHNAME'
43+
LANGUAGE C STRICT IMMUTABLE;
44+
45+
CREATE OPERATOR <-> (
46+
LEFTARG = text,
47+
RIGHTARG = text,
48+
PROCEDURE = similarity_dist,
49+
COMMUTATOR = '<->'
50+
);
51+
4052
-- gist key
4153
CREATE OR REPLACE FUNCTION gtrgm_in(cstring)
4254
RETURNS gtrgm
@@ -60,6 +72,11 @@ RETURNS bool
6072
AS 'MODULE_PATHNAME'
6173
LANGUAGE C IMMUTABLE STRICT;
6274

75+
CREATE OR REPLACE FUNCTION gtrgm_distance(internal,text,int,oid)
76+
RETURNS float8
77+
AS 'MODULE_PATHNAME'
78+
LANGUAGE C IMMUTABLE STRICT;
79+
6380
CREATE OR REPLACE FUNCTION gtrgm_compress(internal)
6481
RETURNS internal
6582
AS 'MODULE_PATHNAME'
@@ -95,13 +112,15 @@ CREATE OPERATOR CLASS gist_trgm_ops
95112
FOR TYPE text USING gist
96113
AS
97114
OPERATOR 1 % (text, text),
115+
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
98116
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
99117
FUNCTION 2 gtrgm_union (bytea, internal),
100118
FUNCTION 3 gtrgm_compress (internal),
101119
FUNCTION 4 gtrgm_decompress (internal),
102120
FUNCTION 5 gtrgm_penalty (internal, internal, internal),
103121
FUNCTION 6 gtrgm_picksplit (internal, internal),
104122
FUNCTION 7 gtrgm_same (gtrgm, gtrgm, internal),
123+
FUNCTION 8 gtrgm_distance (internal, text, int, oid),
105124
STORAGE gtrgm;
106125

107126
-- support functions for gin

‎contrib/pg_trgm/sql/pg_trgm.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,17 @@ CREATE TABLE test_trgm(t text);
2626
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t %'qwertyu0988' order by sml desc, t;
2727
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t %'gwertyu0988' order by sml desc, t;
2828
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t %'gwertyu1988' order by sml desc, t;
29+
select t <->'q0987wertyu0988', t from test_trgm order by t <->'q0987wertyu0988' limit 2;
2930
3031
create index trgm_idx on test_trgm using gist (t gist_trgm_ops);
3132
set enable_seqscan=off;
3233
3334
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t %'qwertyu0988' order by sml desc, t;
3435
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t %'gwertyu0988' order by sml desc, t;
3536
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t %'gwertyu1988' order by sml desc, t;
37+
explain (costs off)
38+
select t <->'q0987wertyu0988', t from test_trgm order by t <->'q0987wertyu0988' limit 2;
39+
select t <->'q0987wertyu0988', t from test_trgm order by t <->'q0987wertyu0988' limit 2;
3640
3741
drop index trgm_idx;
3842
create index trgm_idx on test_trgm using gin (t gin_trgm_ops);

‎contrib/pg_trgm/trgm.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
#ifndef__TRGM_H__
55
#define__TRGM_H__
66

7-
#include"postgres.h"
8-
97
#include"access/gist.h"
108
#include"access/itup.h"
11-
#include"utils/builtins.h"
129
#include"storage/bufpage.h"
10+
#include"utils/builtins.h"
1311

1412
/* options */
1513
#defineLPADDING2
@@ -18,6 +16,10 @@
1816
#defineIGNORECASE
1917
#defineDIVUNION
2018

19+
/* operator strategy numbers */
20+
#defineSimilarityStrategyNumber1
21+
#defineDistanceStrategyNumber2
22+
2123

2224
typedefchartrgm[3];
2325

@@ -89,4 +91,4 @@ extern float4 trgm_limit;
8991
TRGM*generate_trgm(char*str,intslen);
9092
float4cnt_sml(TRGM*trg1,TRGM*trg2);
9193

92-
#endif
94+
#endif/* __TRGM_H__ */

‎contrib/pg_trgm/trgm_gin.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
/*
22
* contrib/pg_trgm/trgm_gin.c
33
*/
4+
#include"postgres.h"
5+
46
#include"trgm.h"
57

68
#include"access/gin.h"
@@ -10,6 +12,7 @@
1012
#include"utils/array.h"
1113
#include"utils/builtins.h"
1214

15+
1316
PG_FUNCTION_INFO_V1(gin_extract_trgm);
1417
Datumgin_extract_trgm(PG_FUNCTION_ARGS);
1518

‎contrib/pg_trgm/trgm_gist.c

Lines changed: 110 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
/*
22
* contrib/pg_trgm/trgm_gist.c
33
*/
4+
#include"postgres.h"
5+
46
#include"trgm.h"
57

68
#include"access/gist.h"
79
#include"access/itup.h"
10+
#include"access/skey.h"
811
#include"access/tuptoaster.h"
912
#include"storage/bufpage.h"
1013
#include"utils/array.h"
1114
#include"utils/builtins.h"
1215

16+
1317
PG_FUNCTION_INFO_V1(gtrgm_in);
1418
Datumgtrgm_in(PG_FUNCTION_ARGS);
1519

@@ -25,6 +29,9 @@ Datumgtrgm_decompress(PG_FUNCTION_ARGS);
2529
PG_FUNCTION_INFO_V1(gtrgm_consistent);
2630
Datumgtrgm_consistent(PG_FUNCTION_ARGS);
2731

32+
PG_FUNCTION_INFO_V1(gtrgm_distance);
33+
Datumgtrgm_distance(PG_FUNCTION_ARGS);
34+
2835
PG_FUNCTION_INFO_V1(gtrgm_union);
2936
Datumgtrgm_union(PG_FUNCTION_ARGS);
3037

@@ -159,18 +166,35 @@ gtrgm_decompress(PG_FUNCTION_ARGS)
159166
}
160167
}
161168

169+
staticint4
170+
cnt_sml_sign_common(TRGM*qtrg,BITVECPsign)
171+
{
172+
int4count=0;
173+
int4k,
174+
len=ARRNELEM(qtrg);
175+
trgm*ptr=GETARR(qtrg);
176+
int4tmp=0;
177+
178+
for (k=0;k<len;k++)
179+
{
180+
CPTRGM(((char*)&tmp),ptr+k);
181+
count+=GETBIT(sign,HASHVAL(tmp));
182+
}
183+
184+
returncount;
185+
}
186+
162187
Datum
163188
gtrgm_consistent(PG_FUNCTION_ARGS)
164189
{
165190
GISTENTRY*entry= (GISTENTRY*)PG_GETARG_POINTER(0);
166191
text*query=PG_GETARG_TEXT_P(1);
167-
168-
/* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
192+
StrategyNumberstrategy= (StrategyNumber)PG_GETARG_UINT16(2);
169193
/* Oidsubtype = PG_GETARG_OID(3); */
170194
bool*recheck= (bool*)PG_GETARG_POINTER(4);
171195
TRGM*key= (TRGM*)DatumGetPointer(entry->key);
172196
TRGM*qtrg;
173-
boolres= false;
197+
boolres;
174198
char*cache= (char*)fcinfo->flinfo->fn_extra;
175199

176200
/* All cases served by this function are exact */
@@ -193,39 +217,95 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
193217

194218
qtrg= (TRGM*) (cache+MAXALIGN(VARSIZE(query)));
195219

196-
if (GIST_LEAF(entry))
197-
{/* all leafs contains orig trgm */
198-
float4tmpsml=cnt_sml(key,qtrg);
220+
switch (strategy)
221+
{
222+
caseSimilarityStrategyNumber:
223+
if (GIST_LEAF(entry))
224+
{/* all leafs contains orig trgm */
225+
float4tmpsml=cnt_sml(key,qtrg);
199226

200-
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
201-
res= (*(int*)&tmpsml==*(int*)&trgm_limit||tmpsml>trgm_limit) ? true : false;
227+
/* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
228+
res= (*(int*)&tmpsml==*(int*)&trgm_limit||tmpsml>trgm_limit) ? true : false;
229+
}
230+
elseif (ISALLTRUE(key))
231+
{/* non-leaf contains signature */
232+
res= true;
233+
}
234+
else
235+
{/* non-leaf contains signature */
236+
int4count=cnt_sml_sign_common(qtrg,GETSIGN(key));
237+
int4len=ARRNELEM(qtrg);
238+
239+
if (len==0)
240+
res= false;
241+
else
242+
res= (((((float8)count) / ((float8)len))) >=trgm_limit) ? true : false;
243+
}
244+
break;
245+
default:
246+
elog(ERROR,"unrecognized strategy number: %d",strategy);
247+
res= false;/* keep compiler quiet */
248+
break;
202249
}
203-
elseif (ISALLTRUE(key))
204-
{/* non-leaf contains signature */
205-
res= true;
250+
251+
PG_RETURN_BOOL(res);
252+
}
253+
254+
Datum
255+
gtrgm_distance(PG_FUNCTION_ARGS)
256+
{
257+
GISTENTRY*entry= (GISTENTRY*)PG_GETARG_POINTER(0);
258+
text*query=PG_GETARG_TEXT_P(1);
259+
StrategyNumberstrategy= (StrategyNumber)PG_GETARG_UINT16(2);
260+
/* Oidsubtype = PG_GETARG_OID(3); */
261+
TRGM*key= (TRGM*)DatumGetPointer(entry->key);
262+
TRGM*qtrg;
263+
float8res;
264+
char*cache= (char*)fcinfo->flinfo->fn_extra;
265+
266+
if (cache==NULL||VARSIZE(cache)!=VARSIZE(query)||memcmp(cache,query,VARSIZE(query))!=0)
267+
{
268+
qtrg=generate_trgm(VARDATA(query),VARSIZE(query)-VARHDRSZ);
269+
270+
if (cache)
271+
pfree(cache);
272+
273+
fcinfo->flinfo->fn_extra=MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
274+
MAXALIGN(VARSIZE(query))+VARSIZE(qtrg));
275+
cache= (char*)fcinfo->flinfo->fn_extra;
276+
277+
memcpy(cache,query,VARSIZE(query));
278+
memcpy(cache+MAXALIGN(VARSIZE(query)),qtrg,VARSIZE(qtrg));
206279
}
207-
else
208-
{/* non-leaf contains signature */
209-
int4count=0;
210-
int4k,
211-
len=ARRNELEM(qtrg);
212-
trgm*ptr=GETARR(qtrg);
213-
BITVECPsign=GETSIGN(key);
214-
int4tmp=0;
215280

216-
for (k=0;k<len;k++)
217-
{
218-
CPTRGM(((char*)&tmp),ptr+k);
219-
count+=GETBIT(sign,HASHVAL(tmp));
220-
}
221-
#ifdefDIVUNION
222-
res= (len==count) ? true : ((((((float4)count) / ((float4) (len-count)))) >=trgm_limit) ? true : false);
223-
#else
224-
res= (len==0) ? false : ((((((float4)count) / ((float4)len))) >=trgm_limit) ? true : false);
225-
#endif
281+
qtrg= (TRGM*) (cache+MAXALIGN(VARSIZE(query)));
282+
283+
switch (strategy)
284+
{
285+
caseDistanceStrategyNumber:
286+
if (GIST_LEAF(entry))
287+
{/* all leafs contains orig trgm */
288+
res=1.0-cnt_sml(key,qtrg);
289+
}
290+
elseif (ISALLTRUE(key))
291+
{/* all leafs contains orig trgm */
292+
res=0.0;
293+
}
294+
else
295+
{/* non-leaf contains signature */
296+
int4count=cnt_sml_sign_common(qtrg,GETSIGN(key));
297+
int4len=ARRNELEM(qtrg);
298+
299+
res= (len==0) ?-1.0 :1.0- ((float8)count) / ((float8)len);
300+
}
301+
break;
302+
default:
303+
elog(ERROR,"unrecognized strategy number: %d",strategy);
304+
res=0;/* keep compiler quiet */
305+
break;
226306
}
227307

228-
PG_RETURN_BOOL(res);
308+
PG_RETURN_FLOAT8(res);
229309
}
230310

231311
staticint4

‎contrib/pg_trgm/trgm_op.c

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
/*
22
* contrib/pg_trgm/trgm_op.c
33
*/
4-
#include"trgm.h"
4+
#include"postgres.h"
5+
56
#include<ctype.h>
6-
#include"utils/array.h"
7+
8+
#include"trgm.h"
9+
710
#include"catalog/pg_type.h"
811
#include"tsearch/ts_locale.h"
12+
#include"utils/array.h"
13+
914

1015
PG_MODULE_MAGIC;
1116

@@ -359,16 +364,25 @@ similarity(PG_FUNCTION_ARGS)
359364
PG_RETURN_FLOAT4(res);
360365
}
361366

367+
PG_FUNCTION_INFO_V1(similarity_dist);
368+
Datumsimilarity_dist(PG_FUNCTION_ARGS);
369+
Datum
370+
similarity_dist(PG_FUNCTION_ARGS)
371+
{
372+
float4res=DatumGetFloat4(DirectFunctionCall2(similarity,
373+
PG_GETARG_DATUM(0),
374+
PG_GETARG_DATUM(1)));
375+
PG_RETURN_FLOAT4(1.0-res);
376+
}
377+
362378
PG_FUNCTION_INFO_V1(similarity_op);
363379
Datumsimilarity_op(PG_FUNCTION_ARGS);
364380
Datum
365381
similarity_op(PG_FUNCTION_ARGS)
366382
{
367-
float4res=DatumGetFloat4(DirectFunctionCall2(
368-
similarity,
383+
float4res=DatumGetFloat4(DirectFunctionCall2(similarity,
369384
PG_GETARG_DATUM(0),
370-
PG_GETARG_DATUM(1)
371-
));
385+
PG_GETARG_DATUM(1)));
372386

373387
PG_RETURN_BOOL(res >=trgm_limit);
374388
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp