Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f576b17

Browse files
committed
Add word_similarity to pg_trgm contrib module.
Patch introduces a concept of similarity over string and just a word from another string. Version of extension is not changed because 1.2 was already introduced in 9.6 release cycle, so, there wasn't a public version. Author: Alexander Korotkov, Artur Zakirov
1 parent 1c4f001 commit f576b17

File tree

10 files changed

+726
-75
lines changed

10 files changed

+726
-75
lines changed

‎contrib/pg_trgm/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ EXTENSION = pg_trgm
77
DATA = pg_trgm--1.2.sql pg_trgm--1.0--1.1.sql pg_trgm--1.1--1.2.sql pg_trgm--unpackaged--1.0.sql
88
PGFILEDESC = "pg_trgm - trigram matching"
99

10-
REGRESS = pg_trgm
10+
REGRESS = pg_trgm pg_word_trgm
1111

1212
ifdefUSE_PGXS
1313
PG_CONFIG = pg_config

‎contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ select similarity('---', '####---');
5959
0
6060
(1 row)
6161

62-
CREATE TABLE test_trgm(t text);
62+
CREATE TABLE test_trgm(t text COLLATE "C");
6363
\copy test_trgm from 'data/trgm.data'
6464
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
6565
t | sml
@@ -3467,7 +3467,7 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
34673467
qwertyu0988 | 0.333333
34683468
(1 row)
34693469

3470-
create table test2(t text);
3470+
create table test2(t text COLLATE "C");
34713471
insert into test2 values ('abcdef');
34723472
insert into test2 values ('quark');
34733473
insert into test2 values (' z foo bar');

‎contrib/pg_trgm/pg_trgm--1.1--1.2.sql

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,72 @@
33
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
44
\echo Use"ALTER EXTENSION pg_trgm UPDATE TO '1.2'" to load this file. \quit
55

6+
CREATEFUNCTIONword_similarity(text,text)
7+
RETURNS float4
8+
AS'MODULE_PATHNAME'
9+
LANGUAGE C STRICT IMMUTABLE;
10+
11+
CREATEFUNCTIONword_similarity_op(text,text)
12+
RETURNS bool
13+
AS'MODULE_PATHNAME'
14+
LANGUAGE C STRICT STABLE;-- stable because depends on pg_trgm.word_similarity_threshold
15+
16+
CREATEFUNCTIONword_similarity_commutator_op(text,text)
17+
RETURNS bool
18+
AS'MODULE_PATHNAME'
19+
LANGUAGE C STRICT STABLE;-- stable because depends on pg_trgm.word_similarity_threshold
20+
21+
CREATEFUNCTIONword_similarity_dist_op(text,text)
22+
RETURNS float4
23+
AS'MODULE_PATHNAME'
24+
LANGUAGE C STRICT IMMUTABLE;
25+
26+
CREATEFUNCTIONword_similarity_dist_commutator_op(text,text)
27+
RETURNS float4
28+
AS'MODULE_PATHNAME'
29+
LANGUAGE C STRICT IMMUTABLE;
30+
31+
CREATE OPERATOR<% (
32+
LEFTARG=text,
33+
RIGHTARG=text,
34+
PROCEDURE= word_similarity_op,
35+
COMMUTATOR='%>',
36+
RESTRICT= contsel,
37+
JOIN= contjoinsel
38+
);
39+
40+
CREATE OPERATOR %> (
41+
LEFTARG=text,
42+
RIGHTARG=text,
43+
PROCEDURE= word_similarity_commutator_op,
44+
COMMUTATOR='<%',
45+
RESTRICT= contsel,
46+
JOIN= contjoinsel
47+
);
48+
49+
CREATE OPERATOR<<-> (
50+
LEFTARG=text,
51+
RIGHTARG=text,
52+
PROCEDURE= word_similarity_dist_op,
53+
COMMUTATOR='<->>'
54+
);
55+
56+
CREATE OPERATOR<->> (
57+
LEFTARG=text,
58+
RIGHTARG=text,
59+
PROCEDURE= word_similarity_dist_commutator_op,
60+
COMMUTATOR='<<->'
61+
);
62+
663
CREATEFUNCTIONgin_trgm_triconsistent(internal, int2,text, int4, internal, internal, internal)
764
RETURNS"char"
865
AS'MODULE_PATHNAME'
966
LANGUAGE C IMMUTABLE STRICT;
1067

68+
ALTEROPERATOR FAMILY gist_trgm_ops USING gist ADD
69+
OPERATOR7 %> (text,text),
70+
OPERATOR8<->> (text,text) FORORDER BYpg_catalog.float_ops;
71+
1172
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
12-
FUNCTION6 (text,text) gin_trgm_triconsistent (internal, int2,text, int4, internal, internal, internal);
73+
OPERATOR7 %> (text,text),
74+
FUNCTION6 (text,text) gin_trgm_triconsistent (internal, int2,text, int4, internal, internal, internal);

‎contrib/pg_trgm/pg_trgm--1.2.sql

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,39 @@ CREATE OPERATOR % (
3939
JOIN= contjoinsel
4040
);
4141

42+
CREATEFUNCTIONword_similarity(text,text)
43+
RETURNS float4
44+
AS'MODULE_PATHNAME'
45+
LANGUAGE C STRICT IMMUTABLE;
46+
47+
CREATEFUNCTIONword_similarity_op(text,text)
48+
RETURNS bool
49+
AS'MODULE_PATHNAME'
50+
LANGUAGE C STRICT STABLE;-- stable because depends on pg_trgm.word_similarity_threshold
51+
52+
CREATEFUNCTIONword_similarity_commutator_op(text,text)
53+
RETURNS bool
54+
AS'MODULE_PATHNAME'
55+
LANGUAGE C STRICT STABLE;-- stable because depends on pg_trgm.word_similarity_threshold
56+
57+
CREATE OPERATOR<% (
58+
LEFTARG=text,
59+
RIGHTARG=text,
60+
PROCEDURE= word_similarity_op,
61+
COMMUTATOR='%>',
62+
RESTRICT= contsel,
63+
JOIN= contjoinsel
64+
);
65+
66+
CREATE OPERATOR %> (
67+
LEFTARG=text,
68+
RIGHTARG=text,
69+
PROCEDURE= word_similarity_commutator_op,
70+
COMMUTATOR='<%',
71+
RESTRICT= contsel,
72+
JOIN= contjoinsel
73+
);
74+
4275
CREATEFUNCTIONsimilarity_dist(text,text)
4376
RETURNS float4
4477
AS'MODULE_PATHNAME'
@@ -51,6 +84,30 @@ CREATE OPERATOR <-> (
5184
COMMUTATOR='<->'
5285
);
5386

87+
CREATEFUNCTIONword_similarity_dist_op(text,text)
88+
RETURNS float4
89+
AS'MODULE_PATHNAME'
90+
LANGUAGE C STRICT IMMUTABLE;
91+
92+
CREATEFUNCTIONword_similarity_dist_commutator_op(text,text)
93+
RETURNS float4
94+
AS'MODULE_PATHNAME'
95+
LANGUAGE C STRICT IMMUTABLE;
96+
97+
CREATE OPERATOR<<-> (
98+
LEFTARG=text,
99+
RIGHTARG=text,
100+
PROCEDURE= word_similarity_dist_op,
101+
COMMUTATOR='<->>'
102+
);
103+
104+
CREATE OPERATOR<->> (
105+
LEFTARG=text,
106+
RIGHTARG=text,
107+
PROCEDURE= word_similarity_dist_commutator_op,
108+
COMMUTATOR='<<->'
109+
);
110+
54111
-- gist key
55112
CREATEFUNCTIONgtrgm_in(cstring)
56113
RETURNS gtrgm
@@ -140,6 +197,12 @@ ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
140197
OPERATOR5 pg_catalog.~ (text,text),
141198
OPERATOR6 pg_catalog.~* (text,text);
142199

200+
-- Add operators that are new in 9.6 (pg_trgm 1.2).
201+
202+
ALTEROPERATOR FAMILY gist_trgm_ops USING gist ADD
203+
OPERATOR7 %> (text,text),
204+
OPERATOR8<->> (text,text) FORORDER BYpg_catalog.float_ops;
205+
143206
-- support functions for gin
144207
CREATEFUNCTIONgin_extract_value_trgm(text, internal)
145208
RETURNS internal
@@ -187,4 +250,5 @@ AS 'MODULE_PATHNAME'
187250
LANGUAGE C IMMUTABLE STRICT;
188251

189252
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
253+
OPERATOR7 %> (text,text),
190254
FUNCTION6 (text,text) gin_trgm_triconsistent (internal, int2,text, int4, internal, internal, internal);

‎contrib/pg_trgm/sql/pg_trgm.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ select similarity('wow',' WOW ');
1313

1414
select similarity('---','####---');
1515

16-
CREATETABLEtest_trgm(ttext);
16+
CREATETABLEtest_trgm(ttext COLLATE"C");
1717

1818
\copy test_trgmfrom'data/trgm.data'
1919

@@ -40,7 +40,7 @@ select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu098
4040
select t,similarity(t,'gwertyu0988')as smlfrom test_trgmwhere t %'gwertyu0988'order by smldesc, t;
4141
select t,similarity(t,'gwertyu1988')as smlfrom test_trgmwhere t %'gwertyu1988'order by smldesc, t;
4242

43-
createtabletest2(ttext);
43+
createtabletest2(ttext COLLATE"C");
4444
insert into test2values ('abcdef');
4545
insert into test2values ('quark');
4646
insert into test2values (' z foo bar');

‎contrib/pg_trgm/trgm.h

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,14 @@
2626
#defineDIVUNION
2727

2828
/* operator strategy numbers */
29-
#defineSimilarityStrategyNumber1
30-
#defineDistanceStrategyNumber2
31-
#defineLikeStrategyNumber3
32-
#defineILikeStrategyNumber4
33-
#defineRegExpStrategyNumber5
34-
#defineRegExpICaseStrategyNumber6
35-
29+
#defineSimilarityStrategyNumber1
30+
#defineDistanceStrategyNumber2
31+
#defineLikeStrategyNumber3
32+
#defineILikeStrategyNumber4
33+
#defineRegExpStrategyNumber5
34+
#defineRegExpICaseStrategyNumber6
35+
#defineWordSimilarityStrategyNumber7
36+
#defineWordDistanceStrategyNumber8
3637

3738
typedefchartrgm[3];
3839

@@ -103,15 +104,28 @@ typedef char *BITVECP;
103104
#defineGETARR(x)( (trgm*)( (char*)x+TRGMHDRSIZE ) )
104105
#defineARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
105106

107+
/*
108+
* If DIVUNION is defined then similarity formula is:
109+
* count / (len1 + len2 - count)
110+
* else if DIVUNION is not defined then similarity formula is:
111+
* count / max(len1, len2)
112+
*/
113+
#ifdefDIVUNION
114+
#defineCALCSML(count,len1,len2) ((float4) (count)) / ((float4) ((len1) + (len2) - (count)))
115+
#else
116+
#defineCALCSML(count,len1,len2) ((float4) (count)) / ((float4) (((len1) > (len2)) ? (len1) : (len2)))
117+
#endif
118+
106119
typedefstructTrgmPackedGraphTrgmPackedGraph;
107120

108121
externdoublesimilarity_threshold;
122+
externdoubleword_similarity_threshold;
109123

110124
externuint32trgm2int(trgm*ptr);
111125
externvoidcompact_trigram(trgm*tptr,char*str,intbytelen);
112126
externTRGM*generate_trgm(char*str,intslen);
113127
externTRGM*generate_wildcard_trgm(constchar*str,intslen);
114-
externfloat4cnt_sml(TRGM*trg1,TRGM*trg2);
128+
externfloat4cnt_sml(TRGM*trg1,TRGM*trg2,boolinexact);
115129
externbooltrgm_contained_by(TRGM*trg1,TRGM*trg2);
116130
externbool*trgm_presence_map(TRGM*query,TRGM*key);
117131
externTRGM*createTrgmNFA(text*text_re,Oidcollation,

‎contrib/pg_trgm/trgm_gin.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
8989
switch (strategy)
9090
{
9191
caseSimilarityStrategyNumber:
92+
caseWordSimilarityStrategyNumber:
9293
trg=generate_trgm(VARDATA(val),VARSIZE(val)-VARHDRSZ);
9394
break;
9495
caseILikeStrategyNumber:
@@ -176,13 +177,18 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
176177
boolres;
177178
int32i,
178179
ntrue;
180+
doublenlimit;
179181

180182
/* All cases served by this function are inexact */
181183
*recheck= true;
182184

183185
switch (strategy)
184186
{
185187
caseSimilarityStrategyNumber:
188+
caseWordSimilarityStrategyNumber:
189+
nlimit= (strategy==SimilarityStrategyNumber) ?
190+
similarity_threshold :word_similarity_threshold;
191+
186192
/* Count the matches */
187193
ntrue=0;
188194
for (i=0;i<nkeys;i++)
@@ -207,8 +213,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
207213
* So, independently of DIVUNION, the upper bound formula is the same.
208214
*/
209215
res= (nkeys==0) ? false :
210-
((((((float4)ntrue) / ((float4)nkeys))) >=similarity_threshold)
211-
? true : false);
216+
(((((float4)ntrue) / ((float4)nkeys))) >=nlimit);
212217
break;
213218
caseILikeStrategyNumber:
214219
#ifndefIGNORECASE
@@ -270,10 +275,15 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
270275
int32i,
271276
ntrue;
272277
bool*boolcheck;
278+
doublenlimit;
273279

274280
switch (strategy)
275281
{
276282
caseSimilarityStrategyNumber:
283+
caseWordSimilarityStrategyNumber:
284+
nlimit= (strategy==SimilarityStrategyNumber) ?
285+
similarity_threshold :word_similarity_threshold;
286+
277287
/* Count the matches */
278288
ntrue=0;
279289
for (i=0;i<nkeys;i++)
@@ -285,9 +295,9 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
285295
/*
286296
* See comment in gin_trgm_consistent() about upper bound formula
287297
*/
288-
res= (nkeys==0) ?GIN_FALSE :
289-
(((((float4)ntrue) / ((float4)nkeys)) >=similarity_threshold)
290-
?GIN_MAYBE :GIN_FALSE);
298+
res= (nkeys==0)
299+
?GIN_FALSE :(((((float4)ntrue) / ((float4)nkeys)) >=nlimit)
300+
?GIN_MAYBE :GIN_FALSE);
291301
break;
292302
caseILikeStrategyNumber:
293303
#ifndefIGNORECASE

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp