Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitbe8a7a6

Browse files
committed
Add strict_word_similarity to pg_trgm module
strict_word_similarity is similar to the existing word_similarity function, but it takes word boundaries into account when computing similarity.
Author: Alexander Korotkov
Review by: David Steele, Liudmila Mantrova, me
Discussion: https://www.postgresql.org/message-id/flat/CY4PR17MB13207ED8310F847CF117EED0D85A0@CY4PR17MB1320.namprd17.prod.outlook.com
1 parent f20b328 commit be8a7a6

File tree

10 files changed

+1461
-61
lines changed

10 files changed

+1461
-61
lines changed

‎contrib/pg_trgm/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ MODULE_big = pg_trgm
44
OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o$(WIN32RES)
55

66
EXTENSION = pg_trgm
7-
DATA = pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql\
7+
DATA = pg_trgm--1.3--1.4.sql\
8+
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql\
89
pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
910
PGFILEDESC = "pg_trgm - trigram matching"
1011

11-
REGRESS = pg_trgm pg_word_trgm
12+
REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm
1213

1314
ifdefUSE_PGXS
1415
PG_CONFIG = pg_config

‎contrib/pg_trgm/expected/pg_strict_word_trgm.out

Lines changed: 1025 additions & 0 deletions
Large diffs are not rendered by default.

‎contrib/pg_trgm/pg_trgm--1.3--1.4.sql

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/* contrib/pg_trgm/pg_trgm--1.3--1.4.sql*/
2+
3+
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
4+
\echo Use"ALTER EXTENSION pg_trgm UPDATE TO '1.4'" to load this file. \quit
5+
6+
CREATEFUNCTIONstrict_word_similarity(text,text)
7+
RETURNS float4
8+
AS'MODULE_PATHNAME'
9+
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
10+
11+
CREATEFUNCTIONstrict_word_similarity_op(text,text)
12+
RETURNS bool
13+
AS'MODULE_PATHNAME'
14+
LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because it depends on pg_trgm.strict_word_similarity_threshold
15+
16+
CREATEFUNCTIONstrict_word_similarity_commutator_op(text,text)
17+
RETURNS bool
18+
AS'MODULE_PATHNAME'
19+
LANGUAGE C STRICT STABLE PARALLEL SAFE; -- stable because it depends on pg_trgm.strict_word_similarity_threshold
20+
21+
CREATE OPERATOR<<% (
22+
LEFTARG=text,
23+
RIGHTARG=text,
24+
PROCEDURE= strict_word_similarity_op,
25+
COMMUTATOR='%>>',
26+
RESTRICT= contsel,
27+
JOIN= contjoinsel
28+
);
29+
30+
CREATE OPERATOR %>> (
31+
LEFTARG=text,
32+
RIGHTARG=text,
33+
PROCEDURE= strict_word_similarity_commutator_op,
34+
COMMUTATOR='<<%',
35+
RESTRICT= contsel,
36+
JOIN= contjoinsel
37+
);
38+
39+
CREATEFUNCTIONstrict_word_similarity_dist_op(text,text)
40+
RETURNS float4
41+
AS'MODULE_PATHNAME'
42+
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
43+
44+
CREATEFUNCTIONstrict_word_similarity_dist_commutator_op(text,text)
45+
RETURNS float4
46+
AS'MODULE_PATHNAME'
47+
LANGUAGE C STRICT IMMUTABLE PARALLEL SAFE;
48+
49+
CREATE OPERATOR<<<-> (
50+
LEFTARG=text,
51+
RIGHTARG=text,
52+
PROCEDURE= strict_word_similarity_dist_op,
53+
COMMUTATOR='<->>>'
54+
);
55+
56+
CREATE OPERATOR<->>> (
57+
LEFTARG=text,
58+
RIGHTARG=text,
59+
PROCEDURE= strict_word_similarity_dist_commutator_op,
60+
COMMUTATOR='<<<->'
61+
);
62+
63+
ALTEROPERATOR FAMILY gist_trgm_ops USING gist ADD
64+
OPERATOR9 %>> (text,text),
65+
OPERATOR10<->>> (text,text) FORORDER BYpg_catalog.float_ops;
66+
67+
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
68+
OPERATOR9 %>> (text,text);

‎contrib/pg_trgm/pg_trgm.control

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# pg_trgm extension
22
comment = 'text similarity measurement and index searching based on trigrams'
3-
default_version = '1.3'
3+
default_version = '1.4'
44
module_pathname = '$libdir/pg_trgm'
55
relocatable = true
‎contrib/pg_trgm/sql/pg_strict_word_trgm.sql

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
DROPINDEX trgm_idx2;
2+
3+
\copy test_trgm3from'data/trgm2.data'
4+
5+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where'Baykal'<<% torder by smldesc, t;
6+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where'Kabankala'<<% torder by smldesc, t;
7+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where t %>>'Baykal'order by smldesc, t;
8+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where t %>>'Kabankala'order by smldesc, t;
9+
select t<->>>'Alaikallupoddakulam', tfrom test_trgm2order by t<->>>'Alaikallupoddakulam'limit7;
10+
11+
createindextrgm_idx2on test_trgm2 using gist (t gist_trgm_ops);
12+
set enable_seqscan=off;
13+
14+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where'Baykal'<<% torder by smldesc, t;
15+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where'Kabankala'<<% torder by smldesc, t;
16+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where t %>>'Baykal'order by smldesc, t;
17+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where t %>>'Kabankala'order by smldesc, t;
18+
19+
explain (costs off)
20+
select t<->>>'Alaikallupoddakulam', tfrom test_trgm2order by t<->>>'Alaikallupoddakulam'limit7;
21+
select t<->>>'Alaikallupoddakulam', tfrom test_trgm2order by t<->>>'Alaikallupoddakulam'limit7;
22+
23+
dropindex trgm_idx2;
24+
createindextrgm_idx2on test_trgm2 using gin (t gin_trgm_ops);
25+
set enable_seqscan=off;
26+
27+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where'Baykal'<<% torder by smldesc, t;
28+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where'Kabankala'<<% torder by smldesc, t;
29+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where t %>>'Baykal'order by smldesc, t;
30+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where t %>>'Kabankala'order by smldesc, t;
31+
32+
set"pg_trgm.strict_word_similarity_threshold" to0.4;
33+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where'Baykal'<<% torder by smldesc, t;
34+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where'Kabankala'<<% torder by smldesc, t;
35+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where t %>>'Baykal'order by smldesc, t;
36+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where t %>>'Kabankala'order by smldesc, t;
37+
38+
set"pg_trgm.strict_word_similarity_threshold" to0.2;
39+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where'Baykal'<<% torder by smldesc, t;
40+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where'Kabankala'<<% torder by smldesc, t;
41+
select t,strict_word_similarity('Baykal',t)as smlfrom test_trgm2where t %>>'Baykal'order by smldesc, t;
42+
select t,strict_word_similarity('Kabankala',t)as smlfrom test_trgm2where t %>>'Kabankala'order by smldesc, t;

‎contrib/pg_trgm/trgm.h

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
#include"access/gist.h"
88
#include"access/itup.h"
9+
#include"access/stratnum.h"
910
#include"storage/bufpage.h"
1011

1112
/*
@@ -26,14 +27,16 @@
2627
#defineDIVUNION
2728

2829
/* operator strategy numbers */
29-
#defineSimilarityStrategyNumber1
30-
#defineDistanceStrategyNumber2
31-
#defineLikeStrategyNumber3
32-
#defineILikeStrategyNumber4
33-
#defineRegExpStrategyNumber5
34-
#defineRegExpICaseStrategyNumber6
35-
#defineWordSimilarityStrategyNumber7
36-
#defineWordDistanceStrategyNumber8
30+
#defineSimilarityStrategyNumber1
31+
#defineDistanceStrategyNumber2
32+
#defineLikeStrategyNumber3
33+
#defineILikeStrategyNumber4
34+
#defineRegExpStrategyNumber5
35+
#defineRegExpICaseStrategyNumber6
36+
#defineWordSimilarityStrategyNumber7
37+
#defineWordDistanceStrategyNumber8
38+
#defineStrictWordSimilarityStrategyNumber9
39+
#defineStrictWordDistanceStrategyNumber10
3740

3841
typedefchartrgm[3];
3942

@@ -120,7 +123,9 @@ typedef struct TrgmPackedGraph TrgmPackedGraph;
120123

121124
externdoublesimilarity_threshold;
122125
externdoubleword_similarity_threshold;
126+
externdoublestrict_word_similarity_threshold;
123127

128+
externdoubleindex_strategy_get_limit(StrategyNumberstrategy);
124129
externuint32trgm2int(trgm*ptr);
125130
externvoidcompact_trigram(trgm*tptr,char*str,intbytelen);
126131
externTRGM*generate_trgm(char*str,intslen);

‎contrib/pg_trgm/trgm_gin.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
9090
{
9191
caseSimilarityStrategyNumber:
9292
caseWordSimilarityStrategyNumber:
93+
caseStrictWordSimilarityStrategyNumber:
9394
trg=generate_trgm(VARDATA_ANY(val),VARSIZE_ANY_EXHDR(val));
9495
break;
9596
caseILikeStrategyNumber:
@@ -187,8 +188,8 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
187188
{
188189
caseSimilarityStrategyNumber:
189190
caseWordSimilarityStrategyNumber:
190-
nlimit= (strategy==SimilarityStrategyNumber) ?
191-
similarity_threshold :word_similarity_threshold;
191+
caseStrictWordSimilarityStrategyNumber:
192+
nlimit=index_strategy_get_limit(strategy);
192193

193194
/* Count the matches */
194195
ntrue=0;
@@ -282,8 +283,8 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
282283
{
283284
caseSimilarityStrategyNumber:
284285
caseWordSimilarityStrategyNumber:
285-
nlimit= (strategy==SimilarityStrategyNumber) ?
286-
similarity_threshold :word_similarity_threshold;
286+
caseStrictWordSimilarityStrategyNumber:
287+
nlimit=index_strategy_get_limit(strategy);
287288

288289
/* Count the matches */
289290
ntrue=0;

‎contrib/pg_trgm/trgm_gist.c

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
221221
{
222222
caseSimilarityStrategyNumber:
223223
caseWordSimilarityStrategyNumber:
224+
caseStrictWordSimilarityStrategyNumber:
224225
qtrg=generate_trgm(VARDATA(query),
225226
querysize-VARHDRSZ);
226227
break;
@@ -290,10 +291,11 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
290291
{
291292
caseSimilarityStrategyNumber:
292293
caseWordSimilarityStrategyNumber:
293-
/* Similarity search is exact. Word similarity search is inexact */
294-
*recheck= (strategy==WordSimilarityStrategyNumber);
295-
nlimit= (strategy==SimilarityStrategyNumber) ?
296-
similarity_threshold :word_similarity_threshold;
294+
caseStrictWordSimilarityStrategyNumber:
295+
/* Similarity search is exact. (Strict) word similarity search is inexact */
296+
*recheck= (strategy!=SimilarityStrategyNumber);
297+
298+
nlimit=index_strategy_get_limit(strategy);
297299

298300
if (GIST_LEAF(entry))
299301
{/* all leaves contain the original trigrams */
@@ -468,7 +470,9 @@ gtrgm_distance(PG_FUNCTION_ARGS)
468470
{
469471
caseDistanceStrategyNumber:
470472
caseWordDistanceStrategyNumber:
471-
*recheck=strategy==WordDistanceStrategyNumber;
473+
caseStrictWordDistanceStrategyNumber:
474+
/* Only plain trigram distance is exact */
475+
*recheck= (strategy!=DistanceStrategyNumber);
472476
if (GIST_LEAF(entry))
473477
{/* all leaves contain the original trigrams */
474478

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp