Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit6e2f3ae

Browse files
committed
Support LIKE and ILIKE index searches via contrib/pg_trgm indexes.
Unlike Btree-based LIKE optimization, this works for non-left-anchoredsearch patterns. The effectiveness of the search depends on how manytrigrams can be extracted from the pattern. (The worst case, with notrigrams, degrades to a full-table scan, so this isn't a panacea. Butit can be very useful.)Alexander Korotkov, reviewed by Jan Urbanski
1 parent6238473 commit6e2f3ae

File tree

9 files changed

+640
-50
lines changed

9 files changed

+640
-50
lines changed

‎contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3467,3 +3467,93 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
34673467
qwertyu0988 | 0.333333
34683468
(1 row)
34693469

3470+
create table test2(t text);
3471+
insert into test2 values ('abcdef');
3472+
insert into test2 values ('quark');
3473+
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
3474+
set enable_seqscan=off;
3475+
explain (costs off)
3476+
select * from test2 where t like '%BCD%';
3477+
QUERY PLAN
3478+
------------------------------------------
3479+
Bitmap Heap Scan on test2
3480+
Recheck Cond: (t ~~ '%BCD%'::text)
3481+
-> Bitmap Index Scan on test2_idx_gin
3482+
Index Cond: (t ~~ '%BCD%'::text)
3483+
(4 rows)
3484+
3485+
explain (costs off)
3486+
select * from test2 where t ilike '%BCD%';
3487+
QUERY PLAN
3488+
-------------------------------------------
3489+
Bitmap Heap Scan on test2
3490+
Recheck Cond: (t ~~* '%BCD%'::text)
3491+
-> Bitmap Index Scan on test2_idx_gin
3492+
Index Cond: (t ~~* '%BCD%'::text)
3493+
(4 rows)
3494+
3495+
select * from test2 where t like '%BCD%';
3496+
t
3497+
---
3498+
(0 rows)
3499+
3500+
select * from test2 where t like '%bcd%';
3501+
t
3502+
--------
3503+
abcdef
3504+
(1 row)
3505+
3506+
select * from test2 where t ilike '%BCD%';
3507+
t
3508+
--------
3509+
abcdef
3510+
(1 row)
3511+
3512+
select * from test2 where t ilike 'qua%';
3513+
t
3514+
-------
3515+
quark
3516+
(1 row)
3517+
3518+
drop index test2_idx_gin;
3519+
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
3520+
set enable_seqscan=off;
3521+
explain (costs off)
3522+
select * from test2 where t like '%BCD%';
3523+
QUERY PLAN
3524+
------------------------------------------
3525+
Index Scan using test2_idx_gist on test2
3526+
Index Cond: (t ~~ '%BCD%'::text)
3527+
(2 rows)
3528+
3529+
explain (costs off)
3530+
select * from test2 where t ilike '%BCD%';
3531+
QUERY PLAN
3532+
------------------------------------------
3533+
Index Scan using test2_idx_gist on test2
3534+
Index Cond: (t ~~* '%BCD%'::text)
3535+
(2 rows)
3536+
3537+
select * from test2 where t like '%BCD%';
3538+
t
3539+
---
3540+
(0 rows)
3541+
3542+
select * from test2 where t like '%bcd%';
3543+
t
3544+
--------
3545+
abcdef
3546+
(1 row)
3547+
3548+
select * from test2 where t ilike '%BCD%';
3549+
t
3550+
--------
3551+
abcdef
3552+
(1 row)
3553+
3554+
select * from test2 where t ilike 'qua%';
3555+
t
3556+
-------
3557+
quark
3558+
(1 row)
3559+

‎contrib/pg_trgm/pg_trgm.sql.in

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ FOR TYPE text USING gist
113113
AS
114114
OPERATOR 1 % (text, text),
115115
OPERATOR 2 <-> (text, text) FOR ORDER BY pg_catalog.float_ops,
116+
OPERATOR 3 pg_catalog.~~ (text, text),
117+
OPERATOR 4 pg_catalog.~~* (text, text),
116118
FUNCTION 1 gtrgm_consistent (internal, text, int, oid, internal),
117119
FUNCTION 2 gtrgm_union (bytea, internal),
118120
FUNCTION 3 gtrgm_compress (internal),
@@ -124,17 +126,17 @@ AS
124126
STORAGE gtrgm;
125127

126128
-- support functions for gin
127-
CREATE OR REPLACE FUNCTIONgin_extract_trgm(text, internal)
129+
CREATE OR REPLACE FUNCTIONgin_extract_value_trgm(text, internal)
128130
RETURNS internal
129131
AS 'MODULE_PATHNAME'
130132
LANGUAGE C IMMUTABLE STRICT;
131133

132-
CREATE OR REPLACE FUNCTIONgin_extract_trgm(text, internal, int2, internal, internal)
134+
CREATE OR REPLACE FUNCTIONgin_extract_query_trgm(text, internal, int2, internal, internal, internal, internal)
133135
RETURNS internal
134136
AS 'MODULE_PATHNAME'
135137
LANGUAGE C IMMUTABLE STRICT;
136138

137-
CREATE OR REPLACE FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal)
139+
CREATE OR REPLACE FUNCTION gin_trgm_consistent(internal, int2, text, int4, internal, internal, internal, internal)
138140
RETURNS bool
139141
AS 'MODULE_PATHNAME'
140142
LANGUAGE C IMMUTABLE STRICT;
@@ -144,8 +146,10 @@ CREATE OPERATOR CLASS gin_trgm_ops
144146
FOR TYPE text USING gin
145147
AS
146148
OPERATOR 1 % (text, text),
149+
OPERATOR 3 pg_catalog.~~ (text, text),
150+
OPERATOR 4 pg_catalog.~~* (text, text),
147151
FUNCTION 1 btint4cmp (int4, int4),
148-
FUNCTION 2gin_extract_trgm (text, internal),
149-
FUNCTION 3gin_extract_trgm (text, internal, int2, internal, internal),
150-
FUNCTION 4 gin_trgm_consistent (internal, int2, text, int4, internal, internal),
152+
FUNCTION 2gin_extract_value_trgm (text, internal),
153+
FUNCTION 3gin_extract_query_trgm (text, internal, int2, internal, internal, internal, internal),
154+
FUNCTION 4 gin_trgm_consistent (internal, int2, text, int4, internal, internal, internal, internal),
151155
STORAGE int4;

‎contrib/pg_trgm/sql/pg_trgm.sql

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,28 @@ set enable_seqscan=off;
4545
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t %'qwertyu0988' order by sml desc, t;
4646
select t,similarity(t,'gwertyu0988') as sml from test_trgm where t %'gwertyu0988' order by sml desc, t;
4747
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t %'gwertyu1988' order by sml desc, t;
48+
49+
create table test2(t text);
50+
insert into test2 values ('abcdef');
51+
insert into test2 values ('quark');
52+
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
53+
set enable_seqscan=off;
54+
explain (costs off)
55+
select * from test2 where t like'%BCD%';
56+
explain (costs off)
57+
select * from test2 where t ilike'%BCD%';
58+
select * from test2 where t like'%BCD%';
59+
select * from test2 where t like'%bcd%';
60+
select * from test2 where t ilike'%BCD%';
61+
select * from test2 where t ilike'qua%';
62+
drop index test2_idx_gin;
63+
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
64+
set enable_seqscan=off;
65+
explain (costs off)
66+
select * from test2 where t like'%BCD%';
67+
explain (costs off)
68+
select * from test2 where t ilike'%BCD%';
69+
select * from test2 where t like'%BCD%';
70+
select * from test2 where t like'%bcd%';
71+
select * from test2 where t ilike'%BCD%';
72+
select * from test2 where t ilike'qua%';

‎contrib/pg_trgm/trgm.h

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,21 @@
1313
#defineLPADDING2
1414
#defineRPADDING1
1515
#defineKEEPONLYALNUM
16+
/*
17+
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
18+
* If this macro is disabled, the ~~* operator must be removed from the
19+
* operator classes, because we can't handle case-insensitive wildcard search
20+
* with case-sensitive trigrams. Failure to do this will result in "cannot
21+
* handle ~~* with case-sensitive trigrams" errors.
22+
*/
1623
#defineIGNORECASE
1724
#defineDIVUNION
1825

1926
/* operator strategy numbers */
20-
#defineSimilarityStrategyNumber1
21-
#defineDistanceStrategyNumber2
27+
#defineSimilarityStrategyNumber1
28+
#defineDistanceStrategyNumber2
29+
#defineLikeStrategyNumber3
30+
#defineILikeStrategyNumber4
2231

2332

2433
typedefchartrgm[3];
@@ -40,7 +49,10 @@ uint32trgm2int(trgm *ptr);
4049
#else
4150
#defineISPRINTABLECHAR(a)( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
4251
#endif
43-
#defineISPRINTABLETRGM(t)( ISPRINTABLECHAR( ((char*)t) ) && ISPRINTABLECHAR( ((char*)t)+1 ) && ISPRINTABLECHAR( ((char*)t)+2 ) )
52+
#defineISPRINTABLETRGM(t)( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
53+
54+
#defineISESCAPECHAR(x) (*(x) == '\\')/* Wildcard escape character */
55+
#defineISWILDCARDCHAR(x) (*(x) == '_' || *(x) == '%')/* Wildcard meta-character */
4456

4557
typedefstruct
4658
{
@@ -65,7 +77,7 @@ typedef char *BITVECP;
6577
for(i=0;i<SIGLEN;i++)
6678

6779
#defineGETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITBYTE ) ) )
68-
#defineGETBITBYTE(x,i) ( ((char)(x)) >>i & 0x01 )
80+
#defineGETBITBYTE(x,i) ( (((char)(x)) >>(i)) & 0x01 )
6981
#defineCLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) )
7082
#defineSETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) )
7183
#defineGETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 )
@@ -89,6 +101,8 @@ typedef char *BITVECP;
89101
externfloat4trgm_limit;
90102

91103
TRGM*generate_trgm(char*str,intslen);
104+
TRGM*generate_wildcard_trgm(constchar*str,intslen);
92105
float4cnt_sml(TRGM*trg1,TRGM*trg2);
106+
booltrgm_contained_by(TRGM*trg1,TRGM*trg2);
93107

94108
#endif/* __TRGM_H__ */

‎contrib/pg_trgm/trgm_gin.c

Lines changed: 120 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include"access/gin.h"
99
#include"access/itup.h"
10+
#include"access/skey.h"
1011
#include"access/tuptoaster.h"
1112
#include"storage/bufpage.h"
1213
#include"utils/array.h"
@@ -16,14 +17,31 @@
1617
PG_FUNCTION_INFO_V1(gin_extract_trgm);
1718
Datumgin_extract_trgm(PG_FUNCTION_ARGS);
1819

20+
PG_FUNCTION_INFO_V1(gin_extract_value_trgm);
21+
Datumgin_extract_value_trgm(PG_FUNCTION_ARGS);
22+
23+
PG_FUNCTION_INFO_V1(gin_extract_query_trgm);
24+
Datumgin_extract_query_trgm(PG_FUNCTION_ARGS);
25+
1926
PG_FUNCTION_INFO_V1(gin_trgm_consistent);
2027
Datumgin_trgm_consistent(PG_FUNCTION_ARGS);
2128

2229
/*
23-
* This function is used as both extractValue and extractQuery
30+
* This function can only be called if a pre-9.1 version of the GIN operator
31+
* class definition is present in the catalogs (probably as a consequence
32+
* of upgrade-in-place). Complain.
2433
*/
2534
Datum
2635
gin_extract_trgm(PG_FUNCTION_ARGS)
36+
{
37+
ereport(ERROR,
38+
(errmsg("GIN operator class for pg_trgm is out of date"),
39+
errhint("Please drop and re-create the pg_trgm catalog entries.")));
40+
PG_RETURN_NULL();
41+
}
42+
43+
Datum
44+
gin_extract_value_trgm(PG_FUNCTION_ARGS)
2745
{
2846
text*val= (text*)PG_GETARG_TEXT_P(0);
2947
int32*nentries= (int32*)PG_GETARG_POINTER(1);
@@ -57,34 +75,124 @@ gin_extract_trgm(PG_FUNCTION_ARGS)
5775
PG_RETURN_POINTER(entries);
5876
}
5977

78+
Datum
79+
gin_extract_query_trgm(PG_FUNCTION_ARGS)
80+
{
81+
text*val= (text*)PG_GETARG_TEXT_P(0);
82+
int32*nentries= (int32*)PG_GETARG_POINTER(1);
83+
StrategyNumberstrategy=PG_GETARG_UINT16(2);
84+
/* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
85+
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
86+
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
87+
int32*searchMode= (int32*)PG_GETARG_POINTER(6);
88+
Datum*entries=NULL;
89+
TRGM*trg;
90+
int32trglen;
91+
trgm*ptr;
92+
int32i;
93+
94+
switch (strategy)
95+
{
96+
caseSimilarityStrategyNumber:
97+
trg=generate_trgm(VARDATA(val),VARSIZE(val)-VARHDRSZ);
98+
break;
99+
caseILikeStrategyNumber:
100+
#ifndefIGNORECASE
101+
elog(ERROR,"cannot handle ~~* with case-sensitive trigrams");
102+
#endif
103+
/* FALL THRU */
104+
caseLikeStrategyNumber:
105+
/*
106+
* For wildcard search we extract all the trigrams that every
107+
* potentially-matching string must include.
108+
*/
109+
trg=generate_wildcard_trgm(VARDATA(val),VARSIZE(val)-VARHDRSZ);
110+
break;
111+
default:
112+
elog(ERROR,"unrecognized strategy number: %d",strategy);
113+
trg=NULL;/* keep compiler quiet */
114+
break;
115+
}
116+
117+
trglen=ARRNELEM(trg);
118+
*nentries=trglen;
119+
120+
if (trglen>0)
121+
{
122+
entries= (Datum*)palloc(sizeof(Datum)*trglen);
123+
ptr=GETARR(trg);
124+
for (i=0;i<trglen;i++)
125+
{
126+
int32item=trgm2int(ptr);
127+
128+
entries[i]=Int32GetDatum(item);
129+
ptr++;
130+
}
131+
}
132+
133+
/*
134+
* If no trigram was extracted then we have to scan all the index.
135+
*/
136+
if (trglen==0)
137+
*searchMode=GIN_SEARCH_MODE_ALL;
138+
139+
PG_RETURN_POINTER(entries);
140+
}
141+
60142
Datum
61143
gin_trgm_consistent(PG_FUNCTION_ARGS)
62144
{
63145
bool*check= (bool*)PG_GETARG_POINTER(0);
64-
/*StrategyNumber strategy = PG_GETARG_UINT16(1); */
146+
StrategyNumberstrategy=PG_GETARG_UINT16(1);
65147
/* text *query = PG_GETARG_TEXT_P(2); */
66148
int32nkeys=PG_GETARG_INT32(3);
67149
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
68150
bool*recheck= (bool*)PG_GETARG_POINTER(5);
69-
boolres= FALSE;
151+
boolres;
70152
int32i,
71-
ntrue=0;
153+
ntrue;
72154

73155
/* All cases served by this function are inexact */
74156
*recheck= true;
75157

76-
/* Count the matches */
77-
for (i=0;i<nkeys;i++)
158+
switch (strategy)
78159
{
79-
if (check[i])
80-
ntrue++;
81-
}
82-
160+
caseSimilarityStrategyNumber:
161+
/* Count the matches */
162+
ntrue=0;
163+
for (i=0;i<nkeys;i++)
164+
{
165+
if (check[i])
166+
ntrue++;
167+
}
83168
#ifdefDIVUNION
84-
res= (nkeys==ntrue) ? true : ((((((float4)ntrue) / ((float4) (nkeys-ntrue)))) >=trgm_limit) ? true : false);
169+
res= (nkeys==ntrue) ? true : ((((((float4)ntrue) / ((float4) (nkeys-ntrue)))) >=trgm_limit) ? true : false);
85170
#else
86-
res= (nkeys==0) ? false : ((((((float4)ntrue) / ((float4)nkeys))) >=trgm_limit) ? true : false);
171+
res= (nkeys==0) ? false : ((((((float4)ntrue) / ((float4)nkeys))) >=trgm_limit) ? true : false);
87172
#endif
173+
break;
174+
caseILikeStrategyNumber:
175+
#ifndefIGNORECASE
176+
elog(ERROR,"cannot handle ~~* with case-sensitive trigrams");
177+
#endif
178+
/* FALL THRU */
179+
caseLikeStrategyNumber:
180+
/* Check if all extracted trigrams are presented. */
181+
res= true;
182+
for (i=0;i<nkeys;i++)
183+
{
184+
if (!check[i])
185+
{
186+
res= false;
187+
break;
188+
}
189+
}
190+
break;
191+
default:
192+
elog(ERROR,"unrecognized strategy number: %d",strategy);
193+
res= false;/* keep compiler quiet */
194+
break;
195+
}
88196

89197
PG_RETURN_BOOL(res);
90198
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp