Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 3ccae48

Browse files
committed
Support indexing of regular-expression searches in contrib/pg_trgm.
This works by extracting trigrams from the given regular expression, in generally the same spirit as the previously-existing support for LIKE searches, though of course the details are far more complicated. Currently, only GIN indexes are supported. We might be able to make it work with GiST indexes later. The implementation includes adding API functions to backend/regex/ to provide a view of the search NFA created from a regular expression. These functions are meant to be generic enough to be supportable in a standalone version of the regex library, should that ever happen. Alexander Korotkov, reviewed by Heikki Linnakangas and Tom Lane
1 parent e60d20a commit 3ccae48

File tree

17 files changed

+2865
-43
lines changed

17 files changed

+2865
-43
lines changed

‎contrib/pg_trgm/Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# contrib/pg_trgm/Makefile
22

33
MODULE_big = pg_trgm
4-
OBJS = trgm_op.o trgm_gist.o trgm_gin.o
4+
OBJS = trgm_op.o trgm_gist.o trgm_gin.o trgm_regexp.o
55

66
EXTENSION = pg_trgm
7-
DATA = pg_trgm--1.0.sql pg_trgm--unpackaged--1.0.sql
7+
DATA = pg_trgm--1.1.sql pg_trgm--1.0--1.1.sql pg_trgm--unpackaged--1.0.sql
88

99
REGRESS = pg_trgm
1010

‎contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ select similarity('---', '####---');
6060
(1 row)
6161

6262
CREATE TABLE test_trgm(t text);
63-
\copy test_trgm from 'data/trgm.data
63+
\copy test_trgm from 'data/trgm.data'
6464
select t,similarity(t,'qwertyu0988') as sml from test_trgm where t % 'qwertyu0988' order by sml desc, t;
6565
t | sml
6666
-------------+----------
@@ -3470,6 +3470,7 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
34703470
create table test2(t text);
34713471
insert into test2 values ('abcdef');
34723472
insert into test2 values ('quark');
3473+
insert into test2 values (' z foo bar');
34733474
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
34743475
set enable_seqscan=off;
34753476
explain (costs off)
@@ -3521,6 +3522,142 @@ select * from test2 where t ilike 'qua%';
35213522
quark
35223523
(1 row)
35233524

3525+
select * from test2 where t like '%z foo bar%';
3526+
t
3527+
-------------
3528+
z foo bar
3529+
(1 row)
3530+
3531+
select * from test2 where t like ' z foo%';
3532+
t
3533+
-------------
3534+
z foo bar
3535+
(1 row)
3536+
3537+
explain (costs off)
3538+
select * from test2 where t ~ '[abc]{3}';
3539+
QUERY PLAN
3540+
--------------------------------------------
3541+
Bitmap Heap Scan on test2
3542+
Recheck Cond: (t ~ '[abc]{3}'::text)
3543+
-> Bitmap Index Scan on test2_idx_gin
3544+
Index Cond: (t ~ '[abc]{3}'::text)
3545+
(4 rows)
3546+
3547+
explain (costs off)
3548+
select * from test2 where t ~* 'DEF';
3549+
QUERY PLAN
3550+
------------------------------------------
3551+
Bitmap Heap Scan on test2
3552+
Recheck Cond: (t ~* 'DEF'::text)
3553+
-> Bitmap Index Scan on test2_idx_gin
3554+
Index Cond: (t ~* 'DEF'::text)
3555+
(4 rows)
3556+
3557+
select * from test2 where t ~ '[abc]{3}';
3558+
t
3559+
--------
3560+
abcdef
3561+
(1 row)
3562+
3563+
select * from test2 where t ~ 'a[bc]+d';
3564+
t
3565+
--------
3566+
abcdef
3567+
(1 row)
3568+
3569+
select * from test2 where t ~ '(abc)*$';
3570+
t
3571+
-------------
3572+
abcdef
3573+
quark
3574+
z foo bar
3575+
(3 rows)
3576+
3577+
select * from test2 where t ~* 'DEF';
3578+
t
3579+
--------
3580+
abcdef
3581+
(1 row)
3582+
3583+
select * from test2 where t ~ 'dEf';
3584+
t
3585+
---
3586+
(0 rows)
3587+
3588+
select * from test2 where t ~* '^q';
3589+
t
3590+
-------
3591+
quark
3592+
(1 row)
3593+
3594+
select * from test2 where t ~* '[abc]{3}[def]{3}';
3595+
t
3596+
--------
3597+
abcdef
3598+
(1 row)
3599+
3600+
select * from test2 where t ~* 'ab[a-z]{3}';
3601+
t
3602+
--------
3603+
abcdef
3604+
(1 row)
3605+
3606+
select * from test2 where t ~* '(^| )qua';
3607+
t
3608+
-------
3609+
quark
3610+
(1 row)
3611+
3612+
select * from test2 where t ~ 'q.*rk$';
3613+
t
3614+
-------
3615+
quark
3616+
(1 row)
3617+
3618+
select * from test2 where t ~ 'q';
3619+
t
3620+
-------
3621+
quark
3622+
(1 row)
3623+
3624+
select * from test2 where t ~ '[a-z]{3}';
3625+
t
3626+
-------------
3627+
abcdef
3628+
quark
3629+
z foo bar
3630+
(3 rows)
3631+
3632+
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
3633+
t
3634+
---
3635+
(0 rows)
3636+
3637+
select * from test2 where t ~ 'z foo bar';
3638+
t
3639+
-------------
3640+
z foo bar
3641+
(1 row)
3642+
3643+
select * from test2 where t ~ ' z foo bar';
3644+
t
3645+
-------------
3646+
z foo bar
3647+
(1 row)
3648+
3649+
select * from test2 where t ~ ' z foo bar';
3650+
t
3651+
-------------
3652+
z foo bar
3653+
(1 row)
3654+
3655+
select * from test2 where t ~ ' z foo';
3656+
t
3657+
-------------
3658+
z foo bar
3659+
(1 row)
3660+
35243661
drop index test2_idx_gin;
35253662
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
35263663
set enable_seqscan=off;

‎contrib/pg_trgm/pg_trgm--1.0--1.1.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/* contrib/pg_trgm/pg_trgm--1.0--1.1.sql*/
2+
3+
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
4+
\echo Use"ALTER EXTENSION pg_trgm UPDATE TO '1.1'" to load this file. \quit
5+
6+
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
7+
OPERATOR5 pg_catalog.~ (text,text),
8+
OPERATOR6 pg_catalog.~* (text,text);

‎contrib/pg_trgm/pg_trgm--1.0.sqlrenamed to‎contrib/pg_trgm/pg_trgm--1.1.sql

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* contrib/pg_trgm/pg_trgm--1.0.sql*/
1+
/* contrib/pg_trgm/pg_trgm--1.1.sql*/
22

33
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
44
\echo Use"CREATE EXTENSION pg_trgm" to load this file. \quit
@@ -164,3 +164,9 @@ AS
164164
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
165165
OPERATOR3 pg_catalog.~~ (text,text),
166166
OPERATOR4 pg_catalog.~~* (text,text);
167+
168+
-- Add operators that are new in 9.3.
169+
170+
ALTEROPERATOR FAMILY gin_trgm_ops USING gin ADD
171+
OPERATOR5 pg_catalog.~ (text,text),
172+
OPERATOR6 pg_catalog.~* (text,text);

‎contrib/pg_trgm/pg_trgm.control

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# pg_trgm extension
22
comment = 'text similarity measurement and index searching based on trigrams'
3-
default_version = '1.0'
3+
default_version = '1.1'
44
module_pathname = '$libdir/pg_trgm'
55
relocatable = true

‎contrib/pg_trgm/sql/pg_trgm.sql

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ select similarity('---', '####---');
1515

1616
CREATETABLEtest_trgm(ttext);
1717

18-
\copy test_trgmfrom'data/trgm.data
18+
\copy test_trgmfrom'data/trgm.data'
1919

2020
select t,similarity(t,'qwertyu0988')as smlfrom test_trgmwhere t %'qwertyu0988'order by smldesc, t;
2121
select t,similarity(t,'gwertyu0988')as smlfrom test_trgmwhere t %'gwertyu0988'order by smldesc, t;
@@ -43,6 +43,7 @@ select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu198
4343
createtabletest2(ttext);
4444
insert into test2values ('abcdef');
4545
insert into test2values ('quark');
46+
insert into test2values (' z foo bar');
4647
createindextest2_idx_ginon test2 using gin (t gin_trgm_ops);
4748
set enable_seqscan=off;
4849
explain (costs off)
@@ -54,6 +55,29 @@ select * from test2 where t like '%bcd%';
5455
select*from test2where tlike E'%\\bcd%';
5556
select*from test2where t ilike'%BCD%';
5657
select*from test2where t ilike'qua%';
58+
select*from test2where tlike'%z foo bar%';
59+
select*from test2where tlike' z foo%';
60+
explain (costs off)
61+
select*from test2where t ~'[abc]{3}';
62+
explain (costs off)
63+
select*from test2where t ~*'DEF';
64+
select*from test2where t ~'[abc]{3}';
65+
select*from test2where t ~'a[bc]+d';
66+
select*from test2where t ~'(abc)*$';
67+
select*from test2where t ~*'DEF';
68+
select*from test2where t ~'dEf';
69+
select*from test2where t ~*'^q';
70+
select*from test2where t ~*'[abc]{3}[def]{3}';
71+
select*from test2where t ~*'ab[a-z]{3}';
72+
select*from test2where t ~*'(^| )qua';
73+
select*from test2where t ~'q.*rk$';
74+
select*from test2where t ~'q';
75+
select*from test2where t ~'[a-z]{3}';
76+
select*from test2where t ~*'(a{10}|b{10}|c{10}){10}';
77+
select*from test2where t ~'z foo bar';
78+
select*from test2where t ~' z foo bar';
79+
select*from test2where t ~' z foo bar';
80+
select*from test2where t ~' z foo';
5781
dropindex test2_idx_gin;
5882
createindextest2_idx_giston test2 using gist (t gist_trgm_ops);
5983
set enable_seqscan=off;

‎contrib/pg_trgm/trgm.h

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,20 @@
77
#include"access/gist.h"
88
#include"access/itup.h"
99
#include"storage/bufpage.h"
10-
#include"utils/builtins.h"
1110

12-
/* options */
11+
/*
12+
* Options ... but note that trgm_regexp.c effectively assumes these values
13+
* of LPADDING and RPADDING.
14+
*/
1315
#defineLPADDING2
1416
#defineRPADDING1
1517
#defineKEEPONLYALNUM
1618
/*
1719
* Caution: IGNORECASE macro means that trigrams are case-insensitive.
18-
* If this macro is disabled, the ~~* operator must be removed from the
19-
* operator classes, because we can't handle case-insensitive wildcard search
20-
* with case-sensitive trigrams. Failure to do this will result in "cannot
21-
* handle ~~* with case-sensitive trigrams" errors.
20+
* If this macro is disabled, the ~* and ~~* operators must be removed from
21+
* the operator classes, because we can't handle case-insensitive wildcard
22+
* search with case-sensitive trigrams. Failure to do this will result in
23+
* "cannot handle ~*(~~*) with case-sensitive trigrams" errors.
2224
*/
2325
#defineIGNORECASE
2426
#defineDIVUNION
@@ -28,6 +30,8 @@
2830
#defineDistanceStrategyNumber2
2931
#defineLikeStrategyNumber3
3032
#defineILikeStrategyNumber4
33+
#defineRegExpStrategyNumber5
34+
#defineRegExpICaseStrategyNumber6
3135

3236

3337
typedefchartrgm[3];
@@ -42,11 +46,11 @@ typedef char trgm[3];
4246
*(((char*)(a))+2) = *(((char*)(b))+2);\
4347
} while(0);
4448

45-
uint32trgm2int(trgm*ptr);
46-
4749
#ifdefKEEPONLYALNUM
50+
#defineISWORDCHR(c)(t_isalpha(c) || t_isdigit(c))
4851
#defineISPRINTABLECHAR(a)( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') )
4952
#else
53+
#defineISWORDCHR(c)(!t_isspace(c))
5054
#defineISPRINTABLECHAR(a)( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) )
5155
#endif
5256
#defineISPRINTABLETRGM(t)( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) )
@@ -99,11 +103,18 @@ typedef char *BITVECP;
99103
#defineGETARR(x)( (trgm*)( (char*)x+TRGMHDRSIZE ) )
100104
#defineARRNELEM(x) ( ( VARSIZE(x) - TRGMHDRSIZE )/sizeof(trgm) )
101105

106+
typedefstructTrgmPackedGraphTrgmPackedGraph;
107+
102108
externfloat4trgm_limit;
103109

104-
TRGM*generate_trgm(char*str,intslen);
105-
TRGM*generate_wildcard_trgm(constchar*str,intslen);
106-
float4cnt_sml(TRGM*trg1,TRGM*trg2);
107-
booltrgm_contained_by(TRGM*trg1,TRGM*trg2);
110+
externuint32trgm2int(trgm*ptr);
111+
externvoidcompact_trigram(trgm*tptr,char*str,intbytelen);
112+
externTRGM*generate_trgm(char*str,intslen);
113+
externTRGM*generate_wildcard_trgm(constchar*str,intslen);
114+
externfloat4cnt_sml(TRGM*trg1,TRGM*trg2);
115+
externbooltrgm_contained_by(TRGM*trg1,TRGM*trg2);
116+
externTRGM*createTrgmNFA(text*text_re,TrgmPackedGraph**graph,
117+
Oidcollation);
118+
externbooltrigramsMatchGraph(TrgmPackedGraph*graph,bool*check);
108119

109120
#endif/* __TRGM_H__ */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp