Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitabd8c94

Browse files
committed
Add prefix support for synonym dictionary
1 parent0c73808 commitabd8c94

File tree

5 files changed

+111
-8
lines changed

5 files changed

+111
-8
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.52 2009/06/17 21:58:49 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.53 2009/08/14 14:53:20 teodor Exp $ -->
22

33
<chapter id="textsearch">
44
<title id="textsearch-title">Full Text Search</title>
@@ -2288,6 +2288,63 @@ SELECT * FROM ts_debug('english', 'Paris');
22882288
asciiword | Word, all ASCII | Paris | {my_synonym,english_stem} | my_synonym | {paris}
22892289
</programlisting>
22902290
</para>
2291+
2292+
<para>
2293+
An asterisk (<literal>*</literal>) at the end of a definition word indicates
2294+
that the definition word is a prefix, and the <function>to_tsquery()</function>
2295+
function will transform that definition to the prefix search format (see
2296+
<xref linkend="textsearch-parsing-queries">).
2297+
Note that the asterisk is ignored in <function>to_tsvector()</function>.
2298+
</para>
2299+
2300+
<para>
2301+
Contents of <filename>$SHAREDIR/tsearch_data/synonym_sample.syn</>:
2302+
</para>
2303+
<programlisting>
2304+
postgres pgsql
2305+
postgresql pgsql
2306+
postgre pgsql
2307+
gogle googl
2308+
indices index*
2309+
</programlisting>
2310+
2311+
<para>
2312+
Results:
2313+
</para>
2314+
<programlisting>
2315+
=# create text search dictionary syn( template=synonym,synonyms='synonym_sample');
2316+
=# select ts_lexize('syn','indices');
2317+
ts_lexize
2318+
-----------
2319+
{index}
2320+
(1 row)
2321+
2322+
=# create text search configuration tst ( copy=simple);
2323+
=# alter text search configuration tst alter mapping for asciiword with syn;
2324+
=# select to_tsquery('tst','indices');
2325+
to_tsquery
2326+
------------
2327+
'index':*
2328+
(1 row)
2329+
2330+
=# select 'indexes are very useful'::tsvector;
2331+
tsvector
2332+
---------------------------------
2333+
'are' 'indexes' 'useful' 'very'
2334+
(1 row)
2335+
2336+
=# select 'indexes are very useful'::tsvector @@ to_tsquery('tst','indices');
2337+
?column?
2338+
----------
2339+
t
2340+
(1 row)
2341+
2342+
=# select to_tsvector('tst','indices');
2343+
to_tsvector
2344+
-------------
2345+
'index':1
2346+
(1 row)
2347+
</programlisting>
22912348

22922349
<para>
22932350
The only parameter required by the <literal>synonym</> template is

‎src/backend/tsearch/dict_synonym.c

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.10 2009/01/01 17:23:48 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.11 2009/08/14 14:53:20 teodor Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,6 +23,8 @@ typedef struct
2323
{
2424
char*in;
2525
char*out;
26+
intoutlen;
27+
uint16flags;
2628
}Syn;
2729

2830
typedefstruct
@@ -36,11 +38,14 @@ typedef struct
3638
* Finds the next whitespace-delimited word within the 'in' string.
3739
* Returns a pointer to the first character of the word, and a pointer
3840
* to the next byte after the last character in the word (in *end).
41+
* Character '*' at the end of word will not be treated as word
42+
* character if flags is not null.
3943
*/
4044
staticchar*
41-
findwrd(char*in,char**end)
45+
findwrd(char*in,char**end,uint16*flags)
4246
{
4347
char*start;
48+
char*lastchar;
4449

4550
/* Skip leading spaces */
4651
while (*in&&t_isspace(in))
@@ -53,13 +58,27 @@ findwrd(char *in, char **end)
5358
returnNULL;
5459
}
5560

56-
start=in;
61+
lastchar=start=in;
5762

5863
/* Find end of word */
5964
while (*in&& !t_isspace(in))
65+
{
66+
lastchar=in;
6067
in+=pg_mblen(in);
68+
}
69+
70+
if (in-lastchar==1&&t_iseq(lastchar,'*')&&flags )
71+
{
72+
*flags=TSL_PREFIX;
73+
*end=lastchar;
74+
}
75+
else
76+
{
77+
if (flags)
78+
*flags=0;
79+
*end=in;
80+
}
6181

62-
*end=in;
6382
returnstart;
6483
}
6584

@@ -84,6 +103,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
84103
*end=NULL;
85104
intcur=0;
86105
char*line=NULL;
106+
uint16flags=0;
87107

88108
foreach(l,dictoptions)
89109
{
@@ -117,7 +137,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
117137

118138
while ((line=tsearch_readline(&trst))!=NULL)
119139
{
120-
starti=findwrd(line,&end);
140+
starti=findwrd(line,&end,NULL);
121141
if (!starti)
122142
{
123143
/* Empty line */
@@ -130,7 +150,7 @@ dsynonym_init(PG_FUNCTION_ARGS)
130150
}
131151
*end='\0';
132152

133-
starto=findwrd(end+1,&end);
153+
starto=findwrd(end+1,&end,&flags);
134154
if (!starto)
135155
{
136156
/* A line with only one word (+whitespace). Ignore silently. */
@@ -168,6 +188,9 @@ dsynonym_init(PG_FUNCTION_ARGS)
168188
d->syn[cur].out=lowerstr(starto);
169189
}
170190

191+
d->syn[cur].outlen=strlen(starto);
192+
d->syn[cur].flags=flags;
193+
171194
cur++;
172195

173196
skipline:
@@ -212,7 +235,8 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
212235
PG_RETURN_POINTER(NULL);
213236

214237
res=palloc0(sizeof(TSLexeme)*2);
215-
res[0].lexeme=pstrdup(found->out);
238+
res[0].lexeme=pnstrdup(found->out,found->outlen);
239+
res[0].flags=found->flags;
216240

217241
PG_RETURN_POINTER(res);
218242
}

‎src/backend/tsearch/synonym_sample.syn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ postgrespgsql
22
postgresql pgsql
33
postgre pgsql
44
gogle googl
5+
indices index*

‎src/test/regress/expected/tsdicts.out

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,12 @@ SELECT ts_lexize('synonym', 'Gogle');
208208
{googl}
209209
(1 row)
210210

211+
SELECT ts_lexize('synonym', 'indices');
212+
ts_lexize
213+
-----------
214+
{index}
215+
(1 row)
216+
211217
-- Create and simple test thesaurus dictionary
212218
-- More tests in configuration checks because ts_lexize()
213219
-- cannot pass more than one word to thesaurus.
@@ -290,6 +296,18 @@ SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead
290296
'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
291297
(1 row)
292298

299+
SELECT to_tsvector('synonym_tst', 'Indexes or indices - Which is right plural form of index?');
300+
to_tsvector
301+
----------------------------------------------
302+
'form':8 'index':1,3,10 'plural':7 'right':6
303+
(1 row)
304+
305+
SELECT to_tsquery('synonym_tst', 'Index & indices');
306+
to_tsquery
307+
---------------------
308+
'index' & 'index':*
309+
(1 row)
310+
293311
-- test thesaurus in configuration
294312
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector
295313
CREATE TEXT SEARCH CONFIGURATION thesaurus_tst (

‎src/test/regress/sql/tsdicts.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ CREATE TEXT SEARCH DICTIONARY synonym (
5656

5757
SELECT ts_lexize('synonym','PoStGrEs');
5858
SELECT ts_lexize('synonym','Gogle');
59+
SELECT ts_lexize('synonym','indices');
5960

6061
-- Create and simple test thesaurus dictionary
6162
-- More tests in configuration checks because ts_lexize()
@@ -104,6 +105,8 @@ ALTER TEXT SEARCH CONFIGURATION synonym_tst ALTER MAPPING FOR
104105

105106
SELECT to_tsvector('synonym_tst','Postgresql is often called as postgres or pgsql and pronounced as postgre');
106107
SELECT to_tsvector('synonym_tst','Most common mistake is to write Gogle instead of Google');
108+
SELECT to_tsvector('synonym_tst','Indexes or indices - Which is right plural form of index?');
109+
SELECT to_tsquery('synonym_tst','Index & indices');
107110

108111
-- test thesaurus in configuration
109112
-- see thesaurus_sample.ths to understand 'odd' resulting tsvector

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp