Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 25bd9ce

Browse files
committed
Add matchorig, matchsynonyms, and keepsynonyms options to contrib/dict_xsyn.
Sergey Karpov
1 parent 23dc89d, commit 25bd9ce

File tree

4 files changed

+282
-52
lines changed

4 files changed

+282
-52
lines changed

‎contrib/dict_xsyn/dict_xsyn.c

Lines changed: 69 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2007-2009, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.6 2009/01/01 17:23:32 momjian Exp $
9+
* $PostgreSQL: pgsql/contrib/dict_xsyn/dict_xsyn.c,v 1.7 2009/08/05 18:06:49 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -33,7 +33,10 @@ typedef struct
3333
intlen;
3434
Syn*syn;
3535

36+
boolmatchorig;
3637
boolkeeporig;
38+
boolmatchsynonyms;
39+
boolkeepsynonyms;
3740
}DictSyn;
3841

3942

@@ -88,34 +91,45 @@ read_dictionary(DictSyn *d, char *filename)
8891
{
8992
char*value;
9093
char*key;
91-
char*end=NULL;
94+
char*pos;
95+
char*end;
9296

9397
if (*line=='\0')
9498
continue;
9599

96100
value=lowerstr(line);
97101
pfree(line);
98102

99-
key=find_word(value,&end);
100-
if (!key)
103+
pos=value;
104+
while ((key=find_word(pos,&end))!=NULL)
101105
{
102-
pfree(value);
103-
continue;
104-
}
106+
/* Enlarge syn structure if full */
107+
if (cur==d->len)
108+
{
109+
d->len= (d->len>0) ?2*d->len :16;
110+
if (d->syn)
111+
d->syn= (Syn*)repalloc(d->syn,sizeof(Syn)*d->len);
112+
else
113+
d->syn= (Syn*)palloc(sizeof(Syn)*d->len);
114+
}
105115

106-
if (cur==d->len)
107-
{
108-
d->len= (d->len>0) ?2*d->len :16;
109-
if (d->syn)
110-
d->syn= (Syn*)repalloc(d->syn,sizeof(Syn)*d->len);
111-
else
112-
d->syn= (Syn*)palloc(sizeof(Syn)*d->len);
113-
}
116+
/* Save first word only if we will match it */
117+
if (pos!=value||d->matchorig)
118+
{
119+
d->syn[cur].key=pnstrdup(key,end-key);
120+
d->syn[cur].value=pstrdup(value);
114121

115-
d->syn[cur].key=pnstrdup(key,end-key);
116-
d->syn[cur].value=value;
122+
cur++;
123+
}
124+
125+
pos=end;
117126

118-
cur++;
127+
/* Don't bother scanning synonyms if we will not match them */
128+
if (!d->matchsynonyms)
129+
break;
130+
}
131+
132+
pfree(value);
119133
}
120134

121135
tsearch_readline_end(&trst);
@@ -133,23 +147,40 @@ dxsyn_init(PG_FUNCTION_ARGS)
133147
List*dictoptions= (List*)PG_GETARG_POINTER(0);
134148
DictSyn*d;
135149
ListCell*l;
150+
char*filename=NULL;
136151

137152
d= (DictSyn*)palloc0(sizeof(DictSyn));
138153
d->len=0;
139154
d->syn=NULL;
155+
d->matchorig= true;
140156
d->keeporig= true;
157+
d->matchsynonyms= false;
158+
d->keepsynonyms= true;
141159

142160
foreach(l,dictoptions)
143161
{
144162
DefElem*defel= (DefElem*)lfirst(l);
145163

146-
if (pg_strcasecmp(defel->defname,"KEEPORIG")==0)
164+
if (pg_strcasecmp(defel->defname,"MATCHORIG")==0)
165+
{
166+
d->matchorig=defGetBoolean(defel);
167+
}
168+
elseif (pg_strcasecmp(defel->defname,"KEEPORIG")==0)
147169
{
148170
d->keeporig=defGetBoolean(defel);
149171
}
172+
elseif (pg_strcasecmp(defel->defname,"MATCHSYNONYMS")==0)
173+
{
174+
d->matchsynonyms=defGetBoolean(defel);
175+
}
176+
elseif (pg_strcasecmp(defel->defname,"KEEPSYNONYMS")==0)
177+
{
178+
d->keepsynonyms=defGetBoolean(defel);
179+
}
150180
elseif (pg_strcasecmp(defel->defname,"RULES")==0)
151181
{
152-
read_dictionary(d,defGetString(defel));
182+
/* we can't read the rules before parsing all options! */
183+
filename=defGetString(defel);
153184
}
154185
else
155186
{
@@ -160,6 +191,9 @@ dxsyn_init(PG_FUNCTION_ARGS)
160191
}
161192
}
162193

194+
if (filename)
195+
read_dictionary(d,filename);
196+
163197
PG_RETURN_POINTER(d);
164198
}
165199

@@ -194,41 +228,33 @@ dxsyn_lexize(PG_FUNCTION_ARGS)
194228

195229
/* Parse string of synonyms and return array of words */
196230
{
197-
char*value=pstrdup(found->value);
198-
intvalue_length=strlen(value);
199-
char*pos=value;
231+
char*value=found->value;
232+
char*syn;
233+
char*pos;
234+
char*end;
200235
intnsyns=0;
201-
boolis_first= true;
202236

203-
res=palloc(0);
237+
res=palloc(sizeof(TSLexeme));
204238

205-
while (pos<value+value_length)
239+
pos=value;
240+
while ((syn=find_word(pos,&end))!=NULL)
206241
{
207-
char*end;
208-
char*syn=find_word(pos,&end);
209-
210-
if (!syn)
211-
break;
212-
*end='\0';
213-
214242
res=repalloc(res,sizeof(TSLexeme)* (nsyns+2));
215-
res[nsyns].lexeme=NULL;
216243

217-
/* first word is added to result only if KEEPORIG flag is set */
218-
if (d->keeporig||!is_first)
244+
/* The first word is output only if keeporig=true */
245+
if (pos!=value||d->keeporig)
219246
{
220-
res[nsyns].lexeme=pstrdup(syn);
221-
res[nsyns+1].lexeme=NULL;
222-
247+
res[nsyns].lexeme=pnstrdup(syn,end-syn);
223248
nsyns++;
224249
}
225250

226-
is_first=false;
251+
pos=end;
227252

228-
pos=end+1;
253+
/* Stop if we are not to output the synonyms */
254+
if (!d->keepsynonyms)
255+
break;
229256
}
230-
231-
pfree(value);
257+
res[nsyns].lexeme=NULL;
232258
}
233259

234260
PG_RETURN_POINTER(res);

‎contrib/dict_xsyn/expected/dict_xsyn.out

Lines changed: 128 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,76 @@
55
SET client_min_messages = warning;
66
\set ECHO none
77
RESET client_min_messages;
8-
--configuration
9-
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
8+
-- default configuration - match first word and return it among with all synonyms
9+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1010
--lexize
1111
SELECT ts_lexize('xsyn', 'supernova');
12+
ts_lexize
13+
--------------------------
14+
{supernova,sn,sne,1987a}
15+
(1 row)
16+
17+
SELECT ts_lexize('xsyn', 'sn');
18+
ts_lexize
19+
-----------
20+
21+
(1 row)
22+
23+
SELECT ts_lexize('xsyn', 'grb');
24+
ts_lexize
25+
-----------
26+
27+
(1 row)
28+
29+
-- the same, but return only synonyms
30+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
31+
SELECT ts_lexize('xsyn', 'supernova');
32+
ts_lexize
33+
----------------
34+
{sn,sne,1987a}
35+
(1 row)
36+
37+
SELECT ts_lexize('xsyn', 'sn');
38+
ts_lexize
39+
-----------
40+
41+
(1 row)
42+
43+
SELECT ts_lexize('xsyn', 'grb');
44+
ts_lexize
45+
-----------
46+
47+
(1 row)
48+
49+
-- match any word and return all words
50+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn', 'supernova');
52+
ts_lexize
53+
--------------------------
54+
{supernova,sn,sne,1987a}
55+
(1 row)
56+
57+
SELECT ts_lexize('xsyn', 'sn');
58+
ts_lexize
59+
--------------------------
60+
{supernova,sn,sne,1987a}
61+
(1 row)
62+
63+
SELECT ts_lexize('xsyn', 'grb');
64+
ts_lexize
65+
-----------
66+
67+
(1 row)
68+
69+
-- match any word and return all words except first one
70+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
71+
SELECT ts_lexize('xsyn', 'supernova');
72+
ts_lexize
73+
----------------
74+
{sn,sne,1987a}
75+
(1 row)
76+
77+
SELECT ts_lexize('xsyn', 'sn');
1278
ts_lexize
1379
----------------
1480
{sn,sne,1987a}
@@ -20,3 +86,63 @@ SELECT ts_lexize('xsyn', 'grb');
2086

2187
(1 row)
2288

89+
-- match any synonym but not first word, and return first word instead
90+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
91+
SELECT ts_lexize('xsyn', 'supernova');
92+
ts_lexize
93+
-----------
94+
95+
(1 row)
96+
97+
SELECT ts_lexize('xsyn', 'sn');
98+
ts_lexize
99+
-------------
100+
{supernova}
101+
(1 row)
102+
103+
SELECT ts_lexize('xsyn', 'grb');
104+
ts_lexize
105+
-----------
106+
107+
(1 row)
108+
109+
-- do not match or return anything
110+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
111+
SELECT ts_lexize('xsyn', 'supernova');
112+
ts_lexize
113+
-----------
114+
115+
(1 row)
116+
117+
SELECT ts_lexize('xsyn', 'sn');
118+
ts_lexize
119+
-----------
120+
121+
(1 row)
122+
123+
SELECT ts_lexize('xsyn', 'grb');
124+
ts_lexize
125+
-----------
126+
127+
(1 row)
128+
129+
-- match any word but return nothing
130+
ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
131+
SELECT ts_lexize('xsyn', 'supernova');
132+
ts_lexize
133+
-----------
134+
{}
135+
(1 row)
136+
137+
SELECT ts_lexize('xsyn', 'sn');
138+
ts_lexize
139+
-----------
140+
{}
141+
(1 row)
142+
143+
SELECT ts_lexize('xsyn', 'grb');
144+
ts_lexize
145+
-----------
146+
147+
(1 row)
148+

‎contrib/dict_xsyn/sql/dict_xsyn.sql

Lines changed: 39 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,46 @@ SET client_min_messages = warning;
88
\set ECHO all
99
RESET client_min_messages;
1010

11-
--configuration
12-
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
11+
-- default configuration - match first word and return it among with all synonyms
12+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
1313

1414
--lexize
1515
SELECT ts_lexize('xsyn','supernova');
16+
SELECT ts_lexize('xsyn','sn');
17+
SELECT ts_lexize('xsyn','grb');
18+
19+
-- the same, but return only synonyms
20+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
21+
SELECT ts_lexize('xsyn','supernova');
22+
SELECT ts_lexize('xsyn','sn');
23+
SELECT ts_lexize('xsyn','grb');
24+
25+
-- match any word and return all words
26+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
27+
SELECT ts_lexize('xsyn','supernova');
28+
SELECT ts_lexize('xsyn','sn');
29+
SELECT ts_lexize('xsyn','grb');
30+
31+
-- match any word and return all words except first one
32+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
33+
SELECT ts_lexize('xsyn','supernova');
34+
SELECT ts_lexize('xsyn','sn');
35+
SELECT ts_lexize('xsyn','grb');
36+
37+
-- match any synonym but not first word, and return first word instead
38+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
39+
SELECT ts_lexize('xsyn','supernova');
40+
SELECT ts_lexize('xsyn','sn');
41+
SELECT ts_lexize('xsyn','grb');
42+
43+
-- do not match or return anything
44+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
45+
SELECT ts_lexize('xsyn','supernova');
46+
SELECT ts_lexize('xsyn','sn');
47+
SELECT ts_lexize('xsyn','grb');
48+
49+
-- match any word but return nothing
50+
ALTERTEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
51+
SELECT ts_lexize('xsyn','supernova');
52+
SELECT ts_lexize('xsyn','sn');
1653
SELECT ts_lexize('xsyn','grb');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp