Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit324300b

Browse files
committed
Improve support for agglutinative languages (queries containing compound words).
regression=# select to_tsquery( '\'fotballklubber\'');
                   to_tsquery
------------------------------------------------
 'fotball' & 'klubb' | 'fot' & 'ball' & 'klubb'
(1 row)

So, the interface to dictionaries has changed: the lexize method of a dictionary should return a pointer to an array of TSLexeme structs instead of char**. The last element should have TSLexeme->lexeme == NULL.

typedef struct {
    /* number of the variant of the split word, for example:
       Word 'fotballklubber' (Norwegian) has two variants of splitting:
       ( fotball, klubb ) and ( fot, ball, klubb ). So, the dictionary
       should return:
       nvariant    lexeme
       1           fotball
       1           klubb
       2           fot
       2           ball
       2           klubb
    */
    uint16 nvariant;

    /* currently unused */
    uint16 flags;

    /* C-string */
    char *lexeme;
} TSLexeme;
1 parentd314616 commit324300b

File tree

12 files changed

+146
-85
lines changed

12 files changed

+146
-85
lines changed

‎contrib/tsearch2/dict.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,15 @@ lexize(PG_FUNCTION_ARGS)
183183
{
184184
text*in=PG_GETARG_TEXT_P(1);
185185
DictInfo*dict;
186-
char**res,
187-
**ptr;
186+
TSLexeme*res,
187+
*ptr;
188188
Datum*da;
189189
ArrayType*a;
190190

191191
SET_FUNCOID();
192192
dict=finddict(PG_GETARG_OID(0));
193193

194-
ptr=res= (char**)DatumGetPointer(
194+
ptr=res= (TSLexeme*)DatumGetPointer(
195195
FunctionCall3(&(dict->lexize_info),
196196
PointerGetDatum(dict->dictionary),
197197
PointerGetDatum(VARDATA(in)),
@@ -207,13 +207,13 @@ lexize(PG_FUNCTION_ARGS)
207207
PG_RETURN_NULL();
208208
}
209209

210-
while (*ptr)
210+
while (ptr->lexeme)
211211
ptr++;
212212
da= (Datum*)palloc(sizeof(Datum)* (ptr-res+1));
213213
ptr=res;
214-
while (*ptr)
214+
while (ptr->lexeme)
215215
{
216-
da[ptr-res]=PointerGetDatum(char2text(*ptr));
216+
da[ptr-res]=PointerGetDatum(char2text(ptr->lexeme));
217217
ptr++;
218218
}
219219

@@ -227,10 +227,10 @@ lexize(PG_FUNCTION_ARGS)
227227
);
228228

229229
ptr=res;
230-
while (*ptr)
230+
while (ptr->lexeme)
231231
{
232232
pfree(DatumGetPointer(da[ptr-res]));
233-
pfree(*ptr);
233+
pfree(ptr->lexeme);
234234
ptr++;
235235
}
236236
pfree(res);

‎contrib/tsearch2/dict.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,27 @@ typedef struct
3838

3939
voidparse_cfgdict(text*in,Map**m);
4040

41+
/* return struct for any lexize function */
42+
typedefstruct {
43+
/* number of variant of split word , for example
44+
Word 'fotballklubber' (norwegian) has two varian to split:
45+
( fotball, klubb ) and ( fot, ball, klubb ). So, dictionary
46+
should return:
47+
nvariantlexeme
48+
1fotball
49+
1klubb
50+
2fot
51+
2ball
52+
2klubb
53+
54+
*/
55+
uint16nvariant;
56+
57+
/* currently unused */
58+
uint16flags;
59+
60+
/* C-string */
61+
char*lexeme;
62+
}TSLexeme;
63+
4164
#endif

‎contrib/tsearch2/dict_ex.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,16 +54,16 @@ dex_lexize(PG_FUNCTION_ARGS)
5454
DictExample*d= (DictExample*)PG_GETARG_POINTER(0);
5555
char*in= (char*)PG_GETARG_POINTER(1);
5656
char*txt=pnstrdup(in,PG_GETARG_INT32(2));
57-
char**res=palloc(sizeof(char*)*2);
57+
TSLexeme*res=palloc(sizeof(TSLexeme)*2);
58+
59+
memset(res,0,sizeof(TSLexeme)*2);
5860

5961
if (*txt=='\0'||searchstoplist(&(d->stoplist),txt))
6062
{
6163
pfree(txt);
62-
res[0]=NULL;
6364
}
6465
else
65-
res[0]=txt;
66-
res[1]=NULL;
66+
res[0].lexeme=txt;
6767

6868
PG_RETURN_POINTER(res);
6969
}

‎contrib/tsearch2/dict_ispell.c

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -159,14 +159,13 @@ spell_lexize(PG_FUNCTION_ARGS)
159159
DictISpell*d= (DictISpell*)PG_GETARG_POINTER(0);
160160
char*in= (char*)PG_GETARG_POINTER(1);
161161
char*txt;
162-
char**res;
163-
char**ptr,
164-
**cptr;
162+
TSLexeme*res;
163+
TSLexeme*ptr,
164+
*cptr;
165165

166166
if (!PG_GETARG_INT32(2))
167167
PG_RETURN_POINTER(NULL);
168168

169-
res=palloc(sizeof(char*)*2);
170169
txt=pnstrdup(in,PG_GETARG_INT32(2));
171170
res=NINormalizeWord(&(d->obj),txt);
172171
pfree(txt);
@@ -175,22 +174,22 @@ spell_lexize(PG_FUNCTION_ARGS)
175174
PG_RETURN_POINTER(NULL);
176175

177176
ptr=cptr=res;
178-
while (*ptr)
177+
while (ptr->lexeme)
179178
{
180-
if (searchstoplist(&(d->stoplist),*ptr))
179+
if (searchstoplist(&(d->stoplist),ptr->lexeme))
181180
{
182-
pfree(*ptr);
183-
*ptr=NULL;
181+
pfree(ptr->lexeme);
182+
ptr->lexeme=NULL;
184183
ptr++;
185184
}
186185
else
187186
{
188-
*cptr=*ptr;
187+
memcpy(cptr,ptr,sizeof(TSLexeme));
189188
cptr++;
190189
ptr++;
191190
}
192191
}
193-
*cptr=NULL;
192+
cptr->lexeme=NULL;
194193

195194
PG_RETURN_POINTER(res);
196195
}

‎contrib/tsearch2/dict_snowball.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,12 @@ snb_lexize(PG_FUNCTION_ARGS)
105105
DictSnowball*d= (DictSnowball*)PG_GETARG_POINTER(0);
106106
char*in= (char*)PG_GETARG_POINTER(1);
107107
char*txt=pnstrdup(in,PG_GETARG_INT32(2));
108-
char**res=palloc(sizeof(char*)*2);
108+
TSLexeme*res=palloc(sizeof(TSLexeme)*2);
109109

110+
memset(res,0,sizeof(TSLexeme)*2);
110111
if (*txt=='\0'||searchstoplist(&(d->stoplist),txt))
111112
{
112113
pfree(txt);
113-
res[0]=NULL;
114114
}
115115
else
116116
{
@@ -122,10 +122,8 @@ snb_lexize(PG_FUNCTION_ARGS)
122122
memcpy(txt,d->z->p,d->z->l);
123123
txt[d->z->l]='\0';
124124
}
125-
res[0]=txt;
125+
res->lexeme=txt;
126126
}
127-
res[1]=NULL;
128-
129127

130128
PG_RETURN_POINTER(res);
131129
}

‎contrib/tsearch2/dict_syn.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ syn_lexize(PG_FUNCTION_ARGS)
162162
char*in= (char*)PG_GETARG_POINTER(1);
163163
Synkey,
164164
*found;
165-
char**res=NULL;
165+
TSLexeme*res=NULL;
166166

167167
if (!PG_GETARG_INT32(2))
168168
PG_RETURN_POINTER(NULL);
@@ -176,10 +176,9 @@ syn_lexize(PG_FUNCTION_ARGS)
176176
if (!found)
177177
PG_RETURN_POINTER(NULL);
178178

179-
res=palloc(sizeof(char*)*2);
180-
181-
res[0]=pstrdup(found->out);
182-
res[1]=NULL;
179+
res=palloc(sizeof(TSLexeme)*2);
180+
memset(res,0,sizeof(TSLexeme)*2);
181+
res[0].lexeme=pstrdup(found->out);
183182

184183
PG_RETURN_POINTER(res);
185184
}

‎contrib/tsearch2/gendict/dict_tmpl.c.IN

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) {
5252
HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0);
5353
char *in = (char*)PG_GETARG_POINTER(1);
5454
char *txt = pnstrdup(in, PG_GETARG_INT32(2));
55-
char**res=palloc(sizeof(char*)*2);
55+
TSLexeme*res=palloc(sizeof(TSLexeme*)*2);
5656

57-
/* YourINIT dictionary code */
57+
/* YourLEXIZE dictionary code */
5858
HASINIT if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) {
5959
HASINIT pfree(txt);
60-
HASINIT res[0]=NULL;
60+
HASINIT res[0].lexeme=NULL;
6161
HASINIT } else
62-
res[0]=txt;
63-
res[1]=NULL;
62+
res[0].lexeme=txt;
63+
res[1].lexeme=NULL;
6464

6565
PG_RETURN_POINTER(res);
6666
}

‎contrib/tsearch2/ispell/spell.c

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,17 +1119,32 @@ SplitToVariants(IspellDict * Conf, SPNode * snode, SplitVar * orig, char *word,
11191119
returnvar;
11201120
}
11211121

1122-
char**
1122+
TSLexeme*
11231123
NINormalizeWord(IspellDict*Conf,char*word)
11241124
{
11251125
char**res=NormalizeSubWord(Conf,word,0);
1126+
TSLexeme*lcur=NULL,*lres=NULL;
1127+
u_int16_tNVariant=1;
1128+
1129+
if (res) {
1130+
char**ptr=res;
1131+
lcur=lres= (TSLexeme*)palloc(MAX_NORM*sizeof(TSLexeme) );
1132+
while(*ptr) {
1133+
lcur->lexeme=*ptr;
1134+
lcur->flags=0;
1135+
lcur->nvariant=NVariant++;
1136+
lcur++;
1137+
ptr++;
1138+
}
1139+
lcur->lexeme=NULL;
1140+
pfree(res);
1141+
}
11261142

11271143
if (Conf->compoundcontrol!='\t')
11281144
{
11291145
intwordlen=strlen(word);
11301146
SplitVar*ptr,
11311147
*var=SplitToVariants(Conf,NULL,NULL,word,wordlen,0,-1);
1132-
char**cur=res;
11331148
inti;
11341149

11351150
while (var)
@@ -1140,30 +1155,31 @@ NINormalizeWord(IspellDict * Conf, char *word)
11401155

11411156
if (subres)
11421157
{
1143-
char**ptr=subres;
1158+
char**subptr=subres;
1159+
1160+
if ( !lcur )
1161+
lcur=lres= (TSLexeme*)palloc(MAX_NORM*sizeof(TSLexeme) );
1162+
1163+
while(*subptr) {
1164+
for(i=0;i<var->nstem-1;i++) {
1165+
lcur->lexeme=(subptr==subres) ?var->stem[i ] :pstrdup(var->stem[i ]);
1166+
lcur->flags=0;
1167+
lcur->nvariant=NVariant;
1168+
lcur++;
1169+
}
11441170

1145-
if (cur)
1146-
{
1147-
while (*cur)
1148-
cur++;
1149-
}
1150-
else
1151-
res=cur= (char**)palloc(MAX_NORM*sizeof(char*));
1171+
lcur->lexeme=*subptr;
1172+
lcur->flags=0;
1173+
lcur->nvariant=NVariant;
1174+
lcur++;
1175+
subptr++;
1176+
NVariant++;
1177+
}
11521178

1153-
for (i=0;i<var->nstem-1;i++)
1154-
{
1155-
*cur=var->stem[i];
1156-
cur++;
1157-
}
1158-
while (*ptr)
1159-
{
1160-
*cur=*ptr;
1161-
cur++;
1162-
ptr++;
1163-
}
1164-
*cur=NULL;
1179+
lcur->lexeme=NULL;
11651180
pfree(subres);
11661181
var->stem[0]=NULL;
1182+
pfree(var->stem[var->nstem-1 ] );
11671183
}
11681184
}
11691185

@@ -1175,7 +1191,7 @@ NINormalizeWord(IspellDict * Conf, char *word)
11751191
var=ptr;
11761192
}
11771193
}
1178-
returnres;
1194+
returnlres;
11791195
}
11801196

11811197

‎contrib/tsearch2/ispell/spell.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33

44
#include<sys/types.h>
55
#include"regex/regex.h"
6-
#include"regis.h"
76
#include"c.h"
87

9-
8+
#include"regis.h"
9+
#include"dict.h"
10+
1011
structSPNode;
1112

1213

@@ -116,7 +117,7 @@ typedef struct
116117

117118
}IspellDict;
118119

119-
char**NINormalizeWord(IspellDict*Conf,char*word);
120+
TSLexeme*NINormalizeWord(IspellDict*Conf,char*word);
120121
intNIImportAffixes(IspellDict*Conf,constchar*filename);
121122
intNIImportDictionary(IspellDict*Conf,constchar*filename);
122123

‎contrib/tsearch2/query.c

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,7 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
265265
{
266266
int4count=0;
267267
PRSTEXTprs;
268+
uint32variant,pos,cntvar=0,cntpos=0,cnt=0;
268269

269270
prs.lenwords=32;
270271
prs.curwords=0;
@@ -273,17 +274,39 @@ pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 we
273274

274275
parsetext_v2(findcfg(state->cfg_id),&prs,strval,lenval);
275276

276-
for (count=0;count<prs.curwords;count++)
277-
{
278-
pushval_asis(state,VAL,prs.words[count].word,prs.words[count].len,weight);
279-
pfree(prs.words[count].word);
280-
if (count)
281-
pushquery(state,OPR, (int4)'&',0,0,0);
282-
}
283-
pfree(prs.words);
277+
if (prs.curwords>0 ) {
278+
279+
while (count<prs.curwords) {
280+
pos=prs.words[count].pos.pos;
281+
cntvar=0;
282+
while(count<prs.curwords&&pos==prs.words[count].pos.pos) {
283+
variant=prs.words[count].nvariant;
284+
285+
cnt=0;
286+
while(count<prs.curwords&&pos==prs.words[count].pos.pos&&variant==prs.words[count].nvariant){
287+
288+
pushval_asis(state,VAL,prs.words[count].word,prs.words[count].len,weight);
289+
pfree(prs.words[count].word);
290+
if (cnt )
291+
pushquery(state,OPR, (int4)'&',0,0,0);
292+
cnt++;
293+
count++;
294+
}
295+
296+
if (cntvar )
297+
pushquery(state,OPR, (int4)'|',0,0,0);
298+
cntvar++;
299+
}
300+
301+
if (cntpos)
302+
pushquery(state,OPR, (int4)'&',0,0,0);
303+
304+
cntpos++;
305+
}
306+
307+
pfree(prs.words);
284308

285-
/* XXX */
286-
if (prs.curwords==0)
309+
}else
287310
pushval_asis(state,VALSTOP,NULL,0,0);
288311
}
289312

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp