Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7351b5f

Browse files
committed
Cleanup for some problems in tsearch patch:
- ispell initialization crashed on empty dictionary file
- ispell initialization crashed on affix file with prefixes but no suffixes
- stop words file was run through pg_verify_mbstr, with database encoding, but it's supposed to be UTF-8; similar bug for synonym files
- bunch of comments added, typos fixed, and other cleanup

Introduced consistent encoding checking/conversion of data read from tsearch configuration files, by doing this in a single t_readline() subroutine (replacing direct usages of fgets). Cleaned up API for readstopwords too.

Heikki Linnakangas
1 parent b918bf8, commit 7351b5f

File tree

14 files changed

+341
-328
lines changed

14 files changed

+341
-328
lines changed

‎src/backend/snowball/dict_snowball.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/snowball/dict_snowball.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -192,7 +192,6 @@ dsnowball_init(PG_FUNCTION_ARGS)
192192
ListCell*l;
193193

194194
d= (DictSnowball*)palloc0(sizeof(DictSnowball));
195-
d->stoplist.wordop=recode_and_lowerstr;
196195

197196
foreach(l,dictoptions)
198197
{
@@ -204,8 +203,7 @@ dsnowball_init(PG_FUNCTION_ARGS)
204203
ereport(ERROR,
205204
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
206205
errmsg("multiple StopWords parameters")));
207-
readstoplist(defGetString(defel),&d->stoplist);
208-
sortstoplist(&d->stoplist);
206+
readstoplist(defGetString(defel),&d->stoplist,lowerstr);
209207
stoploaded= true;
210208
}
211209
elseif (pg_strcasecmp("Language",defel->defname)==0)

‎src/backend/tsearch/dict_ispell.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_ispell.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -39,7 +39,6 @@ dispell_init(PG_FUNCTION_ARGS)
3939
ListCell*l;
4040

4141
d= (DictISpell*)palloc0(sizeof(DictISpell));
42-
d->stoplist.wordop=recode_and_lowerstr;
4342

4443
foreach(l,dictoptions)
4544
{
@@ -73,8 +72,7 @@ dispell_init(PG_FUNCTION_ARGS)
7372
ereport(ERROR,
7473
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
7574
errmsg("multiple StopWords parameters")));
76-
readstoplist(defGetString(defel),&(d->stoplist));
77-
sortstoplist(&(d->stoplist));
75+
readstoplist(defGetString(defel),&(d->stoplist),lowerstr);
7876
stoploaded= true;
7977
}
8078
else

‎src/backend/tsearch/dict_simple.c

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,19 +23,17 @@
2323
typedefstruct
2424
{
2525
StopListstoplist;
26-
}DictExample;
26+
}DictSimple;
2727

2828

2929
Datum
3030
dsimple_init(PG_FUNCTION_ARGS)
3131
{
3232
List*dictoptions= (List*)PG_GETARG_POINTER(0);
33-
DictExample*d= (DictExample*)palloc0(sizeof(DictExample));
33+
DictSimple*d= (DictSimple*)palloc0(sizeof(DictSimple));
3434
boolstoploaded= false;
3535
ListCell*l;
3636

37-
d->stoplist.wordop=recode_and_lowerstr;
38-
3937
foreach(l,dictoptions)
4038
{
4139
DefElem*defel= (DefElem*)lfirst(l);
@@ -46,8 +44,7 @@ dsimple_init(PG_FUNCTION_ARGS)
4644
ereport(ERROR,
4745
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4846
errmsg("multiple StopWords parameters")));
49-
readstoplist(defGetString(defel),&d->stoplist);
50-
sortstoplist(&d->stoplist);
47+
readstoplist(defGetString(defel),&d->stoplist,lowerstr);
5148
stoploaded= true;
5249
}
5350
else
@@ -65,16 +62,16 @@ dsimple_init(PG_FUNCTION_ARGS)
6562
Datum
6663
dsimple_lexize(PG_FUNCTION_ARGS)
6764
{
68-
DictExample*d= (DictExample*)PG_GETARG_POINTER(0);
65+
DictSimple*d= (DictSimple*)PG_GETARG_POINTER(0);
6966
char*in= (char*)PG_GETARG_POINTER(1);
7067
int32len=PG_GETARG_INT32(2);
71-
char*txt=lowerstr_with_len(in,len);
68+
char*txt;
7269
TSLexeme*res=palloc0(sizeof(TSLexeme)*2);
7370

71+
txt=lowerstr_with_len(in,len);
72+
7473
if (*txt=='\0'||searchstoplist(&(d->stoplist),txt))
75-
{
7674
pfree(txt);
77-
}
7875
else
7976
res[0].lexeme=txt;
8077

‎src/backend/tsearch/dict_synonym.c

Lines changed: 48 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.2 2007/08/22 04:13:15 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_synonym.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -20,9 +20,6 @@
2020
#include"tsearch/ts_utils.h"
2121
#include"utils/builtins.h"
2222

23-
24-
#defineSYNBUFLEN4096
25-
2623
typedefstruct
2724
{
2825
char*in;
@@ -31,23 +28,34 @@ typedef struct
3128

3229
typedefstruct
3330
{
34-
intlen;
31+
intlen;/* length of syn array */
3532
Syn*syn;
3633
}DictSyn;
3734

35+
/*
36+
* Finds the next whitespace-delimited word within the 'in' string.
37+
* Returns a pointer to the first character of the word, and a pointer
38+
* to the next byte after the last character in the word (in *end).
39+
*/
3840
staticchar*
3941
findwrd(char*in,char**end)
4042
{
4143
char*start;
4244

43-
*end=NULL;
45+
/* Skip leading spaces */
4446
while (*in&&t_isspace(in))
4547
in+=pg_mblen(in);
4648

49+
/* Return NULL on empty lines */
4750
if (*in=='\0')
51+
{
52+
*end=NULL;
4853
returnNULL;
54+
}
55+
4956
start=in;
5057

58+
/* Find end of word */
5159
while (*in&& !t_isspace(in))
5260
in+=pg_mblen(in);
5361

@@ -70,12 +78,11 @@ dsynonym_init(PG_FUNCTION_ARGS)
7078
ListCell*l;
7179
char*filename=NULL;
7280
FILE*fin;
73-
charbuf[SYNBUFLEN];
7481
char*starti,
7582
*starto,
7683
*end=NULL;
7784
intcur=0;
78-
intslen;
85+
char*line=NULL;
7986

8087
foreach(l,dictoptions)
8188
{
@@ -105,10 +112,33 @@ dsynonym_init(PG_FUNCTION_ARGS)
105112

106113
d= (DictSyn*)palloc0(sizeof(DictSyn));
107114

108-
while (fgets(buf,SYNBUFLEN,fin))
115+
while ((line=t_readline(fin))!=NULL)
109116
{
110-
slen=strlen(buf);
111-
pg_verifymbstr(buf,slen, false);
117+
starti=findwrd(line,&end);
118+
if (!starti)
119+
{
120+
/* Empty line */
121+
gotoskipline;
122+
}
123+
*end='\0';
124+
if (end >=line+strlen(line))
125+
{
126+
/* A line with only one word. Ignore silently. */
127+
gotoskipline;
128+
}
129+
130+
starto=findwrd(end+1,&end);
131+
if (!starto)
132+
{
133+
/* A line with only one word. Ignore silently. */
134+
gotoskipline;
135+
}
136+
*end='\0';
137+
138+
/* starti now points to the first word, and starto to the second
139+
* word on the line, with a \0 terminator at the end of both words.
140+
*/
141+
112142
if (cur==d->len)
113143
{
114144
if (d->len==0)
@@ -123,36 +153,19 @@ dsynonym_init(PG_FUNCTION_ARGS)
123153
}
124154
}
125155

126-
starti=findwrd(buf,&end);
127-
if (!starti)
128-
continue;
129-
*end='\0';
130-
if (end >=buf+slen)
131-
continue;
132-
133-
starto=findwrd(end+1,&end);
134-
if (!starto)
135-
continue;
136-
*end='\0';
137-
138-
d->syn[cur].in=recode_and_lowerstr(starti);
139-
d->syn[cur].out=recode_and_lowerstr(starto);
140-
if (!(d->syn[cur].in&&d->syn[cur].out))
141-
{
142-
FreeFile(fin);
143-
ereport(ERROR,
144-
(errcode(ERRCODE_OUT_OF_MEMORY),
145-
errmsg("out of memory")));
146-
}
156+
d->syn[cur].in=lowerstr(starti);
157+
d->syn[cur].out=lowerstr(starto);
147158

148159
cur++;
160+
161+
skipline:
162+
pfree(line);
149163
}
150164

151165
FreeFile(fin);
152166

153167
d->len=cur;
154-
if (cur>1)
155-
qsort(d->syn,d->len,sizeof(Syn),compareSyn);
168+
qsort(d->syn,d->len,sizeof(Syn),compareSyn);
156169

157170
PG_RETURN_POINTER(d);
158171
}
@@ -179,8 +192,7 @@ dsynonym_lexize(PG_FUNCTION_ARGS)
179192
if (!found)
180193
PG_RETURN_POINTER(NULL);
181194

182-
res=palloc(sizeof(TSLexeme)*2);
183-
memset(res,0,sizeof(TSLexeme)*2);
195+
res=palloc0(sizeof(TSLexeme)*2);
184196
res[0].lexeme=pstrdup(found->out);
185197

186198
PG_RETURN_POINTER(res);

‎src/backend/tsearch/dict_thesaurus.c

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.2 2007/08/22 01:39:44 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -170,10 +170,10 @@ static void
170170
thesaurusRead(char*filename,DictThesaurus*d)
171171
{
172172
FILE*fh;
173-
charstr[BUFSIZ];
174173
intlineno=0;
175174
uint16idsubst=0;
176175
booluseasis= false;
176+
char*line;
177177

178178
filename=get_tsearch_config_filename(filename,"ths");
179179
fh=AllocateFile(filename,"r");
@@ -183,27 +183,28 @@ thesaurusRead(char *filename, DictThesaurus * d)
183183
errmsg("could not open thesaurus file \"%s\": %m",
184184
filename)));
185185

186-
while (fgets(str,sizeof(str),fh))
186+
while ((line=t_readline(fh))!=NULL)
187187
{
188-
char*ptr,
189-
*recoded;
188+
char*ptr;
190189
intstate=TR_WAITLEX;
191190
char*beginwrd=NULL;
192191
uint16posinsubst=0;
193192
uint16nwrd=0;
194193

195-
ptr=recoded= (char*)pg_do_encoding_conversion((unsignedchar*)str,strlen(str),
196-
GetDatabaseEncoding(),PG_UTF8);
197-
if (recoded==NULL)
198-
elog(ERROR,"encoding conversion failed");
199-
200194
lineno++;
201195

202-
/* is it comment ? */
203-
while (t_isspace(ptr))
196+
ptr=line;
197+
198+
/* is it a comment? */
199+
while (*ptr&&t_isspace(ptr))
204200
ptr+=pg_mblen(ptr);
205-
if (t_iseq(recoded,'#')||*recoded=='\0'||t_iseq(recoded,'\n')||t_iseq(recoded,'\r'))
201+
202+
if (t_iseq(ptr,'#')||*ptr=='\0'||
203+
t_iseq(ptr,'\n')||t_iseq(ptr,'\r'))
204+
{
205+
pfree(line);
206206
continue;
207+
}
207208

208209
while (*ptr)
209210
{
@@ -301,8 +302,7 @@ thesaurusRead(char *filename, DictThesaurus * d)
301302
lineno,filename)));
302303
}
303304

304-
if (recoded!=str)
305-
pfree(recoded);
305+
pfree(line);
306306
}
307307

308308
d->nsubst=idsubst;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp