Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit bb36c51

Browse files
committed
Fix several bugs in tsvectorin, including crash due to uninitialized field and
miscomputation of required palloc size. The crash could only occur if the input contained lexemes both with and without positions, which is probably not common in practice. The miscomputation would definitely result in wasted space. Also fix some inconsistent coding around alignment of strings and positions in a tsvector value; these errors could also lead to crashes given mixed with/without position data and a machine that's picky about alignment. And be more careful about checking for overflow of string offsets. Patch is only against HEAD --- I have not looked to see if same bugs are in back-branch contrib/tsearch2 code.
1 parent f551348 commit bb36c51

File tree

3 files changed

+166
-126
lines changed

3 files changed

+166
-126
lines changed

‎src/backend/tsearch/to_tsany.c

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.4 2007/09/26 10:09:57 teodor Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.5 2007/10/23 00:51:23 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -140,55 +140,64 @@ uniqueWORD(ParsedWord * a, int4 l)
140140
TSVector
141141
make_tsvector(ParsedText*prs)
142142
{
143-
int4i,
143+
inti,
144144
j,
145145
lenstr=0,
146146
totallen;
147147
TSVectorin;
148148
WordEntry*ptr;
149-
char*str,
150-
*cur;
149+
char*str;
150+
intstroff;
151151

152152
prs->curwords=uniqueWORD(prs->words,prs->curwords);
153153
for (i=0;i<prs->curwords;i++)
154154
{
155-
lenstr+=SHORTALIGN(prs->words[i].len);
156-
155+
lenstr+=prs->words[i].len;
157156
if (prs->words[i].alen)
157+
{
158+
lenstr=SHORTALIGN(lenstr);
158159
lenstr+=sizeof(uint16)+prs->words[i].pos.apos[0]*sizeof(WordEntryPos);
160+
}
159161
}
160162

163+
if (lenstr>MAXSTRPOS)
164+
ereport(ERROR,
165+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
166+
errmsg("string is too long for tsvector")));
167+
161168
totallen=CALCDATASIZE(prs->curwords,lenstr);
162169
in= (TSVector)palloc0(totallen);
163170
SET_VARSIZE(in,totallen);
164171
in->size=prs->curwords;
165172

166173
ptr=ARRPTR(in);
167-
cur=str=STRPTR(in);
174+
str=STRPTR(in);
175+
stroff=0;
168176
for (i=0;i<prs->curwords;i++)
169177
{
170178
ptr->len=prs->words[i].len;
171-
if (cur-str>MAXSTRPOS)
172-
ereport(ERROR,
173-
(errcode(ERRCODE_SYNTAX_ERROR),
174-
errmsg("string is too long for tsvector")));
175-
ptr->pos=cur-str;
176-
memcpy((void*)cur, (void*)prs->words[i].word,prs->words[i].len);
179+
ptr->pos=stroff;
180+
memcpy(str+stroff,prs->words[i].word,prs->words[i].len);
181+
stroff+=prs->words[i].len;
177182
pfree(prs->words[i].word);
178-
cur+=SHORTALIGN(prs->words[i].len);
179183
if (prs->words[i].alen)
180184
{
185+
intk=prs->words[i].pos.apos[0];
181186
WordEntryPos*wptr;
182187

188+
if (k>0xFFFF)
189+
elog(ERROR,"positions array too long");
190+
183191
ptr->haspos=1;
184-
*(uint16*)cur=prs->words[i].pos.apos[0];
192+
stroff=SHORTALIGN(stroff);
193+
*(uint16*) (str+stroff)= (uint16)k;
185194
wptr=POSDATAPTR(in,ptr);
186-
for (j=0;j<*(uint16*)cur;j++)
195+
for (j=0;j<k;j++)
187196
{
188197
WEP_SETWEIGHT(wptr[j],0);
189198
WEP_SETPOS(wptr[j],prs->words[i].pos.apos[j+1]);
190199
}
191-
cur+=sizeof(uint16)+prs->words[i].pos.apos[0]*sizeof(WordEntryPos);
200+
stroff+=sizeof(uint16)+k*sizeof(WordEntryPos);
192201
pfree(prs->words[i].pos.apos);
193202
}
194203
else

‎src/backend/utils/adt/tsvector.c

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.6 2007/10/23 00:51:23 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -22,16 +22,18 @@
2222

2323
typedefstruct
2424
{
25-
WordEntryentry;/*should be first! */
25+
WordEntryentry;/*must be first! */
2626
WordEntryPos*pos;
2727
intposlen;/* number of elements in pos */
2828
}WordEntryIN;
2929

30+
31+
/* Compare two WordEntryPos values for qsort */
3032
staticint
3133
comparePos(constvoid*a,constvoid*b)
3234
{
33-
intapos=WEP_GETPOS(*(WordEntryPos*)a);
34-
intbpos=WEP_GETPOS(*(WordEntryPos*)b);
35+
intapos=WEP_GETPOS(*(constWordEntryPos*)a);
36+
intbpos=WEP_GETPOS(*(constWordEntryPos*)b);
3537

3638
if (apos==bpos)
3739
return0;
@@ -53,17 +55,18 @@ uniquePos(WordEntryPos * a, int l)
5355
if (l <=1)
5456
returnl;
5557

56-
res=a;
5758
qsort((void*)a,l,sizeof(WordEntryPos),comparePos);
5859

60+
res=a;
5961
ptr=a+1;
6062
while (ptr-a<l)
6163
{
6264
if (WEP_GETPOS(*ptr)!=WEP_GETPOS(*res))
6365
{
6466
res++;
6567
*res=*ptr;
66-
if (res-a >=MAXNUMPOS-1||WEP_GETPOS(*res)==MAXENTRYPOS-1)
68+
if (res-a >=MAXNUMPOS-1||
69+
WEP_GETPOS(*res)==MAXENTRYPOS-1)
6770
break;
6871
}
6972
elseif (WEP_GETWEIGHT(*ptr)>WEP_GETWEIGHT(*res))
@@ -74,12 +77,13 @@ uniquePos(WordEntryPos * a, int l)
7477
returnres+1-a;
7578
}
7679

80+
/* Compare two WordEntryIN values for qsort */
7781
staticint
7882
compareentry(constvoid*va,constvoid*vb,void*arg)
7983
{
84+
constWordEntryIN*a= (constWordEntryIN*)va;
85+
constWordEntryIN*b= (constWordEntryIN*)vb;
8086
char*BufferStr= (char*)arg;
81-
WordEntryIN*a= (WordEntryIN*)va;
82-
WordEntryIN*b= (WordEntryIN*)vb;
8387

8488
if (a->entry.len==b->entry.len)
8589
{
@@ -91,82 +95,78 @@ compareentry(const void *va, const void *vb, void *arg)
9195
return (a->entry.len>b->entry.len) ?1 :-1;
9296
}
9397

98+
/*
99+
* Sort an array of WordEntryIN, remove duplicates.
100+
* *outbuflen receives the amount of space needed for strings and positions.
101+
*/
94102
staticint
95103
uniqueentry(WordEntryIN*a,intl,char*buf,int*outbuflen)
96104
{
105+
intbuflen;
97106
WordEntryIN*ptr,
98107
*res;
99108

100109
Assert(l >=1);
101110

102-
if (l==1)
103-
{
104-
if (a->entry.haspos)
105-
{
106-
a->poslen=uniquePos(a->pos,a->poslen);
107-
*outbuflen=SHORTALIGN(a->entry.len)+ (a->poslen+1)*sizeof(WordEntryPos);
108-
}
109-
else
110-
*outbuflen=a->entry.len;
111+
if (l>1)
112+
qsort_arg((void*)a,l,sizeof(WordEntryIN),compareentry,
113+
(void*)buf);
111114

112-
returnl;
113-
}
115+
buflen=0;
114116
res=a;
115-
116117
ptr=a+1;
117-
qsort_arg((void*)a,l,sizeof(WordEntryIN),compareentry, (void*)buf);
118-
119118
while (ptr-a<l)
120119
{
121120
if (!(ptr->entry.len==res->entry.len&&
122-
strncmp(&buf[ptr->entry.pos],&buf[res->entry.pos],res->entry.len)==0))
121+
strncmp(&buf[ptr->entry.pos],&buf[res->entry.pos],
122+
res->entry.len)==0))
123123
{
124+
/* done accumulating data into *res, count space needed */
125+
buflen+=res->entry.len;
124126
if (res->entry.haspos)
125127
{
126-
*outbuflen+=SHORTALIGN(res->entry.len);
127128
res->poslen=uniquePos(res->pos,res->poslen);
128-
*outbuflen+=res->poslen*sizeof(WordEntryPos);
129+
buflen=SHORTALIGN(buflen);
130+
buflen+=res->poslen*sizeof(WordEntryPos)+sizeof(uint16);
129131
}
130-
else
131-
*outbuflen+=res->entry.len;
132132
res++;
133133
memcpy(res,ptr,sizeof(WordEntryIN));
134134
}
135135
elseif (ptr->entry.haspos)
136136
{
137137
if (res->entry.haspos)
138138
{
139+
/* append ptr's positions to res's positions */
139140
intnewlen=ptr->poslen+res->poslen;
140141

141-
/* Append res to pos */
142-
143-
res->pos= (WordEntryPos*)repalloc(res->pos,newlen*sizeof(WordEntryPos));
144-
memcpy(&res->pos[res->poslen],
145-
ptr->pos,ptr->poslen*sizeof(WordEntryPos));
142+
res->pos= (WordEntryPos*)
143+
repalloc(res->pos,newlen*sizeof(WordEntryPos));
144+
memcpy(&res->pos[res->poslen],ptr->pos,
145+
ptr->poslen*sizeof(WordEntryPos));
146146
res->poslen=newlen;
147147
pfree(ptr->pos);
148148
}
149149
else
150150
{
151+
/* just give ptr's positions to pos */
151152
res->entry.haspos=1;
152153
res->pos=ptr->pos;
154+
res->poslen=ptr->poslen;
153155
}
154156
}
155157
ptr++;
156158
}
157159

158-
/*add last item */
159-
160+
/*count space needed for last item */
161+
buflen+=res->entry.len;
160162
if (res->entry.haspos)
161163
{
162-
*outbuflen+=SHORTALIGN(res->entry.len);
163-
164164
res->poslen=uniquePos(res->pos,res->poslen);
165-
*outbuflen+=res->poslen*sizeof(WordEntryPos);
165+
buflen=SHORTALIGN(buflen);
166+
buflen+=res->poslen*sizeof(WordEntryPos)+sizeof(uint16);
166167
}
167-
else
168-
*outbuflen+=res->entry.len;
169168

169+
*outbuflen=buflen;
170170
returnres+1-a;
171171
}
172172

@@ -193,6 +193,8 @@ tsvectorin(PG_FUNCTION_ARGS)
193193
inttoklen;
194194
WordEntryPos*pos;
195195
intposlen;
196+
char*strbuf;
197+
intstroff;
196198

197199
/*
198200
* Tokens are appended to tmpbuf, cur is a pointer
@@ -212,27 +214,26 @@ tsvectorin(PG_FUNCTION_ARGS)
212214

213215
while (gettoken_tsvector(state,&token,&toklen,&pos,&poslen,NULL))
214216
{
215-
216217
if (toklen >=MAXSTRLEN)
217218
ereport(ERROR,
218-
(errcode(ERRCODE_SYNTAX_ERROR),
219+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
219220
errmsg("word is too long (%ld bytes, max %ld bytes)",
220221
(long)toklen,
221-
(long)MAXSTRLEN)));
222-
222+
(long) (MAXSTRLEN-1))));
223223

224224
if (cur-tmpbuf>MAXSTRPOS)
225225
ereport(ERROR,
226-
(errcode(ERRCODE_SYNTAX_ERROR),
227-
errmsg("position valueis toolarge")));
226+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
227+
errmsg("stringis toolong for tsvector")));
228228

229229
/*
230230
* Enlarge buffers if needed
231231
*/
232232
if (len >=arrlen)
233233
{
234234
arrlen *=2;
235-
arr= (WordEntryIN*)repalloc((void*)arr,sizeof(WordEntryIN)*arrlen);
235+
arr= (WordEntryIN*)
236+
repalloc((void*)arr,sizeof(WordEntryIN)*arrlen);
236237
}
237238
while ((cur-tmpbuf)+toklen >=buflen)
238239
{
@@ -254,7 +255,11 @@ tsvectorin(PG_FUNCTION_ARGS)
254255
arr[len].poslen=poslen;
255256
}
256257
else
258+
{
257259
arr[len].entry.haspos=0;
260+
arr[len].pos=NULL;
261+
arr[len].poslen=0;
262+
}
258263
len++;
259264
}
260265

@@ -264,40 +269,45 @@ tsvectorin(PG_FUNCTION_ARGS)
264269
len=uniqueentry(arr,len,tmpbuf,&buflen);
265270
else
266271
buflen=0;
272+
273+
if (buflen>MAXSTRPOS)
274+
ereport(ERROR,
275+
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
276+
errmsg("string is too long for tsvector")));
277+
267278
totallen=CALCDATASIZE(len,buflen);
268279
in= (TSVector)palloc0(totallen);
269-
270280
SET_VARSIZE(in,totallen);
271281
in->size=len;
272-
cur=STRPTR(in);
273282
inarr=ARRPTR(in);
283+
strbuf=STRPTR(in);
284+
stroff=0;
274285
for (i=0;i<len;i++)
275286
{
276-
memcpy((void*)cur, (void*)&tmpbuf[arr[i].entry.pos],arr[i].entry.len);
277-
arr[i].entry.pos=cur-STRPTR(in);
278-
cur+=SHORTALIGN(arr[i].entry.len);
287+
memcpy(strbuf+stroff,&tmpbuf[arr[i].entry.pos],arr[i].entry.len);
288+
arr[i].entry.pos=stroff;
289+
stroff+=arr[i].entry.len;
279290
if (arr[i].entry.haspos)
280291
{
281-
uint16tmplen;
282-
283-
if(arr[i].poslen>0xFFFF)
292+
if (arr[i].poslen>0xFFFF)
284293
elog(ERROR,"positions array too long");
285294

286-
tmplen= (uint16)arr[i].poslen;
287-
288-
/* Copy length to output struct */
289-
memcpy(cur,&tmplen,sizeof(uint16));
290-
cur+=sizeof(uint16);
295+
/* Copy number of positions */
296+
stroff=SHORTALIGN(stroff);
297+
*(uint16*) (strbuf+stroff)= (uint16)arr[i].poslen;
298+
stroff+=sizeof(uint16);
291299

292300
/* Copy positions */
293-
memcpy(cur,arr[i].pos,(arr[i].poslen)*sizeof(WordEntryPos));
294-
cur+=arr[i].poslen*sizeof(WordEntryPos);
301+
memcpy(strbuf+stroff,arr[i].pos,arr[i].poslen*sizeof(WordEntryPos));
302+
stroff+=arr[i].poslen*sizeof(WordEntryPos);
295303

296304
pfree(arr[i].pos);
297305
}
298306
inarr[i]=arr[i].entry;
299307
}
300308

309+
Assert((strbuf+stroff- (char*)in)==totallen);
310+
301311
PG_RETURN_TSVECTOR(in);
302312
}
303313

@@ -495,11 +505,12 @@ tsvectorrecv(PG_FUNCTION_ARGS)
495505

496506
datalen+=lex_len;
497507

498-
if (i>0&&WordEntryCMP(&vec->entries[i],&vec->entries[i-1],STRPTR(vec)) <=0)
508+
if (i>0&&WordEntryCMP(&vec->entries[i],
509+
&vec->entries[i-1],
510+
STRPTR(vec)) <=0)
499511
elog(ERROR,"lexemes are misordered");
500512

501513
/* Receive positions */
502-
503514
if (npos>0)
504515
{
505516
uint16j;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp