Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f10eab7

Browse files
committed
Make array_to_tsvector() sort and de-duplicate the given strings.
This is required for the result to be a legal tsvector value. Noted while fooling with Andreas Seltenreich's ts_delete() crash. Discussion: <87invhoj6e.fsf@credativ.de>
1 parent c50d192 commit f10eab7

File tree

4 files changed

+52
-8
lines changed

4 files changed

+52
-8
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9294,7 +9294,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92949294
<entry><type>tsvector</type></entry>
92959295
<entry>convert array of lexemes to <type>tsvector</type></entry>
92969296
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
9297-
<entry><literal>'fat' 'cat' 'rat'</literal></entry>
9297+
<entry><literal>'cat' 'fat' 'rat'</literal></entry>
92989298
</row>
92999299
<row>
93009300
<entry>

‎src/backend/utils/adt/tsvector_op.c

Lines changed: 42 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
416416
return-1;
417417
}
418418

419+
/*
420+
* qsort comparator functions
421+
*/
422+
419423
staticint
420-
compareint(constvoid*va,constvoid*vb)
424+
compare_int(constvoid*va,constvoid*vb)
421425
{
422-
int32a=*((constint32*)va);
423-
int32b=*((constint32*)vb);
426+
inta=*((constint*)va);
427+
intb=*((constint*)vb);
424428

425429
if (a==b)
426430
return0;
427431
return (a>b) ?1 :-1;
428432
}
429433

434+
staticint
435+
compare_text_lexemes(constvoid*va,constvoid*vb)
436+
{
437+
Datuma=*((constDatum*)va);
438+
Datumb=*((constDatum*)vb);
439+
char*alex=VARDATA_ANY(a);
440+
intalex_len=VARSIZE_ANY_EXHDR(a);
441+
char*blex=VARDATA_ANY(b);
442+
intblex_len=VARSIZE_ANY_EXHDR(b);
443+
444+
returntsCompareString(alex,alex_len,blex,blex_len, false);
445+
}
446+
430447
/*
431448
* Internal routine to delete lexemes from TSVector by array of offsets.
432449
*
@@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
459476
{
460477
intkp;
461478

462-
qsort(indices_to_delete,indices_count,sizeof(int),compareint);
479+
qsort(indices_to_delete,indices_count,sizeof(int),compare_int);
463480
kp=0;
464481
for (k=1;k<indices_count;k++)
465482
{
@@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
743760
bool*nulls;
744761
intnitems,
745762
i,
763+
j,
746764
tslen,
747765
datalen=0;
748766
char*cur;
749767

750768
deconstruct_array(v,TEXTOID,-1, false,'i',&dlexemes,&nulls,&nitems);
751769

770+
/* Reject nulls (maybe we should just ignore them, instead?) */
752771
for (i=0;i<nitems;i++)
753772
{
754773
if (nulls[i])
755774
ereport(ERROR,
756775
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
757776
errmsg("lexeme array may not contain nulls")));
777+
}
758778

759-
datalen+=VARSIZE_ANY_EXHDR(dlexemes[i]);
779+
/* Sort and de-dup, because this is required for a valid tsvector. */
780+
if (nitems>1)
781+
{
782+
qsort(dlexemes,nitems,sizeof(Datum),compare_text_lexemes);
783+
j=0;
784+
for (i=1;i<nitems;i++)
785+
{
786+
if (compare_text_lexemes(&dlexemes[j],&dlexemes[i])<0)
787+
dlexemes[++j]=dlexemes[i];
788+
}
789+
nitems=++j;
760790
}
761791

792+
/* Calculate space needed for surviving lexemes. */
793+
for (i=0;i<nitems;i++)
794+
datalen+=VARSIZE_ANY_EXHDR(dlexemes[i]);
762795
tslen=CALCDATASIZE(nitems,datalen);
796+
797+
/* Allocate and fill tsvector. */
763798
tsout= (TSVector)palloc0(tslen);
764799
SET_VARSIZE(tsout,tslen);
765800
tsout->size=nitems;
801+
766802
arrout=ARRPTR(tsout);
767803
cur=STRPTR(tsout);
768-
769804
for (i=0;i<nitems;i++)
770805
{
771-
char*lex=VARDATA(dlexemes[i]);
806+
char*lex=VARDATA_ANY(dlexemes[i]);
772807
intlex_len=VARSIZE_ANY_EXHDR(dlexemes[i]);
773808

774809
memcpy(cur,lex,lex_len);

‎src/test/regress/expected/tstypes.out

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1165,6 +1165,13 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
11651165

11661166
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
11671167
ERROR: lexeme array may not contain nulls
1168+
-- array_to_tsvector must sort and de-dup
1169+
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
1170+
array_to_tsvector
1171+
-------------------
1172+
'bar' 'baz' 'foo'
1173+
(1 row)
1174+
11681175
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
11691176
setweight
11701177
----------------------------------------------------------

‎src/test/regress/sql/tstypes.sql

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,8 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
226226

227227
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
228228
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship',NULL]);
229+
-- array_to_tsvector must sort and de-dup
230+
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
229231

230232
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector,'c');
231233
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector,'c');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp