Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit c50d192

Browse files
committed
Fix ts_delete(tsvector, text[]) to cope with duplicate array entries.
Such cases either failed an Assert, or produced a corrupt tsvector in non-Assert builds, as reported by Andreas Seltenreich. The reason is that tsvector_delete_by_indices() just assumed that its input array had no duplicates. Fix by explicitly de-duping. In passing, improve some comments, and fix a number of tests for null values to use ERRCODE_NULL_VALUE_NOT_ALLOWED not ERRCODE_INVALID_PARAMETER_VALUE. Discussion: <87invhoj6e.fsf@credativ.de>
1 parent 33fe736 commit c50d192

File tree

3 files changed

+53
-31
lines changed

3 files changed

+53
-31
lines changed

‎src/backend/utils/adt/tsvector_op.c

Lines changed: 46 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
317317

318318
if (nulls[i])
319319
ereport(ERROR,
320-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
320+
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
321321
errmsg("lexeme array may not contain nulls")));
322322

323323
lex=VARDATA(dlexemes[i]);
@@ -430,7 +430,7 @@ compareint(const void *va, const void *vb)
430430
/*
431431
* Internal routine to delete lexemes from TSVector by array of offsets.
432432
*
433-
* int *indices_to_delete -- array of lexeme offsets to delete
433+
* int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
434434
* int indices_count -- size of that array
435435
*
436436
* Returns new TSVector without given lexemes along with their positions
@@ -445,52 +445,68 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
445445
*arrout;
446446
char*data=STRPTR(tsv),
447447
*dataout;
448-
inti,
449-
j,
450-
k,
451-
curoff;
448+
inti,/* index in arrin */
449+
j,/* index in arrout */
450+
k,/* index in indices_to_delete */
451+
curoff;/* index in dataout area */
452452

453453
/*
454-
*Here we overestimates tsout size, since we don't know exact size
455-
*occupied by positions and weights. We will set exact size later after a
456-
*pass through TSVector.
454+
*Sort the filter array to simplify membership checks below. Also, get
455+
*rid of any duplicate entries, so that we can assume that indices_count
456+
*is exactly equal to the number of lexemes that will be removed.
457457
*/
458-
tsout= (TSVector)palloc0(VARSIZE(tsv));
459-
arrout=ARRPTR(tsout);
460-
tsout->size=tsv->size-indices_count;
461-
462-
/* Sort our filter array to simplify membership check later. */
463458
if (indices_count>1)
459+
{
460+
intkp;
461+
464462
qsort(indices_to_delete,indices_count,sizeof(int),compareint);
463+
kp=0;
464+
for (k=1;k<indices_count;k++)
465+
{
466+
if (indices_to_delete[k]!=indices_to_delete[kp])
467+
indices_to_delete[++kp]=indices_to_delete[k];
468+
}
469+
indices_count=++kp;
470+
}
465471

466472
/*
467-
* Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete.
473+
* Here we overestimate tsout size, since we don't know how much space is
474+
* used by the deleted lexeme(s). We will set exact size below.
468475
*/
469-
curoff=0;
476+
tsout= (TSVector)palloc0(VARSIZE(tsv));
477+
478+
/* This count must be correct because STRPTR(tsout) relies on it. */
479+
tsout->size=tsv->size-indices_count;
480+
481+
/*
482+
* Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
483+
*/
484+
arrout=ARRPTR(tsout);
470485
dataout=STRPTR(tsout);
486+
curoff=0;
471487
for (i=j=k=0;i<tsv->size;i++)
472488
{
473489
/*
474-
*Here we should check whethercurrent i is present in
475-
*indices_to_delete or not.Since indices_to_delete is already sorted
476-
*we can advance it index only when we have match.
490+
* If current i is present in indices_to_delete, skip this lexeme.
491+
* Since indices_to_delete is already sorted, we only need to check
492+
*the current (k'th) entry.
477493
*/
478494
if (k<indices_count&&i==indices_to_delete[k])
479495
{
480496
k++;
481497
continue;
482498
}
483499

484-
/* Copy lexeme, it's positions and weights */
500+
/* Copy lexeme and its positions and weights */
485501
memcpy(dataout+curoff,data+arrin[i].pos,arrin[i].len);
486502
arrout[j].haspos=arrin[i].haspos;
487503
arrout[j].len=arrin[i].len;
488504
arrout[j].pos=curoff;
489505
curoff+=arrin[i].len;
490506
if (arrin[i].haspos)
491507
{
492-
intlen=POSDATALEN(tsv,arrin+i)*sizeof(WordEntryPos)+
493-
sizeof(uint16);
508+
intlen=POSDATALEN(tsv,arrin+i)*sizeof(WordEntryPos)
509+
+sizeof(uint16);
494510

495511
curoff=SHORTALIGN(curoff);
496512
memcpy(dataout+curoff,
@@ -503,10 +519,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
503519
}
504520

505521
/*
506-
* After the pass through TSVector k should equals exactly to
507-
* indices_count. If it isn't then the caller provided us with indices
508-
* outside of [0, tsv->size) range and estimation of tsout's size is
509-
* wrong.
522+
* k should now be exactly equal to indices_count. If it isn't then the
523+
* caller provided us with indices outside of [0, tsv->size) range and
524+
* estimation of tsout's size is wrong.
510525
*/
511526
Assert(k==indices_count);
512527

@@ -560,7 +575,7 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
560575

561576
/*
562577
* In typical use case array of lexemes to delete is relatively small. So
563-
* here weoptimizing things for that scenario: iterate through lexarr
578+
* here we optimize things for that scenario: iterate through lexarr
564579
* performing binary search of each lexeme from lexarr in tsvector.
565580
*/
566581
skip_indices=palloc0(nlex*sizeof(int));
@@ -572,10 +587,10 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
572587

573588
if (nulls[i])
574589
ereport(ERROR,
575-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
590+
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
576591
errmsg("lexeme array may not contain nulls")));
577592

578-
lex=VARDATA(dlexemes[i]);
593+
lex=VARDATA_ANY(dlexemes[i]);
579594
lex_len=VARSIZE_ANY_EXHDR(dlexemes[i]);
580595
lex_pos=tsvector_bsearch(tsin,lex,lex_len);
581596

@@ -738,7 +753,7 @@ array_to_tsvector(PG_FUNCTION_ARGS)
738753
{
739754
if (nulls[i])
740755
ereport(ERROR,
741-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
756+
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
742757
errmsg("lexeme array may not contain nulls")));
743758

744759
datalen+=VARSIZE_ANY_EXHDR(dlexemes[i]);
@@ -797,7 +812,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
797812

798813
if (nulls[i])
799814
ereport(ERROR,
800-
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
815+
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
801816
errmsg("weight array may not contain nulls")));
802817

803818
char_weight=DatumGetChar(dweights[i]);

‎src/test/regress/expected/tstypes.out

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1087,6 +1087,12 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi
10871087
'base' 'hidden' 'strike'
10881088
(1 row)
10891089

1090+
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
1091+
ts_delete
1092+
--------------------------
1093+
'base' 'hidden' 'strike'
1094+
(1 row)
1095+
10901096
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
10911097
ERROR: lexeme array may not contain nulls
10921098
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);

‎src/test/regress/sql/tstypes.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3':
212212
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
213213
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
214214
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
215+
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
215216
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel',NULL]);
216217

217218
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp