@@ -317,7 +317,7 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
317317
318318if (nulls [i ])
319319ereport (ERROR ,
320- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
320+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
321321errmsg ("lexeme array may not contain nulls" )));
322322
323323lex = VARDATA (dlexemes [i ]);
@@ -430,7 +430,7 @@ compareint(const void *va, const void *vb)
430430/*
431431 * Internal routine to delete lexemes from TSVector by array of offsets.
432432 *
433- * int *indices_to_delete -- array of lexeme offsets to delete
433+ * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
434434 * int indices_count -- size of that array
435435 *
436436 * Returns new TSVector without given lexemes along with their positions
@@ -445,52 +445,68 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
445445* arrout ;
446446char * data = STRPTR (tsv ),
447447* dataout ;
448- int i ,
449- j ,
450- k ,
451- curoff ;
448+ int i ,/* index in arrin */
449+ j ,/* index in arrout */
450+ k ,/* index in indices_to_delete */
451+ curoff ;/* index in dataout area */
452452
453453/*
454- *Here we overestimates tsout size, since we don't know exact size
455- *occupied by positions and weights. We will set exact size later after a
456- *pass through TSVector .
454+ * Sort the filter array to simplify membership checks below.  Also, get
455+ * rid of any duplicate entries, so that we can assume that indices_count
456+ * is exactly equal to the number of lexemes that will be removed.
457457 */
458- tsout = (TSVector )palloc0 (VARSIZE (tsv ));
459- arrout = ARRPTR (tsout );
460- tsout -> size = tsv -> size - indices_count ;
461-
462- /* Sort our filter array to simplify membership check later. */
463458if (indices_count > 1 )
459+ {
460+ int kp ;
461+
464462qsort (indices_to_delete ,indices_count ,sizeof (int ),compareint );
463+ kp = 0 ;
464+ for (k = 1 ;k < indices_count ;k ++ )
465+ {
466+ if (indices_to_delete [k ]!= indices_to_delete [kp ])
467+ indices_to_delete [++ kp ]= indices_to_delete [k ];
468+ }
469+ indices_count = ++ kp ;
470+ }
465471
466472/*
467- * Copy tsv to tsout skipping lexemes that enlisted in indices_to_delete.
473+ * Here we overestimate tsout size, since we don't know how much space is
474+ * used by the deleted lexeme(s). We will set exact size below.
468475 */
469- curoff = 0 ;
476+ tsout = (TSVector )palloc0 (VARSIZE (tsv ));
477+
478+ /* This count must be correct because STRPTR(tsout) relies on it. */
479+ tsout -> size = tsv -> size - indices_count ;
480+
481+ /*
482+ * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
483+ */
484+ arrout = ARRPTR (tsout );
470485dataout = STRPTR (tsout );
486+ curoff = 0 ;
471487for (i = j = k = 0 ;i < tsv -> size ;i ++ )
472488{
473489/*
474- *Here we should check whether current i is present in
475- *indices_to_delete or not. Since indices_to_delete is already sorted
476- *we can advance it index only when we have match .
490+ * If current i is present in indices_to_delete, skip this lexeme.
491+ * Since indices_to_delete is already sorted, we only need to check
492+ * the current (k'th) entry.
477493 */
478494if (k < indices_count && i == indices_to_delete [k ])
479495{
480496k ++ ;
481497continue ;
482498}
483499
484- /* Copy lexeme, it's positions and weights */
500+ /* Copy lexeme and its positions and weights */
485501memcpy (dataout + curoff ,data + arrin [i ].pos ,arrin [i ].len );
486502arrout [j ].haspos = arrin [i ].haspos ;
487503arrout [j ].len = arrin [i ].len ;
488504arrout [j ].pos = curoff ;
489505curoff += arrin [i ].len ;
490506if (arrin [i ].haspos )
491507{
492- int len = POSDATALEN (tsv ,arrin + i )* sizeof (WordEntryPos )+
493- sizeof (uint16 );
508+ int len = POSDATALEN (tsv ,arrin + i )* sizeof (WordEntryPos )
509+ + sizeof (uint16 );
494510
495511curoff = SHORTALIGN (curoff );
496512memcpy (dataout + curoff ,
@@ -503,10 +519,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
503519}
504520
505521/*
506- * After the pass through TSVector k should equals exactly to
507- * indices_count. If it isn't then the caller provided us with indices
508- * outside of [0, tsv->size) range and estimation of tsout's size is
509- * wrong.
522+ * k should now be exactly equal to indices_count. If it isn't then the
523+ * caller provided us with indices outside of [0, tsv->size) range and
524+ * estimation of tsout's size is wrong.
510525 */
511526Assert (k == indices_count );
512527
@@ -560,7 +575,7 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
560575
561576/*
562577 * In typical use case array of lexemes to delete is relatively small. So
563- * here weoptimizing things for that scenario: iterate through lexarr
578+ * here we optimize things for that scenario: iterate through lexarr
564579 * performing binary search of each lexeme from lexarr in tsvector.
565580 */
566581skip_indices = palloc0 (nlex * sizeof (int ));
@@ -572,10 +587,10 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
572587
573588if (nulls [i ])
574589ereport (ERROR ,
575- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
590+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
576591errmsg ("lexeme array may not contain nulls" )));
577592
578- lex = VARDATA (dlexemes [i ]);
593+ lex = VARDATA_ANY (dlexemes [i ]);
579594lex_len = VARSIZE_ANY_EXHDR (dlexemes [i ]);
580595lex_pos = tsvector_bsearch (tsin ,lex ,lex_len );
581596
@@ -738,7 +753,7 @@ array_to_tsvector(PG_FUNCTION_ARGS)
738753{
739754if (nulls [i ])
740755ereport (ERROR ,
741- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
756+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
742757errmsg ("lexeme array may not contain nulls" )));
743758
744759datalen += VARSIZE_ANY_EXHDR (dlexemes [i ]);
@@ -797,7 +812,7 @@ tsvector_filter(PG_FUNCTION_ARGS)
797812
798813if (nulls [i ])
799814ereport (ERROR ,
800- (errcode (ERRCODE_INVALID_PARAMETER_VALUE ),
815+ (errcode (ERRCODE_NULL_VALUE_NOT_ALLOWED ),
801816errmsg ("weight array may not contain nulls" )));
802817
803818char_weight = DatumGetChar (dweights [i ]);