Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit cbe25dc

Browse files
committed
Disallow making an empty lexeme via array_to_tsvector().
The tsvector data type has always forbidden lexemes to be empty. However, array_to_tsvector() didn't get that memo, and would allow an empty-string array element to become an empty lexeme. This could result in dump/restore failures later, not to mention whatever semantic issues might be behind the original prohibition.

However, other functions that take a plain text input directly as a lexeme value do not need a similar restriction, because they only match the string against existing tsvector entries. In particular it'd be a bad idea to make ts_delete() reject empty strings, since that is the most convenient way to clean up any bad data that might have gotten into a tsvector column via this bug.

Reflecting on that, let's also remove the prohibition against NULL array elements in tsvector_delete_arr and tsvector_setweight_by_filter. It seems more consistent to ignore them, as an empty-string element would be ignored.

There's a case for back-patching this, since it's clearly a bug fix. On balance though, it doesn't seem like something to change in a minor release.

Jean-Christophe Arnu

Discussion: https://postgr.es/m/CAHZmTm1YVndPgUVRoag2WL0w900XcoiivDDj-gTTYBsG25c65A@mail.gmail.com
1 parent 1241fcb commit cbe25dc

File tree

4 files changed

+44
-17
lines changed

4 files changed

+44
-17
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12920,8 +12920,10 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
1292012920
<returnvalue>tsvector</returnvalue>
1292112921
</para>
1292212922
<para>
12923-
Converts an array of lexemes to a <type>tsvector</type>.
12924-
The given strings are used as-is without further processing.
12923+
Converts an array of text strings to a <type>tsvector</type>.
12924+
The given strings are used as lexemes as-is, without further
12925+
processing. Array elements must not be empty strings
12926+
or <literal>NULL</literal>.
1292512927
</para>
1292612928
<para>
1292712929
<literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal>
@@ -13104,6 +13106,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
1310413106
Assigns the specified <parameter>weight</parameter> to elements
1310513107
of the <parameter>vector</parameter> that are listed
1310613108
in <parameter>lexemes</parameter>.
13109+
The strings in <parameter>lexemes</parameter> are taken as lexemes
13110+
as-is, without further processing. Strings that do not match any
13111+
lexeme in <parameter>vector</parameter> are ignored.
1310713112
</para>
1310813113
<para>
1310913114
<literal>setweight('fat:2,4 cat:3 rat:5,6B'::tsvector, 'A', '{cat,rat}')</literal>
@@ -13265,6 +13270,8 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
1326513270
<para>
1326613271
Removes any occurrence of the given <parameter>lexeme</parameter>
1326713272
from the <parameter>vector</parameter>.
13273+
The <parameter>lexeme</parameter> string is treated as a lexeme as-is,
13274+
without further processing.
1326813275
</para>
1326913276
<para>
1327013277
<literal>ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')</literal>
@@ -13281,6 +13288,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
1328113288
Removes any occurrences of the lexemes
1328213289
in <parameter>lexemes</parameter>
1328313290
from the <parameter>vector</parameter>.
13291+
The strings in <parameter>lexemes</parameter> are taken as lexemes
13292+
as-is, without further processing. Strings that do not match any
13293+
lexeme in <parameter>vector</parameter> are ignored.
1328413294
</para>
1328513295
<para>
1328613296
<literal>ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat'])</literal>

‎src/backend/utils/adt/tsvector_op.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -322,10 +322,9 @@ tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
322322
int lex_len,
323323
lex_pos;
324324

325+
/* Ignore null array elements, they surely don't match */
325326
if (nulls[i])
326-
ereport(ERROR,
327-
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
328-
errmsg("lexeme array may not contain nulls")));
327+
continue;
329328

330329
lex=VARDATA(dlexemes[i]);
331330
lex_len=VARSIZE(dlexemes[i])-VARHDRSZ;
@@ -602,10 +601,9 @@ tsvector_delete_arr(PG_FUNCTION_ARGS)
602601
int lex_len,
603602
lex_pos;
604603

604+
/* Ignore null array elements, they surely don't match */
605605
if (nulls[i])
606-
ereport(ERROR,
607-
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
608-
errmsg("lexeme array may not contain nulls")));
606+
continue;
609607

610608
lex=VARDATA(dlexemes[i]);
611609
lex_len=VARSIZE(dlexemes[i])-VARHDRSZ;
@@ -761,13 +759,21 @@ array_to_tsvector(PG_FUNCTION_ARGS)
761759

762760
deconstruct_array(v,TEXTOID,-1, false,TYPALIGN_INT,&dlexemes,&nulls,&nitems);
763761

764-
/* Reject nulls (maybe we should just ignore them, instead?) */
762+
/*
763+
* Reject nulls and zero length strings (maybe we should just ignore them,
764+
* instead?)
765+
*/
765766
for (i=0;i<nitems;i++)
766767
{
767768
if (nulls[i])
768769
ereport(ERROR,
769770
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
770771
errmsg("lexeme array may not contain nulls")));
772+
773+
if (VARSIZE(dlexemes[i])-VARHDRSZ==0)
774+
ereport(ERROR,
775+
(errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
776+
errmsg("lexeme array may not contain empty strings")));
771777
}
772778

773779
/* Sort and de-dup, because this is required for a valid tsvector. */

‎src/test/regress/expected/tstypes.out

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
8585
'a':3A,4B 'b':2A 'ba':1237
8686
(1 row)
8787

88+
SELECT $$'' '1' '2'$$::tsvector; -- error, empty lexeme is not allowed
89+
ERROR: syntax error in tsvector: "'' '1' '2'"
90+
LINE 1: SELECT $$'' '1' '2'$$::tsvector;
91+
^
8892
--Base tsquery test
8993
SELECT '1'::tsquery;
9094
tsquery
@@ -1258,8 +1262,12 @@ SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceshi
12581262
'base' 'hidden' 'strike'
12591263
(1 row)
12601264

1261-
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
1262-
ERROR: lexeme array may not contain nulls
1265+
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', '', NULL]);
1266+
ts_delete
1267+
--------------------------
1268+
'base' 'hidden' 'strike'
1269+
(1 row)
1270+
12631271
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
12641272
unnest
12651273
---------------------------------------------
@@ -1328,8 +1336,11 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
13281336
'base' 'hidden' 'rebel' 'spaceship' 'strike'
13291337
(1 row)
13301338

1339+
-- null and empty string are disallowed, since we mustn't make an empty lexeme
13311340
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
13321341
ERROR: lexeme array may not contain nulls
1342+
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', '']);
1343+
ERROR: lexeme array may not contain empty strings
13331344
-- array_to_tsvector must sort and de-dup
13341345
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
13351346
array_to_tsvector
@@ -1367,14 +1378,12 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '
13671378
'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C
13681379
(1 row)
13691380

1370-
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c','{a,zxc}');
1381+
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c',ARRAY['a', 'zxc', '', NULL]);
13711382
setweight
13721383
---------------------------------
13731384
'a' 'asd' 'w':5,6,12B,13A 'zxc'
13741385
(1 row)
13751386

1376-
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
1377-
ERROR: lexeme array may not contain nulls
13781387
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
13791388
ts_filter
13801389
-------------------------------------------------------------

‎src/test/regress/sql/tstypes.sql

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
1717
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
1818
SELECT'''w'':4A,3B,2C,1D,5 a:8';
1919
SELECT'a:3A b:2a'::tsvector||'ba:1234 a:1B';
20+
SELECT $$'' '1' '2'$$::tsvector;  -- error, empty lexeme is not allowed
2021

2122
--Base tsquery test
2223
SELECT'1'::tsquery;
@@ -239,7 +240,7 @@ SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3':
239240
SELECT ts_delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
240241
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
241242
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','rebel']);
242-
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel',NULL]);
243+
SELECT ts_delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel','',NULL]);
243244

244245
SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
245246
SELECT unnest('base hidden rebel spaceship strike'::tsvector);
@@ -251,7 +252,9 @@ SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D st
251252
SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
252253

253254
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
255+
-- null and empty string are disallowed, since we mustn't make an empty lexeme
254256
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship',NULL]);
257+
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','']);
255258
-- array_to_tsvector must sort and de-dup
256259
SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
257260

@@ -260,8 +263,7 @@ SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
260263
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector,'c','{a}');
261264
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector,'c','{a}');
262265
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector,'c','{a,zxc}');
263-
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector,'c','{a,zxc}');
264-
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector,'c', ARRAY['a','zxc',NULL]);
266+
SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector,'c', ARRAY['a','zxc','',NULL]);
265267

266268
SELECT ts_filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector,'{a}');
267269
SELECT ts_filter('base hidden rebel spaceship strike'::tsvector,'{a}');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp