Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit eb08605

Browse files
committed
Make websearch_to_tsquery() parse text in quotes as a single token
websearch_to_tsquery() splits text in quotes into tokens and connects them with the phrase operator on its own. However, that leads to surprising results when the token contains no words.

For instance, websearch_to_tsquery('"aaa: bbb"') is 'aaa <2> bbb', because it is equivalent to to_tsquery(E'aaa <-> \':\' <-> bbb'). But websearch_to_tsquery('"aaa: bbb"') has to be 'aaa <-> bbb' in order to match to_tsvector('aaa: bbb').

Since 0c4f355, we anyway connect lexemes of complex tokens with phrase operators. Thus, let's just make websearch_to_tsquery() parse text in quotes as a single token. Therefore, websearch_to_tsquery() should process the quoted text in the same way phraseto_tsquery() does. This solution is exactly what we need and also simplifies the code.

This commit is an incompatible change, so we don't backpatch it.

Reported-by: Valentin Gatien-Baron
Discussion: https://postgr.es/m/CA%2B0DEqiZs7gdOd4ikmg%3D0UWG%2BSwWOLxPsk_JW-sx9WNOyrb0KQ%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Tom Lane, Zhihong Yu
1 parent 651d005, commit eb08605

File tree

3 files changed

+39
-67
lines changed

3 files changed

+39
-67
lines changed

‎src/backend/utils/adt/tsquery.c

Lines changed: 23 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ struct TSQueryParserStateData
7777
char*buf;/* current scan point */
7878
intcount;/* nesting count, incremented by (,
7979
* decremented by ) */
80-
boolin_quotes;/* phrase in quotes "" */
8180
ts_parserstatestate;
8281

8382
/* polish (prefix) notation in list, filled in by push* functions */
@@ -235,9 +234,6 @@ parse_or_operator(TSQueryParserState pstate)
235234
{
236235
char*ptr=pstate->buf;
237236

238-
if (pstate->in_quotes)
239-
return false;
240-
241237
/* it should begin with "OR" literal */
242238
if (pg_strncasecmp(ptr,"or",2)!=0)
243239
return false;
@@ -398,38 +394,29 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
398394
state->buf++;
399395
state->state=WAITOPERAND;
400396

401-
if (state->in_quotes)
402-
continue;
403-
404397
*operator=OP_NOT;
405398
returnPT_OPR;
406399
}
407400
elseif (t_iseq(state->buf,'"'))
408401
{
402+
/* Everything in quotes is processed as a single token */
403+
404+
/* skip opening quote */
409405
state->buf++;
406+
*strval=state->buf;
410407

411-
if (!state->in_quotes)
412-
{
413-
state->state=WAITOPERAND;
408+
/* iterate to the closing quote or end of the string */
409+
while (*state->buf!='\0'&& !t_iseq(state->buf,'"'))
410+
state->buf++;
411+
*lenval=state->buf-*strval;
414412

415-
if (strchr(state->buf,'"'))
416-
{
417-
/* quoted text should be ordered <-> */
418-
state->in_quotes= true;
419-
returnPT_OPEN;
420-
}
413+
/* skip closing quote if not end of the string */
414+
if (*state->buf!='\0')
415+
state->buf++;
421416

422-
/* web search tolerates missing quotes */
423-
continue;
424-
}
425-
else
426-
{
427-
/* we have to provide an operand */
428-
state->in_quotes= false;
429-
state->state=WAITOPERATOR;
430-
pushStop(state);
431-
returnPT_CLOSE;
432-
}
417+
state->state=WAITOPERATOR;
418+
state->count++;
419+
returnPT_VAL;
433420
}
434421
elseif (ISOPERATOR(state->buf))
435422
{
@@ -467,24 +454,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
467454
caseWAITOPERATOR:
468455
if (t_iseq(state->buf,'"'))
469456
{
470-
if (!state->in_quotes)
471-
{
472-
/*
473-
* put implicit AND after an operand and handle this
474-
* quote in WAITOPERAND
475-
*/
476-
state->state=WAITOPERAND;
477-
*operator=OP_AND;
478-
returnPT_OPR;
479-
}
480-
else
481-
{
482-
state->buf++;
483-
484-
/* just close quotes */
485-
state->in_quotes= false;
486-
returnPT_CLOSE;
487-
}
457+
/*
458+
* put implicit AND after an operand and handle this quote
459+
* in WAITOPERAND
460+
*/
461+
state->state=WAITOPERAND;
462+
*operator=OP_AND;
463+
returnPT_OPR;
488464
}
489465
elseif (parse_or_operator(state))
490466
{
@@ -498,18 +474,8 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
498474
}
499475
elseif (!t_isspace(state->buf))
500476
{
501-
if (state->in_quotes)
502-
{
503-
/* put implicit <-> after an operand */
504-
*operator=OP_PHRASE;
505-
*weight=1;
506-
}
507-
else
508-
{
509-
/* put implicit AND after an operand */
510-
*operator=OP_AND;
511-
}
512-
477+
/* put implicit AND after an operand */
478+
*operator=OP_AND;
513479
state->state=WAITOPERAND;
514480
returnPT_OPR;
515481
}
@@ -846,7 +812,6 @@ parse_tsquery(char *buf,
846812
state.buffer=buf;
847813
state.buf=buf;
848814
state.count=0;
849-
state.in_quotes= false;
850815
state.state=WAITFIRSTOPERAND;
851816
state.polstr=NIL;
852817

‎src/test/regress/expected/tsearch.out

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,9 +2678,9 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
26782678

26792679
-- test quotes
26802680
select websearch_to_tsquery('english', '"pg_class pg');
2681-
websearch_to_tsquery
2682-
-------------------------
2683-
'pg' <-> 'class'& 'pg'
2681+
websearch_to_tsquery
2682+
---------------------------
2683+
'pg' <-> 'class'<-> 'pg'
26842684
(1 row)
26852685

26862686
select websearch_to_tsquery('english', 'pg_class pg"');
@@ -2695,6 +2695,12 @@ select websearch_to_tsquery('english', '"pg_class pg"');
26952695
'pg' <-> 'class' <-> 'pg'
26962696
(1 row)
26972697

2698+
select websearch_to_tsquery('english', '"pg_class : pg"');
2699+
websearch_to_tsquery
2700+
---------------------------
2701+
'pg' <-> 'class' <-> 'pg'
2702+
(1 row)
2703+
26982704
select websearch_to_tsquery('english', 'abc "pg_class pg"');
26992705
websearch_to_tsquery
27002706
-----------------------------------
@@ -2708,15 +2714,15 @@ select websearch_to_tsquery('english', '"pg_class pg" def');
27082714
(1 row)
27092715

27102716
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
2711-
websearch_to_tsquery
2712-
--------------------------------------------------------
2713-
'abc' & 'pg' <->('pg' <-> 'class' ) <-> 'pg' & 'def'
2717+
websearch_to_tsquery
2718+
----------------------------------------------------
2719+
'abc' & 'pg' <-> 'pg' <-> 'class' <-> 'pg' & 'def'
27142720
(1 row)
27152721

27162722
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
2717-
websearch_to_tsquery
2718-
----------------------------------------
2719-
'pg' <->('pg' <-> 'class' ) <-> 'pg'
2723+
websearch_to_tsquery
2724+
------------------------------------
2725+
'pg' <-> 'pg' <-> 'class' <-> 'pg'
27202726
(1 row)
27212727

27222728
select websearch_to_tsquery('english', '""pg pg_class pg""');

‎src/test/regress/sql/tsearch.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
759759
select websearch_to_tsquery('english','"pg_class pg');
760760
select websearch_to_tsquery('english','pg_class pg"');
761761
select websearch_to_tsquery('english','"pg_class pg"');
762+
select websearch_to_tsquery('english','"pg_class : pg"');
762763
select websearch_to_tsquery('english','abc "pg_class pg"');
763764
select websearch_to_tsquery('english','"pg_class pg" def');
764765
select websearch_to_tsquery('english','abc "pg pg_class pg" def');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp