Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit0c4f355

Browse files
committed
Fix parsing of complex morphs to tsquery
When to_tsquery() or websearch_to_tsquery() meet a complex morph containing multiple words residing in adjacent positions, these words are connected with the OP_AND operator. That leads to surprising results. For instance, both websearch_to_tsquery('"pg_class pg"') and to_tsquery('pg_class <-> pg') produce the '( pg & class ) <-> pg' tsquery. This tsquery requires the 'pg' and 'class' words to reside at the same position and doesn't match to_tsvector('pg_class pg'). It appears to be ridiculous behavior, which needs to be fixed. This commit makes to_tsquery() and websearch_to_tsquery() connect words residing in adjacent positions with OP_PHRASE. Therefore, those words are now normally chained with the other OP_PHRASE operators. The examples above now produce the 'pg <-> class <-> pg' tsquery, which matches to_tsvector('pg_class pg'). Another effect of this commit is that complex morph word positions now need to match the tsvector even if there is no surrounding OP_PHRASE. This behavior change generally looks like an improvement, but it makes this commit not backpatchable. Reported-by: Barry Pederson Bug: #16592 Discussion: https://postgr.es/m/16592-70b110ff9731c07d@postgresql.org Discussion: https://postgr.es/m/CAPpHfdv0EzVhf6CWfB1_TTZqXV_2Sn-jSY3zSd7ePH%3D-%2B1V2DQ%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Tom Lane, Neil Chen
1 parentdfb75e4 commit0c4f355

File tree

3 files changed

+132
-97
lines changed

3 files changed

+132
-97
lines changed

‎src/backend/tsearch/to_tsany.c

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,20 @@
2020
#include"utils/jsonfuncs.h"
2121

2222

23+
/*
24+
* Opaque data structure, which is passed by parse_tsquery() to pushval_morph().
25+
*/
2326
typedefstructMorphOpaque
2427
{
2528
Oidcfg_id;
26-
intqoperator;/* query operator */
29+
30+
/*
31+
* A single tsquery morph can be parsed into multiple words. When these
32+
* words reside in adjacent positions, they are connected using this
33+
* operator. Usually, that is OP_PHRASE, which requires word positions of
34+
* a complex morph to exactly match the tsvector.
35+
*/
36+
intqoperator;
2737
}MorphOpaque;
2838

2939
typedefstructTSVectorBuildState
@@ -573,7 +583,14 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
573583
MorphOpaquedata;
574584

575585
data.cfg_id=PG_GETARG_OID(0);
576-
data.qoperator=OP_AND;
586+
587+
/*
588+
* Passing OP_PHRASE as a qoperator makes tsquery require the word
589+
* positions of a complex morph to exactly match the tsvector. Also, when
590+
* the complex morphs are connected with OP_PHRASE operator, we connect
591+
* all their words into the OP_PHRASE sequence.
592+
*/
593+
data.qoperator=OP_PHRASE;
577594

578595
query=parse_tsquery(text_to_cstring(in),
579596
pushval_morph,
@@ -603,6 +620,12 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
603620
MorphOpaquedata;
604621

605622
data.cfg_id=PG_GETARG_OID(0);
623+
624+
/*
625+
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
626+
* single morph. Passing OP_AND as a qoperator makes tsquery require
627+
* matching of all words independently of their positions.
628+
*/
606629
data.qoperator=OP_AND;
607630

608631
query=parse_tsquery(text_to_cstring(in),
@@ -634,6 +657,12 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
634657
MorphOpaquedata;
635658

636659
data.cfg_id=PG_GETARG_OID(0);
660+
661+
/*
662+
* parse_tsquery() with P_TSQ_PLAIN flag takes the whole input text as a
663+
* single morph. Passing OP_PHRASE as a qoperator makes tsquery require
664+
* matching of word positions.
665+
*/
637666
data.qoperator=OP_PHRASE;
638667

639668
query=parse_tsquery(text_to_cstring(in),
@@ -665,7 +694,13 @@ websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
665694

666695
data.cfg_id=PG_GETARG_OID(0);
667696

668-
data.qoperator=OP_AND;
697+
/*
698+
* Passing OP_PHRASE as a qoperator makes tsquery require the word
699+
* positions of a complex morph to exactly match the tsvector. Also, when
700+
* the complex morphs are given in quotes, we connect all their words into
701+
* the OP_PHRASE sequence.
702+
*/
703+
data.qoperator=OP_PHRASE;
669704

670705
query=parse_tsquery(text_to_cstring(in),
671706
pushval_morph,

‎src/test/regress/expected/tsearch.out

Lines changed: 76 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,63 +1997,63 @@ ALTER TABLE test_tsquery ADD COLUMN keyword tsquery;
19971997
UPDATE test_tsquery SET keyword = to_tsquery('english', txtkeyword);
19981998
ALTER TABLE test_tsquery ADD COLUMN sample tsquery;
19991999
UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
2000-
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new& york';
2000+
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new<-> york';
20012001
count
20022002
-------
20032003
2
20042004
(1 row)
20052005

2006-
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new& york';
2006+
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new<-> york';
20072007
count
20082008
-------
20092009
3
20102010
(1 row)
20112011

2012-
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new& york';
2012+
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new<-> york';
20132013
count
20142014
-------
20152015
1
20162016
(1 row)
20172017

2018-
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new& york';
2018+
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new<-> york';
20192019
count
20202020
-------
20212021
4
20222022
(1 row)
20232023

2024-
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new& york';
2024+
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new<-> york';
20252025
count
20262026
-------
20272027
3
20282028
(1 row)
20292029

20302030
CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
20312031
SET enable_seqscan=OFF;
2032-
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new& york';
2032+
SELECT COUNT(*) FROM test_tsquery WHERE keyword < 'new<-> york';
20332033
count
20342034
-------
20352035
2
20362036
(1 row)
20372037

2038-
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new& york';
2038+
SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new<-> york';
20392039
count
20402040
-------
20412041
3
20422042
(1 row)
20432043

2044-
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new& york';
2044+
SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new<-> york';
20452045
count
20462046
-------
20472047
1
20482048
(1 row)
20492049

2050-
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new& york';
2050+
SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new<-> york';
20512051
count
20522052
-------
20532053
4
20542054
(1 row)
20552055

2056-
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new& york';
2056+
SELECT COUNT(*) FROM test_tsquery WHERE keyword > 'new<-> york';
20572057
count
20582058
-------
20592059
3
@@ -2085,10 +2085,10 @@ SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::
20852085
'hotel' & ( 'moskva' | 'moscow' )
20862086
(1 row)
20872087

2088-
SELECT ts_rewrite('bar &new &qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text );
2089-
ts_rewrite
2090-
---------------------------------------------------------------------------------
2091-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2088+
SELECT ts_rewrite('bar & qq & foo &(new <->york)', 'SELECT keyword, sample FROM test_tsquery'::text );
2089+
ts_rewrite
2090+
-------------------------------------------------------------------------------------
2091+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
20922092
(1 row)
20932093

20942094
SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
@@ -2103,10 +2103,10 @@ SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery')
21032103
'hotel' & ( 'moskva' | 'moscow' )
21042104
(1 row)
21052105

2106-
SELECT ts_rewrite( 'bar &new &qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
2107-
ts_rewrite
2108-
---------------------------------------------------------------------------------
2109-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2106+
SELECT ts_rewrite( 'bar & qq & foo &(new <->york)', 'SELECT keyword, sample FROM test_tsquery');
2107+
ts_rewrite
2108+
-------------------------------------------------------------------------------------
2109+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
21102110
(1 row)
21112111

21122112
SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
@@ -2149,9 +2149,9 @@ NOTICE: text-search query doesn't contain lexemes: ""
21492149
(1 row)
21502150

21512151
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
2152-
keyword
2153-
----------------
2154-
'new'& 'york'
2152+
keyword
2153+
------------------
2154+
'new'<-> 'york'
21552155
(1 row)
21562156

21572157
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
@@ -2183,10 +2183,10 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
21832183
'hotel' & ( 'moskva' | 'moscow' )
21842184
(1 row)
21852185

2186-
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new &qq & foo & york') AS query;
2187-
ts_rewrite
2188-
---------------------------------------------------------------------------------
2189-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2186+
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo &(new <->york)') AS query;
2187+
ts_rewrite
2188+
-------------------------------------------------------------------------------------
2189+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
21902190
(1 row)
21912191

21922192
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -2201,18 +2201,18 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
22012201
'hotel' & ( 'moskva' | 'moscow' )
22022202
(1 row)
22032203

2204-
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar &new &qq & foo & york') AS query;
2205-
ts_rewrite
2206-
---------------------------------------------------------------------------------
2207-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2204+
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo &(new <->york)') AS query;
2205+
ts_rewrite
2206+
-------------------------------------------------------------------------------------
2207+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
22082208
(1 row)
22092209

22102210
CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
22112211
SET enable_seqscan=OFF;
22122212
SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
2213-
keyword
2214-
----------------
2215-
'new'& 'york'
2213+
keyword
2214+
------------------
2215+
'new'<-> 'york'
22162216
(1 row)
22172217

22182218
SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
@@ -2244,10 +2244,10 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
22442244
'hotel' & ( 'moskva' | 'moscow' )
22452245
(1 row)
22462246

2247-
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar &new &qq & foo & york') AS query;
2248-
ts_rewrite
2249-
---------------------------------------------------------------------------------
2250-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2247+
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo &(new <->york)') AS query;
2248+
ts_rewrite
2249+
-------------------------------------------------------------------------------------
2250+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
22512251
(1 row)
22522252

22532253
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -2262,10 +2262,10 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
22622262
'hotel' & ( 'moskva' | 'moscow' )
22632263
(1 row)
22642264

2265-
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new &qq & foo & york') AS query;
2266-
ts_rewrite
2267-
---------------------------------------------------------------------------------
2268-
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'& 'appl' | 'new'& 'york' )
2265+
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & qq & foo &(new <->york)') AS query;
2266+
ts_rewrite
2267+
-------------------------------------------------------------------------------------
2268+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big'<-> 'appl' | 'new'<-> 'york' )
22692269
(1 row)
22702270

22712271
SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
@@ -2456,19 +2456,19 @@ select websearch_to_tsquery('simple', 'fat:A : cat:B');
24562456
select websearch_to_tsquery('simple', 'fat*rat');
24572457
websearch_to_tsquery
24582458
----------------------
2459-
'fat'& 'rat'
2459+
'fat'<-> 'rat'
24602460
(1 row)
24612461

24622462
select websearch_to_tsquery('simple', 'fat-rat');
2463-
websearch_to_tsquery
2464-
---------------------------
2465-
'fat-rat'& 'fat'& 'rat'
2463+
websearch_to_tsquery
2464+
-------------------------------
2465+
'fat-rat'<-> 'fat'<-> 'rat'
24662466
(1 row)
24672467

24682468
select websearch_to_tsquery('simple', 'fat_rat');
24692469
websearch_to_tsquery
24702470
----------------------
2471-
'fat'& 'rat'
2471+
'fat'<-> 'rat'
24722472
(1 row)
24732473

24742474
-- weights are completely ignored
@@ -2665,64 +2665,64 @@ select websearch_to_tsquery('simple', 'abc OR1234');
26652665
(1 row)
26662666

26672667
select websearch_to_tsquery('simple', 'abc or-abc');
2668-
websearch_to_tsquery
2669-
---------------------------------
2670-
'abc' & 'or-abc'& 'or'& 'abc'
2668+
websearch_to_tsquery
2669+
-------------------------------------
2670+
'abc' & 'or-abc'<-> 'or'<-> 'abc'
26712671
(1 row)
26722672

26732673
select websearch_to_tsquery('simple', 'abc OR_abc');
2674-
websearch_to_tsquery
2675-
----------------------
2676-
'abc' & 'or'& 'abc'
2674+
websearch_to_tsquery
2675+
------------------------
2676+
'abc' & 'or'<-> 'abc'
26772677
(1 row)
26782678

26792679
-- test quotes
26802680
select websearch_to_tsquery('english', '"pg_class pg');
2681-
websearch_to_tsquery
2682-
-----------------------
2683-
'pg'& 'class' & 'pg'
2681+
websearch_to_tsquery
2682+
-------------------------
2683+
'pg'<-> 'class' & 'pg'
26842684
(1 row)
26852685

26862686
select websearch_to_tsquery('english', 'pg_class pg"');
2687-
websearch_to_tsquery
2688-
-----------------------
2689-
'pg'& 'class' & 'pg'
2687+
websearch_to_tsquery
2688+
-------------------------
2689+
'pg'<-> 'class' & 'pg'
26902690
(1 row)
26912691

26922692
select websearch_to_tsquery('english', '"pg_class pg"');
2693-
websearch_to_tsquery
2694-
-----------------------------
2695-
('pg'& 'class' ) <-> 'pg'
2693+
websearch_to_tsquery
2694+
---------------------------
2695+
'pg'<-> 'class' <-> 'pg'
26962696
(1 row)
26972697

26982698
select websearch_to_tsquery('english', 'abc "pg_class pg"');
2699-
websearch_to_tsquery
2700-
-------------------------------------
2701-
'abc' &('pg'& 'class' ) <-> 'pg'
2699+
websearch_to_tsquery
2700+
-----------------------------------
2701+
'abc' & 'pg'<-> 'class' <-> 'pg'
27022702
(1 row)
27032703

27042704
select websearch_to_tsquery('english', '"pg_class pg" def');
2705-
websearch_to_tsquery
2706-
-------------------------------------
2707-
('pg'& 'class' ) <-> 'pg' & 'def'
2705+
websearch_to_tsquery
2706+
-----------------------------------
2707+
'pg'<-> 'class' <-> 'pg' & 'def'
27082708
(1 row)
27092709

27102710
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
2711-
websearch_to_tsquery
2712-
------------------------------------------------------
2713-
'abc' & 'pg' <-> ( 'pg'& 'class' ) <-> 'pg' & 'def'
2711+
websearch_to_tsquery
2712+
--------------------------------------------------------
2713+
'abc' & 'pg' <-> ( 'pg'<-> 'class' ) <-> 'pg' & 'def'
27142714
(1 row)
27152715

27162716
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
2717-
websearch_to_tsquery
2718-
--------------------------------------
2719-
'pg' <-> ( 'pg'& 'class' ) <-> 'pg'
2717+
websearch_to_tsquery
2718+
----------------------------------------
2719+
'pg' <-> ( 'pg'<-> 'class' ) <-> 'pg'
27202720
(1 row)
27212721

27222722
select websearch_to_tsquery('english', '""pg pg_class pg""');
2723-
websearch_to_tsquery
2724-
------------------------------
2725-
'pg' & 'pg'& 'class' & 'pg'
2723+
websearch_to_tsquery
2724+
--------------------------------
2725+
'pg' & 'pg'<-> 'class' & 'pg'
27262726
(1 row)
27272727

27282728
select websearch_to_tsquery('english', 'abc """"" def');
@@ -2829,7 +2829,7 @@ NOTICE: text-search query contains only stop words or doesn't contain lexemes,
28292829
select websearch_to_tsquery('''abc''''def''');
28302830
websearch_to_tsquery
28312831
----------------------
2832-
'abc'& 'def'
2832+
'abc'<-> 'def'
28332833
(1 row)
28342834

28352835
select websearch_to_tsquery('\abc');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp