Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commita15127b

Browse files
committed
Merge branch 'PGPRO9_5_phrase_search' into PGPRO9_5
Merge phrase search functionality.
2 parentsdddbbc4 +249bfc7 commita15127b

29 files changed

+2412
-386
lines changed

‎contrib/tsearch2/expected/tsearch2.out

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
278278
(1 row)
279279

280280
SELECT '1|(2|(4|(5|6)))'::tsquery;
281-
tsquery
282-
-----------------------------------------
283-
'1' |('2' |('4' |('5' | '6' ) ) )
281+
tsquery
282+
-----------------------------
283+
'1' | '2' | '4' | '5' | '6'
284284
(1 row)
285285

286286
SELECT '1|2|4|5|6'::tsquery;
287-
tsquery
288-
-----------------------------------------
289-
( ( ('1' | '2')| '4')| '5' ) | '6'
287+
tsquery
288+
-----------------------------
289+
'1' | '2' | '4' | '5' | '6'
290290
(1 row)
291291

292292
SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
340340
select 'a | f' < 'b & c'::tsquery;
341341
?column?
342342
----------
343-
t
343+
f
344344
(1 row)
345345

346346
select 'a | ff' < 'b & c'::tsquery;
@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword > 'new & york';
443443

444444
set enable_seqscan=on;
445445
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
446-
rewrite
447-
----------------------------------------------------------------------------------
448-
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' |( 'nyc' | 'big' & 'apple' ) )
446+
rewrite
447+
------------------------------------------------------------------------------
448+
'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
449449
(1 row)
450450

451451
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
461461
(1 row)
462462

463463
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
464-
rewrite
465-
-------------------------------------------------------------------------------------
466-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
464+
rewrite
465+
---------------------------------------------------------------------------------
466+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
467467
(1 row)
468468

469469
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
479479
(1 row)
480480

481481
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
482-
rewrite
483-
-------------------------------------------------------------------------------------
484-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
482+
rewrite
483+
---------------------------------------------------------------------------------
484+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
485485
(1 row)
486486

487487
select keyword from test_tsquery where keyword @> 'new';
@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
520520
(1 row)
521521

522522
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
523-
rewrite
524-
-------------------------------------------------------------------------------------
525-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
523+
rewrite
524+
---------------------------------------------------------------------------------
525+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
526526
(1 row)
527527

528528
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
538538
(1 row)
539539

540540
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
541-
rewrite
542-
-------------------------------------------------------------------------------------
543-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
541+
rewrite
542+
---------------------------------------------------------------------------------
543+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
544544
(1 row)
545545

546546
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
581581
(1 row)
582582

583583
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
584-
rewrite
585-
-------------------------------------------------------------------------------------
586-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
584+
rewrite
585+
---------------------------------------------------------------------------------
586+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
587587
(1 row)
588588

589589
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
599599
(1 row)
600600

601601
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
602-
rewrite
603-
-------------------------------------------------------------------------------------
604-
'citi' & 'foo' &( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
602+
rewrite
603+
---------------------------------------------------------------------------------
604+
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
605605
(1 row)
606606

607607
set enable_seqscan='on';

‎doc/src/sgml/datatype.sgml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3900,8 +3900,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
39003900
<para>
39013901
A <type>tsquery</type> value stores lexemes that are to be
39023902
searched for, and combines them honoring the Boolean operators
3903-
<literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
3904-
<literal>!</> (NOT). Parentheses can be used to enforce grouping
3903+
<literal>&amp;</literal> (AND), <literal>|</literal> (OR),
3904+
<literal>!</> (NOT) and <literal>?</> (FOLLOWED BY) phrase search
3905+
operator. Parentheses can be used to enforce grouping
39053906
of the operators:
39063907

39073908
<programlisting>
@@ -3922,8 +3923,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
39223923
</programlisting>
39233924

39243925
In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
3925-
and <literal>&amp;</literal> (AND) binds more tightly than
3926-
<literal>|</literal> (OR).
3926+
and <literal>&amp;</literal> (AND) and <literal>?</literal> (FOLLOWED BY)
3927+
both bind more tightly than <literal>|</literal> (OR).
39273928
</para>
39283929

39293930
<para>

‎doc/src/sgml/func.sgml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8961,6 +8961,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
89618961
<entry><literal>!! 'cat'::tsquery</literal></entry>
89628962
<entry><literal>!'cat'</literal></entry>
89638963
</row>
8964+
<row>
8965+
<entry> <literal>??</literal> </entry>
8966+
<entry><type>tsquery</> followed by <type>tsquery</></entry>
8967+
<entry><literal>to_tsquery('fat') ?? to_tsquery('rat')</literal></entry>
8968+
<entry><literal>'fat' ? 'rat'</literal></entry>
8969+
</row>
89648970
<row>
89658971
<entry> <literal>@&gt;</literal> </entry>
89668972
<entry><type>tsquery</> contains another ?</entry>
@@ -9053,6 +9059,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
90539059
<entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry>
90549060
<entry><literal>'fat' &amp; 'rat'</literal></entry>
90559061
</row>
9062+
<row>
9063+
<entry>
9064+
<indexterm>
9065+
<primary>phraseto_tsquery</primary>
9066+
</indexterm>
9067+
<literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
9068+
</entry>
9069+
<entry><type>tsquery</type></entry>
9070+
<entry>produce <type>tsquery</> that searches for a phrase, ignoring punctuation</entry>
9071+
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
9072+
<entry><literal>'fat' ? 'rat'</literal></entry>
9073+
</row>
90569074
<row>
90579075
<entry>
90589076
<indexterm>
@@ -9077,6 +9095,15 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
90779095
<entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A')</literal></entry>
90789096
<entry><literal>'cat':3A 'fat':2A,4A 'rat':5A</literal></entry>
90799097
</row>
9098+
<row>
9099+
<entry>
9100+
<literal><function>setweight(<type>tsquery</>, <type>"char"</>)</function></literal>
9101+
</entry>
9102+
<entry><type>tsquery</type></entry>
9103+
<entry>add weight to each element of <type>tsquery</></entry>
9104+
<entry><literal>setweight('fat ? cat &amp; rat:B'::tsquery, 'A')</literal></entry>
9105+
<entry><literal>( 'fat':A ? 'cat':A ) &amp; 'rat':AB</literal></entry>
9106+
</row>
90809107
<row>
90819108
<entry>
90829109
<indexterm>
@@ -9168,6 +9195,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
91689195
<entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry>
91699196
<entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry>
91709197
</row>
9198+
<row>
9199+
<entry>
9200+
<indexterm>
9201+
<primary>tsquery_phrase</primary>
9202+
</indexterm>
9203+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
9204+
</entry>
9205+
<entry><type>tsquery</type></entry>
9206+
<entry>implementation of <literal>??</> (FOLLOWED BY) operator</entry>
9207+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
9208+
<entry><literal>'fat' ? 'cat'</literal></entry>
9209+
</row>
9210+
<row>
9211+
<entry>
9212+
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
9213+
</entry>
9214+
<entry><type>tsquery</type></entry>
9215+
<entry>phrase-concatenate with distance</entry>
9216+
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
9217+
<entry><literal>'fat' ?[10] 'cat'</literal></entry>
9218+
</row>
91719219
<row>
91729220
<entry>
91739221
<indexterm>

‎doc/src/sgml/textsearch.sgml

Lines changed: 127 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -709,11 +709,14 @@ UPDATE tt SET ti =
709709

710710
<para>
711711
<productname>&productname;</productname> provides the
712-
functions <function>to_tsquery</function> and
713-
<function>plainto_tsquery</function> for converting a query to
714-
the <type>tsquery</type> data type. <function>to_tsquery</function>
715-
offers access to more features than <function>plainto_tsquery</function>,
716-
but is less forgiving about its input.
712+
functions <function>to_tsquery</function>,
713+
<function>plainto_tsquery</function> and
714+
<function>phraseto_tsquery</function>
715+
for converting a query to the <type>tsquery</type> data type.
716+
<function>to_tsquery</function> offers access to more features
717+
than both <function>plainto_tsquery</function> and
718+
<function>phraseto_tsquery</function>, but is less forgiving
719+
about its input.
717720
</para>
718721

719722
<indexterm>
@@ -728,7 +731,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>
728731
<function>to_tsquery</function> creates a <type>tsquery</> value from
729732
<replaceable>querytext</replaceable>, which must consist of single tokens
730733
separated by the Boolean operators <literal>&amp;</literal> (AND),
731-
<literal>|</literal> (OR) and <literal>!</literal> (NOT). These operators
734+
<literal>|</literal> (OR), <literal>!</literal> (NOT), and also the
735+
<literal>?</literal> (FOLLOWED BY) phrase search operator. These operators
732736
can be grouped using parentheses. In other words, the input to
733737
<function>to_tsquery</function> must already follow the general rules for
734738
<type>tsquery</> input, as described in <xref
@@ -814,8 +818,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats');
814818
</screen>
815819

816820
Note that <function>plainto_tsquery</> cannot
817-
recognize Boolean operators, weight labels, or prefix-match labels
818-
in its input:
821+
recognize Boolean and phrase search operators, weight labels,
822+
or prefix-match labels in its input:
819823

820824
<screen>
821825
SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
@@ -827,6 +831,46 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
827831
Here, all the input punctuation was discarded as being space symbols.
828832
</para>
829833

834+
<indexterm>
835+
<primary>phraseto_tsquery</primary>
836+
</indexterm>
837+
838+
<synopsis>
839+
phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</>
840+
</synopsis>
841+
842+
<para>
843+
<function>phraseto_tsquery</> behaves much like
844+
<function>plainto_tsquery</>, with the exception
845+
that it utilizes the <literal>?</literal> (FOLLOWED BY) phrase search
846+
operator instead of the <literal>&amp;</literal> (AND) Boolean operator.
847+
This is particularly useful when searching for exact lexeme sequences,
848+
since the phrase search operator helps to maintain lexeme order.
849+
</para>
850+
851+
<para>
852+
Example:
853+
854+
<screen>
855+
SELECT phraseto_tsquery('english', 'The Fat Rats');
856+
phraseto_tsquery
857+
------------------
858+
'fat' ? 'rat'
859+
</screen>
860+
861+
Just like the <function>plainto_tsquery</>, the
862+
<function>phraseto_tsquery</> function cannot
863+
recognize Boolean and phrase search operators, weight labels,
864+
or prefix-match labels in its input:
865+
866+
<screen>
867+
SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
868+
phraseto_tsquery
869+
-------------------------
870+
( 'fat' ? 'rat' ) ? 'c'
871+
</screen>
872+
</para>
873+
830874
</sect2>
831875

832876
<sect2 id="textsearch-ranking">
@@ -1383,6 +1427,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
13831427

13841428
</varlistentry>
13851429

1430+
<varlistentry>
1431+
1432+
<term>
1433+
<literal><type>tsquery</> ?? <type>tsquery</></literal>
1434+
</term>
1435+
1436+
<listitem>
1437+
<para>
1438+
Returns the phrase-concatenation of the two given queries.
1439+
1440+
<screen>
1441+
SELECT to_tsquery('fat') ?? to_tsquery('cat | rat');
1442+
?column?
1443+
-------------------------------
1444+
'fat' ? 'cat' | 'fat' ? 'rat'
1445+
</screen>
1446+
</para>
1447+
</listitem>
1448+
1449+
</varlistentry>
1450+
1451+
<varlistentry>
1452+
1453+
<term>
1454+
<indexterm>
1455+
<primary>tsquery_phrase</primary>
1456+
</indexterm>
1457+
1458+
<literal>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</> [, <replaceable class="PARAMETER">distance</replaceable> <type>integer</> ]) returns <type>tsquery</></literal>
1459+
</term>
1460+
1461+
<listitem>
1462+
<para>
1463+
Returns the distanced phrase-concatenation of the two given queries.
1464+
This function underlies the implementation of the <literal>??</> operator.
1465+
1466+
<screen>
1467+
SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
1468+
tsquery_phrase
1469+
-------------------
1470+
'fat' ?[10] 'cat'
1471+
</screen>
1472+
</para>
1473+
</listitem>
1474+
1475+
</varlistentry>
1476+
1477+
<varlistentry>
1478+
1479+
<term>
1480+
<indexterm>
1481+
<primary>setweight</primary>
1482+
</indexterm>
1483+
1484+
<literal>setweight(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>) returns <type>tsquery</></literal>
1485+
</term>
1486+
1487+
<listitem>
1488+
<para>
1489+
<function>setweight</> returns a copy of the input query in which every
1490+
position has been labeled with the given <replaceable>weight</>, either
1491+
<literal>A</literal>, <literal>B</literal>, <literal>C</literal>, or
1492+
<literal>D</literal>. These labels are retained when queries are
1493+
concatenated, allowing words from different parts of a document
1494+
to be weighted differently by ranking functions.
1495+
</para>
1496+
1497+
<para>
1498+
Note that weight labels apply to <emphasis>positions</>, not
1499+
<emphasis>lexemes</>. If the input query has been stripped of
1500+
positions then <function>setweight</> does nothing.
1501+
</para>
1502+
</listitem>
1503+
</varlistentry>
1504+
13861505
<varlistentry>
13871506

13881507
<term>

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp