Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitfd5f28d

Browse files
committed
Merge branch 'PGPRO9_5-phrase_search-CORE-261' into PGPRO9_5
2 parentsa2d3698 +20de682 commitfd5f28d

File tree

26 files changed

+910
-787
lines changed

26 files changed

+910
-787
lines changed

‎contrib/tsearch2/expected/tsearch2.out

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
340340
select 'a | f' < 'b & c'::tsquery;
341341
?column?
342342
----------
343-
f
343+
t
344344
(1 row)
345345

346346
select 'a | ff' < 'b & c'::tsquery;
@@ -445,7 +445,7 @@ set enable_seqscan=on;
445445
select rewrite('foo & bar & qq & new & york', 'new & york'::tsquery, 'big & apple | nyc | new & york & city');
446446
rewrite
447447
------------------------------------------------------------------------------
448-
'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
448+
'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | 'nyc' | 'big' & 'apple' )
449449
(1 row)
450450

451451
select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -463,7 +463,7 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
463463
select rewrite('bar & new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
464464
rewrite
465465
---------------------------------------------------------------------------------
466-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
466+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
467467
(1 row)
468468

469469
select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -481,7 +481,7 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
481481
select rewrite( ARRAY['bar & new & qq & foo & york', keyword, sample] ) from test_tsquery;
482482
rewrite
483483
---------------------------------------------------------------------------------
484-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
484+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
485485
(1 row)
486486

487487
select keyword from test_tsquery where keyword @> 'new';
@@ -522,7 +522,7 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
522522
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
523523
rewrite
524524
---------------------------------------------------------------------------------
525-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
525+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
526526
(1 row)
527527

528528
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -540,7 +540,7 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
540540
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
541541
rewrite
542542
---------------------------------------------------------------------------------
543-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
543+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
544544
(1 row)
545545

546546
create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -583,7 +583,7 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
583583
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where keyword <@ query;
584584
rewrite
585585
---------------------------------------------------------------------------------
586-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
586+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
587587
(1 row)
588588

589589
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -601,7 +601,7 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
601601
select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar & new & qq & foo & york') as query where query @> keyword;
602602
rewrite
603603
---------------------------------------------------------------------------------
604-
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
604+
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
605605
(1 row)
606606

607607
set enable_seqscan='on';

‎doc/src/sgml/datatype.sgml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3901,7 +3901,7 @@ SELECT to_tsvector('english', 'The Fat Rats');
39013901
A <type>tsquery</type> value stores lexemes that are to be
39023902
searched for, and combines them honoring the Boolean operators
39033903
<literal>&amp;</literal> (AND), <literal>|</literal> (OR),
3904-
<literal>!</> (NOT) and <literal>?</> (FOLLOWED BY) phrase search
3904+
<literal>!</> (NOT) and <literal>&lt;-&gt;</> (FOLLOWED BY) phrase search
39053905
operator. Parentheses can be used to enforce grouping
39063906
of the operators:
39073907

@@ -3923,7 +3923,7 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
39233923
</programlisting>
39243924

39253925
In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
3926-
and <literal>&amp;</literal> (AND) and <literal>?</literal> (FOLLOWED BY)
3926+
and <literal>&amp;</literal> (AND) and <literal>&lt;-&gt;</literal> (FOLLOWED BY)
39273927
both bind more tightly than <literal>|</literal> (OR).
39283928
</para>
39293929

‎doc/src/sgml/func.sgml

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8962,10 +8962,10 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
89628962
<entry><literal>!'cat'</literal></entry>
89638963
</row>
89648964
<row>
8965-
<entry> <literal>??</literal> </entry>
8965+
<entry> <literal>&lt;-&gt;</literal> </entry>
89668966
<entry><type>tsquery</> followed by <type>tsquery</></entry>
8967-
<entry><literal>to_tsquery('fat')?? to_tsquery('rat')</literal></entry>
8968-
<entry><literal>'fat'? 'rat'</literal></entry>
8967+
<entry><literal>to_tsquery('fat')&lt;-&gt; to_tsquery('rat')</literal></entry>
8968+
<entry><literal>'fat'&lt;-&gt; 'rat'</literal></entry>
89698969
</row>
89708970
<row>
89718971
<entry> <literal>@&gt;</literal> </entry>
@@ -9069,7 +9069,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
90699069
<entry><type>tsquery</type></entry>
90709070
<entry>produce <type>tsquery</> ignoring punctuation</entry>
90719071
<entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
9072-
<entry><literal>'fat'? 'rat'</literal></entry>
9072+
<entry><literal>'fat'&lt;-&gt; 'rat'</literal></entry>
90739073
</row>
90749074
<row>
90759075
<entry>
@@ -9203,9 +9203,9 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92039203
<literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
92049204
</entry>
92059205
<entry><type>tsquery</type></entry>
9206-
<entry>implementation of <literal>??</> (FOLLOWED BY) operator</entry>
9206+
<entry>implementation of <literal>&lt;-&gt;</> (FOLLOWED BY) operator</entry>
92079207
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
9208-
<entry><literal>'fat'? 'cat'</literal></entry>
9208+
<entry><literal>'fat'&lt;-&gt; 'cat'</literal></entry>
92099209
</row>
92109210
<row>
92119211
<entry>
@@ -9214,7 +9214,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
92149214
<entry><type>tsquery</type></entry>
92159215
<entry>phrase-concatenate with distance</entry>
92169216
<entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
9217-
<entry><literal>'fat'?[10] 'cat'</literal></entry>
9217+
<entry><literal>'fat'&lt;10&gt; 'cat'</literal></entry>
92189218
</row>
92199219
<row>
92209220
<entry>

‎doc/src/sgml/textsearch.sgml

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,10 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
263263
As the above example suggests, a <type>tsquery</type> is not just raw
264264
text, any more than a <type>tsvector</type> is. A <type>tsquery</type>
265265
contains search terms, which must be already-normalized lexemes, and
266-
may combine multiple terms using AND, OR, and NOT operators.
266+
may combine multiple terms using AND, OR, NOT and FOLLOWED BY operators.
267267
(For details see <xref linkend="datatype-textsearch">.) There are
268-
functions <function>to_tsquery</> and <function>plainto_tsquery</>
268+
functions <function>to_tsquery</>, <function>plainto_tsquery</>
269+
and <function>phraseto_tsquery</>
269270
that are helpful in converting user-written text into a proper
270271
<type>tsquery</type>, for example by normalizing words appearing in
271272
the text. Similarly, <function>to_tsvector</> is used to parse and
@@ -293,6 +294,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat &amp; rat');
293294
already normalized, so <literal>rats</> does not match <literal>rat</>.
294295
</para>
295296

297+
<para>
298+
Phrase search is made possible with the help of the <literal>&lt;-&gt;</>
299+
(FOLLOWED BY) operator, which enforces lexeme order. This allows you
300+
to discard strings not containing the desired phrase, for example:
301+
302+
<programlisting>
303+
SELECT q @@ to_tsquery('fatal &lt;-&gt; error')
304+
FROM unnest(array[to_tsvector('fatal error'),
305+
to_tsvector('error is not fatal')]) AS q;
306+
?column?
307+
----------
308+
t
309+
f
310+
</programlisting>
311+
312+
A more generic version of the FOLLOWED BY operator takes the form of
313+
<literal>&lt;N&gt;</>, where N stands for the greatest allowed distance
314+
between the specified lexemes. The <literal>phraseto_tsquery</>
315+
function makes use of this behavior in order to construct a
316+
<literal>tsquery</> capable of matching the provided phrase:
317+
318+
<programlisting>
319+
SELECT phraseto_tsquery('cat ate some rats');
320+
phraseto_tsquery
321+
-------------------------------
322+
( 'cat' &lt;-&gt; 'ate' ) &lt;2&gt; 'rat'
323+
</programlisting>
324+
</para>
325+
296326
<para>
297327
The <literal>@@</literal> operator also
298328
supports <type>text</type> input, allowing explicit conversion of a text
@@ -732,7 +762,7 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>
732762
<replaceable>querytext</replaceable>, which must consist of single tokens
733763
separated by the Boolean operators <literal>&amp;</literal> (AND),
734764
<literal>|</literal> (OR), <literal>!</literal> (NOT), and also the
735-
<literal>?</literal> (FOLLOWED BY) phrase search operator. These operators
765+
<literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search operator. These operators
736766
can be grouped using parentheses. In other words, the input to
737767
<function>to_tsquery</function> must already follow the general rules for
738768
<type>tsquery</> input, as described in <xref
@@ -842,7 +872,7 @@ phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable>
842872
<para>
843873
<function>phraseto_tsquery</> behaves much like
844874
<function>plainto_tsquery</>, with the exception
845-
that it utilizes the <literal>?</literal> (FOLLOWED BY) phrase search
875+
that it utilizes the <literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search
846876
operator instead of the <literal>&amp;</literal> (AND) Boolean operator.
847877
This is particularly useful when searching for exact lexeme sequences,
848878
since the phrase search operator helps to maintain lexeme order.
@@ -853,9 +883,9 @@ phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable>
853883

854884
<screen>
855885
SELECT phraseto_tsquery('english', 'The Fat Rats');
856-
phraseto_tsquery
886+
phraseto_tsquery
857887
------------------
858-
'fat'? 'rat'
888+
'fat'&lt;-&gt; 'rat'
859889
</screen>
860890

861891
Just like the <function>plainto_tsquery</>, the
@@ -865,9 +895,20 @@ SELECT phraseto_tsquery('english', 'The Fat Rats');
865895

866896
<screen>
867897
SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
868-
phraseto_tsquery
869-
-------------------------
870-
( 'fat' ? 'rat' ) ? 'c'
898+
phraseto_tsquery
899+
-----------------------------
900+
( 'fat' &lt;-&gt; 'rat' ) &lt;-&gt; 'c'
901+
</screen>
902+
903+
It is possible to specify the configuration to be used to parse the document,
904+
for example, we could create a new one using the hunspell dictionary
905+
(namely 'eng_hunspell') in order to match phrases with different word forms:
906+
907+
<screen>
908+
SELECT phraseto_tsquery('eng_hunspell', 'developer of the building which collapsed');
909+
phraseto_tsquery
910+
--------------------------------------------------------------------------------------------
911+
( 'developer' &lt;3&gt; 'building' ) &lt;2&gt; 'collapse' | ( 'developer' &lt;3&gt; 'build' ) &lt;2&gt; 'collapse'
871912
</screen>
872913
</para>
873914

@@ -1430,18 +1471,18 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
14301471
<varlistentry>
14311472

14321473
<term>
1433-
<literal><type>tsquery</>?? <type>tsquery</></literal>
1474+
<literal><type>tsquery</>&lt;-&gt; <type>tsquery</></literal>
14341475
</term>
14351476

14361477
<listitem>
14371478
<para>
14381479
Returns the phrase-concatenation of the two given queries.
14391480

14401481
<screen>
1441-
SELECT to_tsquery('fat') ?? to_tsquery('cat | rat');
1442-
?column?
1443-
-------------------------------
1444-
'fat' ? 'cat' | 'fat' ? 'rat'
1482+
SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
1483+
?column?
1484+
-----------------------------------
1485+
'fat' &lt;-&gt; 'cat' | 'fat' &lt;-&gt; 'rat'
14451486
</screen>
14461487
</para>
14471488
</listitem>
@@ -1461,13 +1502,13 @@ SELECT to_tsquery('fat') ?? to_tsquery('cat | rat');
14611502
<listitem>
14621503
<para>
14631504
Returns the distanced phrase-concatenation of the two given queries.
1464-
This function lies in the implementation of the <literal>??</> operator.
1505+
This function lies in the implementation of the <literal>&lt;-&gt;</> operator.
14651506

14661507
<screen>
14671508
SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
1468-
tsquery_phrase
1469-
-------------------
1470-
'fat'?[10] 'cat'
1509+
tsquery_phrase
1510+
------------------
1511+
'fat'&lt;10&gt; 'cat'
14711512
</screen>
14721513
</para>
14731514
</listitem>
@@ -1487,10 +1528,10 @@ SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
14871528
<listitem>
14881529
<para>
14891530
<function>setweight</> returns a copy of the input query in which every
1490-
position has been labeled with the given <replaceable>weight</>, either
1491-
<literal>A</literal>, <literal>B</literal>, <literal>C</literal>, or
1492-
<literal>D</literal>. These labels are retained when queries are
1493-
concatenated, allowing words from different parts of a document
1531+
position has been labeled with the given <replaceable>weight</>(s), either
1532+
<literal>A</literal>, <literal>B</literal>, <literal>C</literal>,
1533+
<literal>D</literal> or their combination. These labels are retained when
1534+
queries are concatenated, allowing words from different parts of a document
14941535
to be weighted differently by ranking functions.
14951536
</para>
14961537

‎src/backend/tsearch/to_tsany.c

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
#include"utils/builtins.h"
2020

2121

22+
typedefstructMorphOpaque
23+
{
24+
Oidcfg_id;
25+
intqoperator;/* query operator */
26+
}MorphOpaque;
27+
28+
2229
Datum
2330
get_current_ts_config(PG_FUNCTION_ARGS)
2431
{
@@ -254,11 +261,6 @@ to_tsvector(PG_FUNCTION_ARGS)
254261
* to_tsquery
255262
*/
256263

257-
typedefstructMorphOpaque
258-
{
259-
Oidcfg_id;
260-
intqoperator;/* query operator */
261-
}MorphOpaque;
262264

263265
/*
264266
* This function is used for morph parsing.
@@ -268,7 +270,7 @@ typedef struct MorphOpaque
268270
* to the stack.
269271
*
270272
* All words belonging to the same variant are pushed as an ANDed list,
271-
* and different variants are ORred together.
273+
* and different variants are ORed together.
272274
*/
273275
staticvoid
274276
pushval_morph(Datumopaque,TSQueryParserStatestate,char*strval,intlenval,int16weight,boolprefix)
@@ -293,11 +295,15 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
293295
{
294296
while (count<prs.curwords)
295297
{
298+
/*
299+
* Were any stop words removed? If so, fill empty positions
300+
* with placeholders linked by an appropriate operator.
301+
*/
296302
if (pos>0&&pos+1<prs.words[count].pos.pos)
297303
{
298304
while (pos+1<prs.words[count].pos.pos)
299305
{
300-
/* put placeholders for each stop word */
306+
/* put placeholders for each missing stop word */
301307
pushStop(state);
302308
if (cntpos)
303309
pushOperator(state,data->qoperator,1);
@@ -306,25 +312,25 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
306312
}
307313
}
308314

309-
pos=prs.words[count].pos.pos;
315+
pos=prs.words[count].pos.pos;/* save current word's position */
316+
317+
/* Go through all variants obtained from this token */
310318
cntvar=0;
311319
while (count<prs.curwords&&pos==prs.words[count].pos.pos)
312320
{
313321
variant=prs.words[count].nvariant;
314322

323+
/* Push all words belonging to the same variant */
315324
cnt=0;
316325
while (count<prs.curwords&&
317326
pos==prs.words[count].pos.pos&&
318327
variant==prs.words[count].nvariant)
319328
{
320-
321329
pushValue(state,
322330
prs.words[count].word,
323331
prs.words[count].len,
324332
weight,
325-
((prs.words[count].flags&TSL_PREFIX)||prefix) ?
326-
true :
327-
false);
333+
((prs.words[count].flags&TSL_PREFIX)||prefix));
328334
pfree(prs.words[count].word);
329335
if (cnt)
330336
pushOperator(state,OP_AND,0);
@@ -338,11 +344,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
338344
}
339345

340346
if (cntpos)
341-
pushOperator(state,data->qoperator,1);
347+
pushOperator(state,data->qoperator,1);/* distance may be useful */
342348
cntpos++;
343349
}
344350

345351
pfree(prs.words);
352+
346353
}
347354
else
348355
pushStop(state);

‎src/backend/tsearch/ts_parse.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,6 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
498498

499499
while (lexs)
500500
{
501-
502501
if (lexs->type>0)
503502
hladdword(prs,lexs->lemm,lexs->lenlemm,lexs->type);
504503

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp