Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitaf01c25

Browse files
feodordmpgpro
authored andcommitted
Make exact distance match for FTS phrase operator
Phrase operator now requires exact distance betweens lexems instead ofless-or-equal.Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.comConflicts:doc/src/sgml/textsearch.sgmlsrc/backend/utils/adt/tsvector_op.c
1 parentf4e9a02 commitaf01c25

File tree

4 files changed

+122
-52
lines changed

4 files changed

+122
-52
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,57 @@ text @@ text
344344
The form <type>text</type> <literal>@@</literal> <type>text</type>
345345
is equivalent to <literal>to_tsvector(x) @@ plainto_tsquery(y)</literal>.
346346
</para>
347+
348+
<para>
349+
Within a <type>tsquery</>, the <literal>&amp;</literal> (AND) operator
350+
specifies that both its arguments must appear in the document to have a
351+
match. Similarly, the <literal>|</literal> (OR) operator specifies that
352+
at least one of its arguments must appear, while the <literal>!</> (NOT)
353+
operator specifies that its argument must <emphasis>not</> appear in
354+
order to have a match. Parentheses can be used to control nesting of
355+
these operators.
356+
</para>
357+
358+
<para>
359+
Searching for phrases is possible with the help of
360+
the <literal>&lt;-&gt;</> (FOLLOWED BY) <type>tsquery</> operator, which
361+
matches only if its arguments have matches that are adjacent and in the
362+
given order. For example:
363+
364+
<programlisting>
365+
SELECT to_tsvector('fatal error') @@ to_tsquery('fatal &lt;-&gt; error');
366+
?column?
367+
----------
368+
t
369+
370+
SELECT to_tsvector('error is not fatal') @@ to_tsquery('fatal &lt;-&gt; error');
371+
?column?
372+
----------
373+
f
374+
</programlisting>
375+
376+
There is a more general version of the FOLLOWED BY operator having the
377+
form <literal>&lt;<replaceable>N</>&gt;</literal>,
378+
where <replaceable>N</> is an integer standing for the exact distance
379+
allowed between the matching lexemes. <literal>&lt;1&gt;</literal> is
380+
the same as <literal>&lt;-&gt;</>, while <literal>&lt;2&gt;</literal>
381+
allows one other lexeme to appear between the matches, and so
382+
on. The <literal>phraseto_tsquery</> function makes use of this
383+
operator to construct a <literal>tsquery</> that can match a multi-word
384+
phrase when some of the words are stop words. For example:
385+
386+
<programlisting>
387+
SELECT phraseto_tsquery('cats ate rats');
388+
phraseto_tsquery
389+
-------------------------------
390+
( 'cat' &lt;-&gt; 'ate' ) &lt;-&gt; 'rat'
391+
392+
SELECT phraseto_tsquery('the cats ate the rats');
393+
phraseto_tsquery
394+
-------------------------------
395+
( 'cat' &lt;-&gt; 'ate' ) &lt;2&gt; 'rat'
396+
</programlisting>
397+
</para>
347398
</sect2>
348399

349400
<sect2 id="textsearch-intro-configurations">
@@ -1501,8 +1552,11 @@ SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
15011552

15021553
<listitem>
15031554
<para>
1504-
Returns the distanced phrase-concatenation of the two given queries.
1505-
This function lies in the implementation of the <literal>&lt;-&gt;</> operator.
1555+
Returns a query that searches for a match to the first given query
1556+
followed by a match to the second given query at a distance of at
1557+
<replaceable>distance</replaceable> lexemes, using
1558+
the <literal>&lt;<replaceable>N</>&gt;</literal>
1559+
<type>tsquery</> operator. For example:
15061560

15071561
<screen>
15081562
SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);

‎src/backend/utils/adt/tsvector_op.c

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -788,11 +788,12 @@ TS_phrase_execute(QueryItem *curitem,
788788
}
789789
else
790790
{
791-
ExecPhraseDataLdata= {0, false,NULL},
792-
Rdata= {0, false,NULL};
793-
WordEntryPos*Lpos,
794-
*Rpos,
795-
*pos_iter=NULL;
791+
ExecPhraseDataLdata= {0, false,NULL},
792+
Rdata= {0, false,NULL};
793+
WordEntryPos*Lpos,
794+
*LposStart,
795+
*Rpos,
796+
*pos_iter=NULL;
796797

797798
Assert(curitem->qoperator.oper==OP_PHRASE);
798799

@@ -830,52 +831,60 @@ TS_phrase_execute(QueryItem *curitem,
830831
pos_iter=data->pos;
831832
}
832833

833-
Lpos=Ldata.pos;
834-
Rpos=Rdata.pos;
835-
836834
/*
837835
* Find matches by distance, WEP_GETPOS() is needed because
838836
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector
839837
*/
840838

839+
Rpos=Rdata.pos;
840+
LposStart=Ldata.pos;
841841
while (Rpos<Rdata.pos+Rdata.npos)
842842
{
843+
/*
844+
* We need to check all possible distances, so reset Lpos
845+
* to guranteed not yet satisfied position.
846+
*/
847+
Lpos=LposStart;
843848
while (Lpos<Ldata.pos+Ldata.npos)
844849
{
845-
if (WEP_GETPOS(*Lpos) <=WEP_GETPOS(*Rpos))
850+
if (WEP_GETPOS(*Rpos)-WEP_GETPOS(*Lpos)==
851+
curitem->qoperator.distance)
846852
{
847-
/*
848-
* Lpos is behind the Rpos, so we have to check the
849-
* distance condition
850-
*/
851-
if (WEP_GETPOS(*Rpos)-WEP_GETPOS(*Lpos) <=curitem->qoperator.distance)
853+
/* MATCH! */
854+
if (data)
852855
{
853-
/* MATCH! */
854-
if (data)
855-
{
856-
*pos_iter=WEP_GETPOS(*Rpos);
857-
pos_iter++;
858-
859-
break;/* We need to build a unique result
860-
* array, so go to the next Rpos */
861-
}
862-
else
863-
{
864-
/*
865-
* We are in the root of the phrase tree and hence
866-
* we don't have to store the resulting positions
867-
*/
868-
return true;
869-
}
856+
/* Store position for upper phrase operator */
857+
*pos_iter=WEP_GETPOS(*Rpos);
858+
pos_iter++;
859+
860+
/*
861+
* Set left start position to next, because current one
862+
* could not satisfy distance for any other right
863+
* position
864+
*/
865+
LposStart=Lpos+1;
866+
break;
870867
}
868+
else
869+
{
870+
/*
871+
* We are in the root of the phrase tree and hence
872+
* we don't have to store the resulting positions
873+
*/
874+
return true;
875+
}
876+
871877
}
872-
else
878+
elseif (WEP_GETPOS(*Rpos) <=WEP_GETPOS(*Lpos)||
879+
WEP_GETPOS(*Rpos)-WEP_GETPOS(*Lpos)<
880+
curitem->qoperator.distance)
873881
{
874882
/*
875-
* Go to the next Rpos, because Lpos
876-
*is ahead of the currentRpos
883+
* Go to the next Rpos, because Lpos is ahead or on less
884+
*distance than required by currentoperator
877885
*/
878886
break;
887+
879888
}
880889

881890
Lpos++;

‎src/test/regress/expected/tstypes.out

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -683,10 +683,10 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
683683
t
684684
(1 row)
685685

686-
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
687-
true
688-
------
689-
t
686+
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
687+
false
688+
-------
689+
f
690690
(1 row)
691691

692692
SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
@@ -701,6 +701,12 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
701701
t
702702
(1 row)
703703

704+
SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
705+
true
706+
------
707+
t
708+
(1 row)
709+
704710
SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
705711
false
706712
-------
@@ -915,7 +921,7 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
915921
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
916922
ts_rank_cd
917923
------------
918-
0.0714286
924+
0
919925
(1 row)
920926

921927
SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
@@ -942,10 +948,10 @@ SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
942948
t
943949
(1 row)
944950

945-
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
946-
true
947-
------
948-
t
951+
SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
952+
false
953+
-------
954+
f
949955
(1 row)
950956

951957
SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
@@ -972,9 +978,9 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
972978
t
973979
(1 row)
974980

975-
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
976-
true
977-
------
978-
t
981+
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
982+
false
983+
-------
984+
f
979985
(1 row)
980986

‎src/test/regress/sql/tstypes.sql

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,10 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
133133

134134
--phrase search
135135
SELECT to_tsvector('simple','1 2 3 1') @@'1 <-> 2'AS"true";
136-
SELECT to_tsvector('simple','1 2 3 1') @@'1 <2> 2'AS"true";
136+
SELECT to_tsvector('simple','1 2 3 1') @@'1 <2> 2'AS"false";
137137
SELECT to_tsvector('simple','1 2 3 1') @@'1 <-> 3'AS"false";
138138
SELECT to_tsvector('simple','1 2 3 1') @@'1 <2> 3'AS"true";
139+
SELECT to_tsvector('simple','1 2 1 2') @@'1 <3> 2'AS"true";
139140

140141
SELECT to_tsvector('simple','1 2 11 3') @@'1 <-> 3'AS"false";
141142
SELECT to_tsvector('simple','1 2 11 3') @@'1:* <-> 3'AS"true";
@@ -183,10 +184,10 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
183184
SELECT'a:1 b:2'::tsvector @@'a <-> b'::tsqueryAS"true";
184185
SELECT'a:1 b:2'::tsvector @@'a <0> b'::tsqueryAS"false";
185186
SELECT'a:1 b:2'::tsvector @@'a <1> b'::tsqueryAS"true";
186-
SELECT'a:1 b:2'::tsvector @@'a <2> b'::tsqueryAS"true";
187+
SELECT'a:1 b:2'::tsvector @@'a <2> b'::tsqueryAS"false";
187188
SELECT'a:1 b:3'::tsvector @@'a <-> b'::tsqueryAS"false";
188189
SELECT'a:1 b:3'::tsvector @@'a <0> b'::tsqueryAS"false";
189190
SELECT'a:1 b:3'::tsvector @@'a <1> b'::tsqueryAS"false";
190191
SELECT'a:1 b:3'::tsvector @@'a <2> b'::tsqueryAS"true";
191-
SELECT'a:1 b:3'::tsvector @@'a <3> b'::tsqueryAS"true";
192+
SELECT'a:1 b:3'::tsvector @@'a <3> b'::tsqueryAS"false";
192193

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp