Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit d009992

Browse files
committed
Have text search thesaurus files use "?" for stop words.
Throw an error for actual stop words, rather than a warning. This fixes problems with cache reloading causing warning messages. Re-enable stop words in regression tests; was disabled by Tom. Document "?" as API change.
1 parent 82748bc commit d009992

File tree

4 files changed

+56
-51
lines changed

4 files changed

+56
-51
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.30 2007/11/05 15:55:53 mha Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.31 2007/11/10 15:39:34 momjian Exp $ -->
22

33
<chapter id="textsearch">
44
<title id="textsearch-title">Full Text Search</title>
@@ -2258,20 +2258,17 @@ more sample word(s) : more indexed word(s)
22582258
</para>
22592259

22602260
<para>
2261-
Stop words recognized by the subdictionary are replaced by a <quote>stop
2262-
word placeholder</quote> to record their position. To illustrate this,
2263-
consider these phrases:
2261+
Specific stop words recognized by the subdictionary cannot be
2262+
specified; instead use <literal>?</> to mark the location where any
2263+
stop word can appear. For example, assuming that <literal>a</> and
2264+
<literal>the</> are stop words according to the subdictionary:
22642265

22652266
<programlisting>
2266-
a one the two : swsw
2267-
the one a two : swsw2
2267+
? one ? two : swsw
22682268
</programlisting>
22692269

2270-
Assuming that <literal>a</> and <literal>the</> are stop words according
2271-
to the subdictionary, these two phrases are identical to the thesaurus:
2272-
they both look like <replaceable>stopword</> <literal>one</>
2273-
<replaceable>stopword</> <literal>two</>. Input matching this pattern
2274-
will be replaced by <literal>swsw2</>, according to the tie-breaking rule.
2270+
matches <literal>a one the two</> and <literal>the one a two</>;
2271+
both would be replaced by <literal>swsw</>.
22752272
</para>
22762273

22772274
<para>
@@ -3576,6 +3573,12 @@ Parser: "pg_catalog.default"
35763573
</para>
35773574
</listitem>
35783575

3576+
<listitem>
3577+
<para>
3578+
Thesaurus files now use <literal>?</> for stop words.
3579+
</para>
3580+
</listitem>
3581+
35793582
<listitem>
35803583
<para>
35813584
What else?

‎src/backend/tsearch/dict_thesaurus.c

Lines changed: 37 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.5 2007/11/09 01:32:22 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_thesaurus.c,v 1.6 2007/11/10 15:39:34 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -412,47 +412,48 @@ compileTheLexeme(DictThesaurus * d)
412412
{
413413
TSLexeme*ptr;
414414

415-
ptr= (TSLexeme*)DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
416-
PointerGetDatum(d->subdict->dictData),
417-
PointerGetDatum(d->wrds[i].lexeme),
418-
Int32GetDatum(strlen(d->wrds[i].lexeme)),
419-
PointerGetDatum(NULL)));
420-
421-
if (!ptr)
422-
elog(ERROR,"thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)",
423-
d->wrds[i].lexeme,d->wrds[i].entries->idsubst+1);
424-
elseif (!(ptr->lexeme))
425-
{
426-
elog(NOTICE,"thesaurus word-sample \"%s\" is recognized as stop-word, assign any stop-word (rule %d)",
427-
d->wrds[i].lexeme,d->wrds[i].entries->idsubst+1);
428-
415+
if (strcmp(d->wrds[i].lexeme,"?")==0)/* Is stop word marker? */
429416
newwrds=addCompiledLexeme(newwrds,&nnw,&tnm,NULL,d->wrds[i].entries,0);
430-
}
431417
else
432418
{
433-
while (ptr->lexeme)
419+
ptr= (TSLexeme*)DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
420+
PointerGetDatum(d->subdict->dictData),
421+
PointerGetDatum(d->wrds[i].lexeme),
422+
Int32GetDatum(strlen(d->wrds[i].lexeme)),
423+
PointerGetDatum(NULL)));
424+
425+
if (!ptr)
426+
elog(ERROR,"thesaurus word-sample \"%s\" isn't recognized by subdictionary (rule %d)",
427+
d->wrds[i].lexeme,d->wrds[i].entries->idsubst+1);
428+
elseif (!(ptr->lexeme))
429+
elog(ERROR,"thesaurus word-sample \"%s\" is recognized as stop-word, use \"?\" for stop words instead (rule %d)",
430+
d->wrds[i].lexeme,d->wrds[i].entries->idsubst+1);
431+
else
434432
{
435-
TSLexeme*remptr=ptr+1;
436-
inttnvar=1;
437-
intcurvar=ptr->nvariant;
438-
439-
/* compute n words in one variant */
440-
while (remptr->lexeme)
433+
while (ptr->lexeme)
441434
{
442-
if (remptr->nvariant!= (remptr-1)->nvariant)
443-
break;
444-
tnvar++;
445-
remptr++;
446-
}
447-
448-
remptr=ptr;
449-
while (remptr->lexeme&&remptr->nvariant==curvar)
450-
{
451-
newwrds=addCompiledLexeme(newwrds,&nnw,&tnm,remptr,d->wrds[i].entries,tnvar);
452-
remptr++;
435+
TSLexeme*remptr=ptr+1;
436+
inttnvar=1;
437+
intcurvar=ptr->nvariant;
438+
439+
/* compute n words in one variant */
440+
while (remptr->lexeme)
441+
{
442+
if (remptr->nvariant!= (remptr-1)->nvariant)
443+
break;
444+
tnvar++;
445+
remptr++;
446+
}
447+
448+
remptr=ptr;
449+
while (remptr->lexeme&&remptr->nvariant==curvar)
450+
{
451+
newwrds=addCompiledLexeme(newwrds,&nnw,&tnm,remptr,d->wrds[i].entries,tnvar);
452+
remptr++;
453+
}
454+
455+
ptr=remptr;
453456
}
454-
455-
ptr=remptr;
456457
}
457458
}
458459

‎src/backend/tsearch/thesaurus_sample.ths

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,4 +14,5 @@ two : *2
1414
supernovae stars : *sn
1515
supernovae : *sn
1616
booking tickets : order invitation cards
17-
# booking the tickets : order invitation Cards
17+
booking ? tickets : order invitation Cards
18+

‎src/test/regress/expected/tsdicts.out

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -311,8 +311,8 @@ SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usuall
311311
(1 row)
312312

313313
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
314-
to_tsvector
315-
---------------------------------------------------------------------
316-
'book':8 'card':3 'like':6 'look':5 'invit':2 'order':1 'ticket':10
314+
to_tsvector
315+
-------------------------------------------------------
316+
'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
317317
(1 row)
318318

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp