Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ca450a0

Browse files
committed
Add an Accept parameter to "simple" dictionaries. The default of true
gives the old behavior; selecting false allows the dictionary to be used as a filter ahead of other dictionaries, because it will pass on rather than accept words that aren't in its stopword list. Jan Urbanski
1 parent a44c81d commit ca450a0

File tree

2 files changed

+67
-9
lines changed

2 files changed

+67
-9
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.32 2007/11/14 03:26:24 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/textsearch.sgml,v 1.33 2007/11/14 18:36:37 tgl Exp $ -->
22

33
<chapter id="textsearch">
44
<title id="textsearch-title">Full Text Search</title>
@@ -2093,9 +2093,11 @@ SELECT ts_rank_cd (to_tsvector('english','list stop words'), to_tsquery('list &a
20932093
<para>
20942094
The <literal>simple</> dictionary template operates by converting the
20952095
input token to lower case and checking it against a file of stop words.
2096-
If it is found in the file then <literal>NULL</> is returned, causing
2096+
If it is found in the file then an empty array is returned, causing
20972097
the token to be discarded. If not, the lower-cased form of the word
2098-
is returned as the normalized lexeme.
2098+
is returned as the normalized lexeme. Alternatively, the dictionary
2099+
can be configured to report non-stop-words as unrecognized, allowing
2100+
them to be passed on to the next dictionary in the list.
20992101
</para>
21002102

21012103
<para>
@@ -2138,6 +2140,35 @@ SELECT ts_lexize('public.simple_dict','The');
21382140
</programlisting>
21392141
</para>
21402142

2143+
<para>
2144+
We can also choose to return <literal>NULL</>, instead of the lower-cased
2145+
word, if it is not found in the stop words file. This behavior is
2146+
selected by setting the dictionary's <literal>Accept</> parameter to
2147+
<literal>false</>. Continuing the example:
2148+
2149+
<programlisting>
2150+
ALTER TEXT SEARCH DICTIONARY public.simple_dict ( Accept = false );
2151+
2152+
SELECT ts_lexize('public.simple_dict','YeS');
2153+
ts_lexize
2154+
-----------
2155+
2156+
2157+
SELECT ts_lexize('public.simple_dict','The');
2158+
ts_lexize
2159+
-----------
2160+
{}
2161+
</programlisting>
2162+
</para>
2163+
2164+
<para>
2165+
With the default setting of <literal>Accept</> = <literal>true</>,
2166+
it is only useful to place a <literal>simple</> dictionary at the end
2167+
of a list of dictionaries, since it will never pass on any token to
2168+
a following dictionary. Conversely, <literal>Accept</> = <literal>false</>
2169+
is only useful when there is at least one following dictionary.
2170+
</para>
2171+
21412172
<caution>
21422173
<para>
21432174
Most types of dictionaries rely on configuration files, such as files of

‎src/backend/tsearch/dict_simple.c

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.3 2007/08/25 00:03:59 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/dict_simple.c,v 1.4 2007/11/14 18:36:37 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,6 +23,7 @@
2323
typedefstruct
2424
{
2525
StopListstoplist;
26+
boolaccept;
2627
}DictSimple;
2728

2829

@@ -31,9 +32,12 @@ dsimple_init(PG_FUNCTION_ARGS)
3132
{
3233
List*dictoptions= (List*)PG_GETARG_POINTER(0);
3334
DictSimple*d= (DictSimple*)palloc0(sizeof(DictSimple));
34-
boolstoploaded= false;
35+
boolstoploaded= false,
36+
acceptloaded= false;
3537
ListCell*l;
3638

39+
d->accept= true;/* default */
40+
3741
foreach(l,dictoptions)
3842
{
3943
DefElem*defel= (DefElem*)lfirst(l);
@@ -47,6 +51,15 @@ dsimple_init(PG_FUNCTION_ARGS)
4751
readstoplist(defGetString(defel),&d->stoplist,lowerstr);
4852
stoploaded= true;
4953
}
54+
elseif (pg_strcasecmp("Accept",defel->defname)==0)
55+
{
56+
if (acceptloaded)
57+
ereport(ERROR,
58+
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
59+
errmsg("multiple Accept parameters")));
60+
d->accept=defGetBoolean(defel);
61+
acceptloaded= true;
62+
}
5063
else
5164
{
5265
ereport(ERROR,
@@ -66,14 +79,28 @@ dsimple_lexize(PG_FUNCTION_ARGS)
6679
char*in= (char*)PG_GETARG_POINTER(1);
6780
int32len=PG_GETARG_INT32(2);
6881
char*txt;
69-
TSLexeme*res=palloc0(sizeof(TSLexeme)*2);
82+
TSLexeme*res;
7083

7184
txt=lowerstr_with_len(in,len);
7285

7386
if (*txt=='\0'||searchstoplist(&(d->stoplist),txt))
87+
{
88+
/* reject as stopword */
7489
pfree(txt);
75-
else
90+
res=palloc0(sizeof(TSLexeme)*2);
91+
PG_RETURN_POINTER(res);
92+
}
93+
elseif (d->accept)
94+
{
95+
/* accept */
96+
res=palloc0(sizeof(TSLexeme)*2);
7697
res[0].lexeme=txt;
77-
78-
PG_RETURN_POINTER(res);
98+
PG_RETURN_POINTER(res);
99+
}
100+
else
101+
{
102+
/* report as unrecognized */
103+
pfree(txt);
104+
PG_RETURN_POINTER(NULL);
105+
}
79106
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp