Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit dbaec70

Browse files
committed
Rename and slightly redefine the default text search parser's "word"
categories, as per discussion. asciiword (formerly lword) is still ASCII-letters-only, and numword (formerly word) is still the most general mixed-alpha-and-digits case. But word (formerly nlword) is now any-group-of-letters-with-at-least-one-non-ASCII, rather than all-non-ASCII as before. This is no worse than before for parsing mixed Russian/English text, which seems to have been the design center for the original coding; and it should simplify matters for parsing most European languages. In particular it will not be necessary for any language to accept strings containing digits as being regular "words". The hyphenated-word categories are adjusted similarly.
1 parent 344d0ca commit dbaec70

File tree

10 files changed

+466
-449
lines changed

10 files changed

+466
-449
lines changed

‎doc/src/sgml/func.sgml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.403 2007/10/22 20:13:37 tgl Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.404 2007/10/23 20:46:11 tgl Exp $ -->
22

33
<chapter id="functions">
44
<title>Functions and Operators</title>
@@ -7861,7 +7861,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
78617861
<entry><type>setof record</type></entry>
78627862
<entry>test a configuration</entry>
78637863
<entry><literal>ts_debug('english', 'The Brightest supernovaes')</literal></entry>
7864-
<entry><literal>(lword,"Latin word",The,{english_stem},english_stem,{}) ...</literal></entry>
7864+
<entry><literal>(asciiword,"Word, all ASCII",The,{english_stem},english_stem,{}) ...</literal></entry>
78657865
</row>
78667866
<row>
78677867
<entry><literal><function>ts_lexize</function>(<replaceable class="PARAMETER">dict</replaceable> <type>regdictionary</>, <replaceable class="PARAMETER">token</replaceable> <type>text</>)</literal></entry>
@@ -7889,14 +7889,14 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
78897889
<entry><type>setof record</type></entry>
78907890
<entry>get token types defined by parser</entry>
78917891
<entry><literal>ts_token_type('default')</literal></entry>
7892-
<entry><literal>(1,lword,"Latin word") ...</literal></entry>
7892+
<entry><literal>(1,asciiword,"Word, all ASCII") ...</literal></entry>
78937893
</row>
78947894
<row>
78957895
<entry><literal><function>ts_token_type</function>(<replaceable class="PARAMETER">parser_oid</> <type>oid</>, OUT <replaceable class="PARAMETER">tokid</> <type>integer</>, OUT <replaceable class="PARAMETER">alias</> <type>text</>, OUT <replaceable class="PARAMETER">description</> <type>text</>)</literal></entry>
78967896
<entry><type>setof record</type></entry>
78977897
<entry>get token types defined by parser</entry>
78987898
<entry><literal>ts_token_type(3722)</literal></entry>
7899-
<entry><literal>(1,lword,"Latin word") ...</literal></entry>
7899+
<entry><literal>(1,asciiword,"Word, all ASCII") ...</literal></entry>
79007900
</row>
79017901
<row>
79027902
<entry><literal><function>ts_stat</function>(<replaceable class="PARAMETER">sqlquery</replaceable> <type>text</>, <optional> <replaceable class="PARAMETER">weights</replaceable> <type>text</>, </optional> OUT <replaceable class="PARAMETER">word</replaceable> <type>text</>, OUT <replaceable class="PARAMETER">ndoc</replaceable> <type>integer</>, OUT <replaceable class="PARAMETER">nentry</replaceable> <type>integer</>)</literal></entry>

‎doc/src/sgml/textsearch.sgml

Lines changed: 189 additions & 176 deletions
Large diffs are not rendered by default.

‎src/backend/snowball/Makefile

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# Makefile for src/backend/snowball
44
#
5-
# $PostgreSQL: pgsql/src/backend/snowball/Makefile,v 1.3 2007/08/27 10:29:49 mha Exp $
5+
# $PostgreSQL: pgsql/src/backend/snowball/Makefile,v 1.4 2007/10/23 20:46:12 tgl Exp $
66
#
77
#-------------------------------------------------------------------------
88

@@ -46,8 +46,9 @@ OBJS= dict_snowball.o api.o utilities.o \
4646
stem_UTF_8_swedish.o\
4747
stem_UTF_8_turkish.o
4848

49-
# second column is name of latin dictionary, if different
50-
# Note order dependency: use of some other language as latin dictionary
49+
# first column is language name and also name of dictionary for not-all-ASCII
50+
# words, second is name of dictionary for all-ASCII words
51+
# Note order dependency: use of some other language as ASCII dictionary
5152
# must come after creation of that language
5253
LANGUAGES=\
5354
danish danish \
@@ -95,8 +96,8 @@ ifeq ($(enable_shared), yes)
9596
while [ "$$#" -gt 0 ] ;\
9697
do\
9798
lang=$$1; shift;\
98-
nonlatdictname=$$lang;\
99-
latdictname=$$1; shift;\
99+
nonascdictname=$$lang;\
100+
ascdictname=$$1; shift;\
100101
if [ -s $(srcdir)/stopwords/$${lang}.stop ] ; then\
101102
stop=", StopWords=$${lang}" ;\
102103
else\
@@ -106,8 +107,8 @@ ifeq ($(enable_shared), yes)
106107
sed -e "s#_LANGNAME_#$$lang#g" |\
107108
sed -e "s#_DICTNAME_#$${lang}_stem#g" |\
108109
sed -e "s#_CFGNAME_#$$lang#g" |\
109-
sed -e "s#_LATDICTNAME_#$${latdictname}_stem#g" |\
110-
sed -e "s#_NONLATDICTNAME_#$${nonlatdictname}_stem#g" |\
110+
sed -e "s#_ASCDICTNAME_#$${ascdictname}_stem#g" |\
111+
sed -e "s#_NONASCDICTNAME_#$${nonascdictname}_stem#g" |\
111112
sed -e "s#_STOPWORDS_#$$stop#g" ;\
112113
done >> $@
113114
else

‎src/backend/snowball/snowball.sql.in

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.4 2007/09/03 02:30:43 tgl Exp $$
1+
-- $PostgreSQL: pgsql/src/backend/snowball/snowball.sql.in,v 1.5 2007/10/23 20:46:12 tgl Exp $$
22

33
-- text search configuration for _LANGNAME_ language
44
CREATE TEXT SEARCH DICTIONARY _DICTNAME_
@@ -12,14 +12,15 @@ CREATE TEXT SEARCH CONFIGURATION _CFGNAME_
1212
COMMENT ON TEXT SEARCH CONFIGURATION _CFGNAME_ IS 'configuration for _LANGNAME_ language';
1313

1414
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
15-
FOR email, url, host, sfloat, version, uri, file, float, int, uint
15+
FOR email, url, host, sfloat, version, uri, file, float, int, uint,
16+
numword, hword_numpart, numhword
1617
WITH simple;
1718

1819
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
19-
FOR lhword, lpart_hword, lword
20-
WITH _LATDICTNAME_;
20+
FOR asciiword, hword_asciipart, asciihword
21+
WITH _ASCDICTNAME_;
2122

2223
ALTER TEXT SEARCH CONFIGURATION _CFGNAME_ ADD MAPPING
23-
FOR hword, nlhword, nlpart_hword, nlword, word, part_hword
24-
WITH _NONLATDICTNAME_;
24+
FOR word, hword_part, hword
25+
WITH _NONASCDICTNAME_;
2526

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp