Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit cbcc872

Browse files
committed
Update snowball
Update to snowball tag v2.0.0. Major changes are new stemmers for Basque, Catalan, and Hindi. Discussion: https://www.postgresql.org/message-id/flat/a8eeabd6-2be1-43fe-401e-a97594c38478%402ndquadrant.com
1 parent 57cb806 commit cbcc872

File tree

97 files changed

+6914
-2166
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+6914
-2166
lines changed

‎src/backend/snowball/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ OBJS = \
2323
utilities.o
2424

2525
OBJS +=\
26+
stem_ISO_8859_1_basque.o\
27+
stem_ISO_8859_1_catalan.o\
2628
stem_ISO_8859_1_danish.o\
2729
stem_ISO_8859_1_dutch.o\
2830
stem_ISO_8859_1_english.o\
@@ -41,13 +43,16 @@ OBJS += \
4143
stem_ISO_8859_2_romanian.o\
4244
stem_KOI8_R_russian.o\
4345
stem_UTF_8_arabic.o\
46+
stem_UTF_8_basque.o\
47+
stem_UTF_8_catalan.o\
4448
stem_UTF_8_danish.o\
4549
stem_UTF_8_dutch.o\
4650
stem_UTF_8_english.o\
4751
stem_UTF_8_finnish.o\
4852
stem_UTF_8_french.o\
4953
stem_UTF_8_german.o\
5054
stem_UTF_8_greek.o\
55+
stem_UTF_8_hindi.o\
5156
stem_UTF_8_hungarian.o\
5257
stem_UTF_8_indonesian.o\
5358
stem_UTF_8_irish.o\
@@ -70,13 +75,16 @@ OBJS += \
7075
# must come after creation of that language
7176
LANGUAGES=\
7277
arabic arabic \
78+
basque basque \
79+
catalan catalan \
7380
danish danish \
7481
dutch dutch \
7582
english english \
7683
finnish finnish \
7784
french french \
7885
german german \
7986
greek greek \
87+
hindi english \
8088
hungarian hungarian \
8189
indonesian indonesian \
8290
irish irish \

‎src/backend/snowball/README

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ This module uses the word stemming code developed by the Snowball project,
77
http://snowballstem.org (formerly http://snowball.tartarus.org)
88
which is released by them under a BSD-style license.
99

10-
The Snowball project is not currently making formal releases; it's best
10+
The Snowball project does not often make formal releases; it's best
1111
to pull from their git repository
1212

1313
git clone https://github.com/snowballstem/snowball.git
@@ -29,8 +29,8 @@ We choose to include the derived files in the PostgreSQL distribution
2929
because most installations will not have the Snowball compiler available.
3030

3131
We are currently synced with the Snowball git commit
32-
4456b82c26c02493e8807a66f30593a98c5d2888
33-
of 2019-06-24.
32+
c70ed64f9d41c1032fba4e962b054f8e9d489a74 (tag v2.0.0)
33+
of 2019-10-02.
3434

3535
To update the PostgreSQL sources from a new Snowball version:
3636

@@ -44,6 +44,9 @@ do
4444
sed 's|\.\./runtime/header\.h|header.h|' $f >libstemmer/`basename $f`
4545
done
4646

47+
Do not copy stemmers that are listed in libstemmer/modules.txt as
48+
nonstandard, such as "german2" or "lovins".
49+
4750
2. Copy the *.c files in snowball/runtime/ to
4851
src/backend/snowball/libstemmer, and edit them to remove direct inclusions
4952
of system headers such as <stdio.h> --- they should only include "header.h".

‎src/backend/snowball/dict_snowball.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626

2727
/* Now we can include the original Snowball header.h */
2828
#include"snowball/libstemmer/header.h"
29+
#include"snowball/libstemmer/stem_ISO_8859_1_basque.h"
30+
#include"snowball/libstemmer/stem_ISO_8859_1_catalan.h"
2931
#include"snowball/libstemmer/stem_ISO_8859_1_danish.h"
3032
#include"snowball/libstemmer/stem_ISO_8859_1_dutch.h"
3133
#include"snowball/libstemmer/stem_ISO_8859_1_english.h"
@@ -44,13 +46,16 @@
4446
#include"snowball/libstemmer/stem_ISO_8859_2_romanian.h"
4547
#include"snowball/libstemmer/stem_KOI8_R_russian.h"
4648
#include"snowball/libstemmer/stem_UTF_8_arabic.h"
49+
#include"snowball/libstemmer/stem_UTF_8_basque.h"
50+
#include"snowball/libstemmer/stem_UTF_8_catalan.h"
4751
#include"snowball/libstemmer/stem_UTF_8_danish.h"
4852
#include"snowball/libstemmer/stem_UTF_8_dutch.h"
4953
#include"snowball/libstemmer/stem_UTF_8_english.h"
5054
#include"snowball/libstemmer/stem_UTF_8_finnish.h"
5155
#include"snowball/libstemmer/stem_UTF_8_french.h"
5256
#include"snowball/libstemmer/stem_UTF_8_german.h"
5357
#include"snowball/libstemmer/stem_UTF_8_greek.h"
58+
#include"snowball/libstemmer/stem_UTF_8_hindi.h"
5459
#include"snowball/libstemmer/stem_UTF_8_hungarian.h"
5560
#include"snowball/libstemmer/stem_UTF_8_indonesian.h"
5661
#include"snowball/libstemmer/stem_UTF_8_irish.h"
@@ -92,6 +97,8 @@ static const stemmer_module stemmer_modules[] =
9297
/*
9398
* Stemmers list from Snowball distribution
9499
*/
100+
STEMMER_MODULE(basque,PG_LATIN1,ISO_8859_1),
101+
STEMMER_MODULE(catalan,PG_LATIN1,ISO_8859_1),
95102
STEMMER_MODULE(danish,PG_LATIN1,ISO_8859_1),
96103
STEMMER_MODULE(dutch,PG_LATIN1,ISO_8859_1),
97104
STEMMER_MODULE(english,PG_LATIN1,ISO_8859_1),
@@ -110,13 +117,16 @@ static const stemmer_module stemmer_modules[] =
110117
STEMMER_MODULE(romanian,PG_LATIN2,ISO_8859_2),
111118
STEMMER_MODULE(russian,PG_KOI8R,KOI8_R),
112119
STEMMER_MODULE(arabic,PG_UTF8,UTF_8),
120+
STEMMER_MODULE(basque,PG_UTF8,UTF_8),
121+
STEMMER_MODULE(catalan,PG_UTF8,UTF_8),
113122
STEMMER_MODULE(danish,PG_UTF8,UTF_8),
114123
STEMMER_MODULE(dutch,PG_UTF8,UTF_8),
115124
STEMMER_MODULE(english,PG_UTF8,UTF_8),
116125
STEMMER_MODULE(finnish,PG_UTF8,UTF_8),
117126
STEMMER_MODULE(french,PG_UTF8,UTF_8),
118127
STEMMER_MODULE(german,PG_UTF8,UTF_8),
119128
STEMMER_MODULE(greek,PG_UTF8,UTF_8),
129+
STEMMER_MODULE(hindi,PG_UTF8,UTF_8),
120130
STEMMER_MODULE(hungarian,PG_UTF8,UTF_8),
121131
STEMMER_MODULE(indonesian,PG_UTF8,UTF_8),
122132
STEMMER_MODULE(irish,PG_UTF8,UTF_8),

‎src/backend/snowball/libstemmer/api.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,3 @@ extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
6161
z->c=0;
6262
return err;
6363
}
64-

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp