Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 7b925e1

Browse files
committed
Sync our Snowball stemmer dictionaries with current upstream
The main change is a new stemmer for Greek. There are minor changes in the Danish and French stemmers. Author: Panagiotis Mavrogiorgos <pmav99@gmail.com>
1 parent dedb6e0 commit 7b925e1

File tree

14 files changed

+5052
-693
lines changed

14 files changed

+5052
-693
lines changed

‎doc/src/sgml/textsearch.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3810,6 +3810,7 @@ Parser: "pg_catalog.default"
38103810
pg_catalog | finnish_stem | snowball stemmer for finnish language
38113811
pg_catalog | french_stem | snowball stemmer for french language
38123812
pg_catalog | german_stem | snowball stemmer for german language
3813+
pg_catalog | greek_stem | snowball stemmer for greek language
38133814
pg_catalog | hungarian_stem | snowball stemmer for hungarian language
38143815
pg_catalog | indonesian_stem | snowball stemmer for indonesian language
38153816
pg_catalog | irish_stem | snowball stemmer for irish language

‎src/backend/snowball/Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ OBJS= $(WIN32RES) dict_snowball.o api.o utilities.o \
4141
stem_UTF_8_finnish.o\
4242
stem_UTF_8_french.o\
4343
stem_UTF_8_german.o\
44+
stem_UTF_8_greek.o\
4445
stem_UTF_8_hungarian.o\
4546
stem_UTF_8_indonesian.o\
4647
stem_UTF_8_irish.o\
@@ -69,6 +70,7 @@ LANGUAGES= \
6970
finnishfinnish\
7071
frenchfrench\
7172
germangerman\
73+
greekgreek\
7274
hungarianhungarian\
7375
indonesianindonesian\
7476
irishirish\

‎src/backend/snowball/README

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ We choose to include the derived files in the PostgreSQL distribution
2929
because most installations will not have the Snowball compiler available.
3030

3131
We are currently synced with the Snowball git commit
32-
1964ce688cbeca505263c8f77e16ed923296ce7a
33-
of 2018-06-29.
32+
4456b82c26c02493e8807a66f30593a98c5d2888
33+
of 2019-06-24.
3434

3535
To update the PostgreSQL sources from a new Snowball version:
3636

@@ -57,7 +57,7 @@ do not require any changes.
5757
4. Check whether any stemmer modules have been added or removed. If so, edit
5858
the OBJS list in Makefile, the list of #include's in dict_snowball.c, and the
5959
stemmer_modules[] table in dict_snowball.c. You might also need to change
60-
the LANGUAGES list in Makefile.
60+
the LANGUAGES list in Makefile and tsearch_config_languages in initdb.c.
6161

6262
5. The various stopword files in stopwords/ must be downloaded
6363
individually from pages on the snowballstem.org website.

‎src/backend/snowball/dict_snowball.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include"snowball/libstemmer/stem_UTF_8_finnish.h"
5151
#include"snowball/libstemmer/stem_UTF_8_french.h"
5252
#include"snowball/libstemmer/stem_UTF_8_german.h"
53+
#include"snowball/libstemmer/stem_UTF_8_greek.h"
5354
#include"snowball/libstemmer/stem_UTF_8_hungarian.h"
5455
#include"snowball/libstemmer/stem_UTF_8_indonesian.h"
5556
#include"snowball/libstemmer/stem_UTF_8_irish.h"
@@ -115,6 +116,7 @@ static const stemmer_module stemmer_modules[] =
115116
STEMMER_MODULE(finnish,PG_UTF8,UTF_8),
116117
STEMMER_MODULE(french,PG_UTF8,UTF_8),
117118
STEMMER_MODULE(german,PG_UTF8,UTF_8),
119+
STEMMER_MODULE(greek,PG_UTF8,UTF_8),
118120
STEMMER_MODULE(hungarian,PG_UTF8,UTF_8),
119121
STEMMER_MODULE(indonesian,PG_UTF8,UTF_8),
120122
STEMMER_MODULE(irish,PG_UTF8,UTF_8),

‎src/backend/snowball/libstemmer/stem_ISO_8859_1_danish.c

Lines changed: 62 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ static const struct among a_2[5] =
124124
/* 4 */ {4,s_2_4,-1,2,0}
125125
};
126126

127+
staticconstunsignedcharg_c[]= {119,223,119,1 };
128+
127129
staticconstunsignedcharg_v[]= {17,65,16,1,0,0,0,0,0,0,0,0,0,0,0,0,48,0,128 };
128130

129131
staticconstunsignedcharg_s_ending[]= {239,254,42,3,0,0,0,0,0,0,0,0,0,0,0,0,16 };
@@ -133,51 +135,51 @@ static const symbol s_1[] = { 'i', 'g' };
133135
staticconstsymbols_2[]= {'l',0xF8,'s' };
134136

135137
staticintr_mark_regions(structSN_env*z) {/* forwardmode */
136-
z->I[0]=z->l;/* $p1 = <integer expression>, line31 */
137-
{intc_test1=z->c;/* test, line33 */
138-
{intret=z->c+3;/* hop, line33 */
138+
z->I[0]=z->l;/* $p1 = <integer expression>, line33 */
139+
{intc_test1=z->c;/* test, line35 */
140+
{intret=z->c+3;/* hop, line35 */
139141
if (0>ret||ret>z->l)return0;
140142
z->c=ret;
141143
}
142-
z->I[1]=z->c;/* setmark x, line33 */
144+
z->I[1]=z->c;/* setmark x, line35 */
143145
z->c=c_test1;
144146
}
145-
if (out_grouping(z,g_v,97,248,1)<0)return0;/* goto *//* grouping v, line34 */
146-
{/* gopast *//* non v, line34 */
147+
if (out_grouping(z,g_v,97,248,1)<0)return0;/* goto *//* grouping v, line36 */
148+
{/* gopast *//* non v, line36 */
147149
intret=in_grouping(z,g_v,97,248,1);
148150
if (ret<0)return0;
149151
z->c+=ret;
150152
}
151-
z->I[0]=z->c;/* setmark p1, line34 */
152-
/* try, line35 */
153-
if (!(z->I[0]<z->I[1])) gotolab0;/* $(<integer expression> < <integer expression>), line35 */
154-
z->I[0]=z->I[1];/* $p1 = <integer expression>, line35 */
153+
z->I[0]=z->c;/* setmark p1, line36 */
154+
/* try, line37 */
155+
if (!(z->I[0]<z->I[1])) gotolab0;/* $(<integer expression> < <integer expression>), line37 */
156+
z->I[0]=z->I[1];/* $p1 = <integer expression>, line37 */
155157
lab0:
156158
return1;
157159
}
158160

159161
staticintr_main_suffix(structSN_env*z) {/* backwardmode */
160162
intamong_var;
161163

162-
{intmlimit1;/* setlimit, line41 */
164+
{intmlimit1;/* setlimit, line43 */
163165
if (z->c<z->I[0])return0;
164166
mlimit1=z->lb;z->lb=z->I[0];
165-
z->ket=z->c;/* [, line41 */
166-
if (z->c <=z->lb||z->p[z->c-1] >>5!=3|| !((1851440 >> (z->p[z->c-1]&0x1f))&1)) {z->lb=mlimit1;return0; }/* substring, line41 */
167+
z->ket=z->c;/* [, line43 */
168+
if (z->c <=z->lb||z->p[z->c-1] >>5!=3|| !((1851440 >> (z->p[z->c-1]&0x1f))&1)) {z->lb=mlimit1;return0; }/* substring, line43 */
167169
among_var=find_among_b(z,a_0,32);
168170
if (!(among_var)) {z->lb=mlimit1;return0; }
169-
z->bra=z->c;/* ], line41 */
171+
z->bra=z->c;/* ], line43 */
170172
z->lb=mlimit1;
171173
}
172-
switch (among_var) {/* among, line42 */
174+
switch (among_var) {/* among, line44 */
173175
case1:
174-
{intret=slice_del(z);/* delete, line48 */
176+
{intret=slice_del(z);/* delete, line50 */
175177
if (ret<0)returnret;
176178
}
177179
break;
178180
case2:
179-
if (in_grouping_b(z,g_s_ending,97,229,0))return0;/* grouping s_ending, line50 */
180-
{intret=slice_del(z);/* delete, line50 */
181+
if (in_grouping_b(z,g_s_ending,97,229,0))return0;/* grouping s_ending, line52 */
182+
{intret=slice_del(z);/* delete, line52 */
181183
if (ret<0)returnret;
182184
}
183185
break;
@@ -186,59 +188,59 @@ static int r_main_suffix(struct SN_env * z) { /* backwardmode */
186188
}
187189

188190
staticintr_consonant_pair(structSN_env*z) {/* backwardmode */
189-
{intm_test1=z->l-z->c;/* test, line55 */
191+
{intm_test1=z->l-z->c;/* test, line57 */
190192

191-
{intmlimit2;/* setlimit, line56 */
193+
{intmlimit2;/* setlimit, line58 */
192194
if (z->c<z->I[0])return0;
193195
mlimit2=z->lb;z->lb=z->I[0];
194-
z->ket=z->c;/* [, line56 */
195-
if (z->c-1 <=z->lb|| (z->p[z->c-1]!=100&&z->p[z->c-1]!=116)) {z->lb=mlimit2;return0; }/* substring, line56 */
196+
z->ket=z->c;/* [, line58 */
197+
if (z->c-1 <=z->lb|| (z->p[z->c-1]!=100&&z->p[z->c-1]!=116)) {z->lb=mlimit2;return0; }/* substring, line58 */
196198
if (!(find_among_b(z,a_1,4))) {z->lb=mlimit2;return0; }
197-
z->bra=z->c;/* ], line56 */
199+
z->bra=z->c;/* ], line58 */
198200
z->lb=mlimit2;
199201
}
200202
z->c=z->l-m_test1;
201203
}
202204
if (z->c <=z->lb)return0;
203-
z->c--;/* next, line62 */
204-
z->bra=z->c;/* ], line62 */
205-
{intret=slice_del(z);/* delete, line62 */
205+
z->c--;/* next, line64 */
206+
z->bra=z->c;/* ], line64 */
207+
{intret=slice_del(z);/* delete, line64 */
206208
if (ret<0)returnret;
207209
}
208210
return1;
209211
}
210212

211213
staticintr_other_suffix(structSN_env*z) {/* backwardmode */
212214
intamong_var;
213-
{intm1=z->l-z->c; (void)m1;/* do, line66 */
214-
z->ket=z->c;/* [, line66 */
215-
if (!(eq_s_b(z,2,s_0))) gotolab0;/* literal, line66 */
216-
z->bra=z->c;/* ], line66 */
217-
if (!(eq_s_b(z,2,s_1))) gotolab0;/* literal, line66 */
218-
{intret=slice_del(z);/* delete, line66 */
215+
{intm1=z->l-z->c; (void)m1;/* do, line68 */
216+
z->ket=z->c;/* [, line68 */
217+
if (!(eq_s_b(z,2,s_0))) gotolab0;/* literal, line68 */
218+
z->bra=z->c;/* ], line68 */
219+
if (!(eq_s_b(z,2,s_1))) gotolab0;/* literal, line68 */
220+
{intret=slice_del(z);/* delete, line68 */
219221
if (ret<0)returnret;
220222
}
221223
lab0:
222224
z->c=z->l-m1;
223225
}
224226

225-
{intmlimit2;/* setlimit, line67 */
227+
{intmlimit2;/* setlimit, line69 */
226228
if (z->c<z->I[0])return0;
227229
mlimit2=z->lb;z->lb=z->I[0];
228-
z->ket=z->c;/* [, line67 */
229-
if (z->c-1 <=z->lb||z->p[z->c-1] >>5!=3|| !((1572992 >> (z->p[z->c-1]&0x1f))&1)) {z->lb=mlimit2;return0; }/* substring, line67 */
230+
z->ket=z->c;/* [, line69 */
231+
if (z->c-1 <=z->lb||z->p[z->c-1] >>5!=3|| !((1572992 >> (z->p[z->c-1]&0x1f))&1)) {z->lb=mlimit2;return0; }/* substring, line69 */
230232
among_var=find_among_b(z,a_2,5);
231233
if (!(among_var)) {z->lb=mlimit2;return0; }
232-
z->bra=z->c;/* ], line67 */
234+
z->bra=z->c;/* ], line69 */
233235
z->lb=mlimit2;
234236
}
235-
switch (among_var) {/* among, line68 */
237+
switch (among_var) {/* among, line70 */
236238
case1:
237-
{intret=slice_del(z);/* delete, line70 */
239+
{intret=slice_del(z);/* delete, line72 */
238240
if (ret<0)returnret;
239241
}
240-
{intm3=z->l-z->c; (void)m3;/* do, line70 */
241-
{intret=r_consonant_pair(z);/* call consonant_pair, line70 */
242+
{intm3=z->l-z->c; (void)m3;/* do, line72 */
243+
{intret=r_consonant_pair(z);/* call consonant_pair, line72 */
242244
if (ret==0) gotolab1;
243245
if (ret<0)returnret;
244246
}
@@ -247,7 +249,7 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
247249
}
248250
break;
249251
case2:
250-
{intret=slice_from_s(z,3,s_2);/* <-, line72 */
252+
{intret=slice_from_s(z,3,s_2);/* <-, line74 */
251253
if (ret<0)returnret;
252254
}
253255
break;
@@ -257,60 +259,60 @@ static int r_other_suffix(struct SN_env * z) { /* backwardmode */
257259

258260
staticintr_undouble(structSN_env*z) {/* backwardmode */
259261

260-
{intmlimit1;/* setlimit, line76 */
262+
{intmlimit1;/* setlimit, line78 */
261263
if (z->c<z->I[0])return0;
262264
mlimit1=z->lb;z->lb=z->I[0];
263-
z->ket=z->c;/* [, line76 */
264-
if (out_grouping_b(z,g_v,97,248,0)) {z->lb=mlimit1;return0; }/*non v, line76 */
265-
z->bra=z->c;/* ], line76 */
266-
z->S[0]=slice_to(z,z->S[0]);/* -> ch, line76 */
267-
if (z->S[0]==0)return-1;/* -> ch, line76 */
265+
z->ket=z->c;/* [, line78 */
266+
if (in_grouping_b(z,g_c,98,122,0)) {z->lb=mlimit1;return0; }/*grouping c, line78 */
267+
z->bra=z->c;/* ], line78 */
268+
z->S[0]=slice_to(z,z->S[0]);/* -> ch, line78 */
269+
if (z->S[0]==0)return-1;/* -> ch, line78 */
268270
z->lb=mlimit1;
269271
}
270-
if (!(eq_v_b(z,z->S[0])))return0;/* name ch, line77 */
271-
{intret=slice_del(z);/* delete, line78 */
272+
if (!(eq_v_b(z,z->S[0])))return0;/* name ch, line79 */
273+
{intret=slice_del(z);/* delete, line80 */
272274
if (ret<0)returnret;
273275
}
274276
return1;
275277
}
276278

277279
externintdanish_ISO_8859_1_stem(structSN_env*z) {/* forwardmode */
278-
{intc1=z->c;/* do, line84 */
279-
{intret=r_mark_regions(z);/* call mark_regions, line84 */
280+
{intc1=z->c;/* do, line86 */
281+
{intret=r_mark_regions(z);/* call mark_regions, line86 */
280282
if (ret==0) gotolab0;
281283
if (ret<0)returnret;
282284
}
283285
lab0:
284286
z->c=c1;
285287
}
286-
z->lb=z->c;z->c=z->l;/* backwards, line85 */
288+
z->lb=z->c;z->c=z->l;/* backwards, line87 */
287289

288-
{intm2=z->l-z->c; (void)m2;/* do, line86 */
289-
{intret=r_main_suffix(z);/* call main_suffix, line86 */
290+
{intm2=z->l-z->c; (void)m2;/* do, line88 */
291+
{intret=r_main_suffix(z);/* call main_suffix, line88 */
290292
if (ret==0) gotolab1;
291293
if (ret<0)returnret;
292294
}
293295
lab1:
294296
z->c=z->l-m2;
295297
}
296-
{intm3=z->l-z->c; (void)m3;/* do, line87 */
297-
{intret=r_consonant_pair(z);/* call consonant_pair, line87 */
298+
{intm3=z->l-z->c; (void)m3;/* do, line89 */
299+
{intret=r_consonant_pair(z);/* call consonant_pair, line89 */
298300
if (ret==0) gotolab2;
299301
if (ret<0)returnret;
300302
}
301303
lab2:
302304
z->c=z->l-m3;
303305
}
304-
{intm4=z->l-z->c; (void)m4;/* do, line88 */
305-
{intret=r_other_suffix(z);/* call other_suffix, line88 */
306+
{intm4=z->l-z->c; (void)m4;/* do, line90 */
307+
{intret=r_other_suffix(z);/* call other_suffix, line90 */
306308
if (ret==0) gotolab3;
307309
if (ret<0)returnret;
308310
}
309311
lab3:
310312
z->c=z->l-m4;
311313
}
312-
{intm5=z->l-z->c; (void)m5;/* do, line89 */
313-
{intret=r_undouble(z);/* call undouble, line89 */
314+
{intm5=z->l-z->c; (void)m5;/* do, line91 */
315+
{intret=r_undouble(z);/* call undouble, line91 */
314316
if (ret==0) gotolab4;
315317
if (ret<0)returnret;
316318
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp