Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 90260e2

Browse files
committed
Fix INITCAP() word boundaries for PG_UNICODE_FAST.
Word boundaries are based on whether a character is alphanumeric or not. For the PG_UNICODE_FAST collation, alphanumeric includes non-ASCII digits; whereas for the PG_C_UTF8 collation, it only includes digits 0-9. Pass down the right information from the pg_locale_t into initcap_wbnext to differentiate the behavior.

Reported-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/20250417135841.33.nmisch@google.com
1 parent 80b727e commit 90260e2

File tree

4 files changed

+23
-4
lines changed

4 files changed

+23
-4
lines changed

‎src/backend/utils/adt/pg_locale_builtin.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ struct WordBoundaryState
4040
constchar*str;
4141
size_tlen;
4242
size_toffset;
43+
boolposix;
4344
boolinit;
4445
boolprev_alnum;
4546
};
@@ -58,7 +59,7 @@ initcap_wbnext(void *state)
5859
{
5960
pg_wcharu=utf8_to_unicode((unsignedchar*)wbstate->str+
6061
wbstate->offset);
61-
boolcurr_alnum=pg_u_isalnum(u,true);
62+
boolcurr_alnum=pg_u_isalnum(u,wbstate->posix);
6263

6364
if (!wbstate->init||curr_alnum!=wbstate->prev_alnum)
6465
{
@@ -92,6 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
9293
.str=src,
9394
.len=srclen,
9495
.offset=0,
96+
.posix= !locale->info.builtin.casemap_full,
9597
.init= false,
9698
.prev_alnum= false,
9799
};

‎src/common/unicode/case_test.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ struct WordBoundaryState
4141
constchar*str;
4242
size_tlen;
4343
size_toffset;
44+
boolposix;
4445
boolinit;
4546
boolprev_alnum;
4647
};
@@ -55,7 +56,7 @@ initcap_wbnext(void *state)
5556
{
5657
pg_wcharu=utf8_to_unicode((unsignedchar*)wbstate->str+
5758
wbstate->offset);
58-
boolcurr_alnum=pg_u_isalnum(u,true);
59+
boolcurr_alnum=pg_u_isalnum(u,wbstate->posix);
5960

6061
if (!wbstate->init||curr_alnum!=wbstate->prev_alnum)
6162
{
@@ -112,10 +113,13 @@ icu_test_full(char *str)
112113
charicu_upper[BUFSZ];
113114
charicu_fold[BUFSZ];
114115
UErrorCodestatus;
116+
117+
/* full case mapping doesn't use posix semantics */
115118
structWordBoundaryStatewbstate= {
116119
.str=str,
117120
.len=strlen(str),
118121
.offset=0,
122+
.posix= false,
119123
.init= false,
120124
.prev_alnum= false,
121125
};
@@ -344,6 +348,12 @@ test_convert_case()
344348
test_convert(tfunc_lower,"σς'Σ' ΣΣ'Σ'","σς'ς' σσ'ς'");
345349
test_convert(tfunc_title,"σςΣ ΣΣΣ","Σςς Σσς");
346350
test_convert(tfunc_fold,"σςΣ ΣΣΣ","σσσ σσσ");
351+
/* test that alphanumerics are word characters */
352+
test_convert(tfunc_title,"λλ","Λλ");
353+
test_convert(tfunc_title,"1a","1a");
354+
/* U+FF11 FULLWIDTH ONE is alphanumeric for full case mapping */
355+
test_convert(tfunc_title,"\uFF11a","\uFF11a");
356+
347357

348358
#ifdefUSE_ICU
349359
icu_test_full("");
@@ -354,6 +364,7 @@ test_convert_case()
354364
icu_test_full("abc 123xyz");
355365
icu_test_full("σςΣ ΣΣΣ");
356366
icu_test_full("ıiIİ");
367+
icu_test_full("\uFF11a");
357368
/* test <alpha><iota_subscript><acute> */
358369
icu_test_full("\u0391\u0345\u0301");
359370
#endif

‎src/test/regress/expected/collate.utf8.out

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ INSERT INTO test_pg_c_utf8 VALUES
5252
('abc DEF 123abc'),
5353
('ábc sßs ßss DÉF'),
5454
('DŽxxDŽ džxxDž Džxxdž'),
55+
(U&'Λλ 1a \FF11a'),
5556
('ȺȺȺ'),
5657
('ⱥⱥⱥ'),
5758
('ⱥȺ');
@@ -67,10 +68,11 @@ SELECT
6768
abc DEF 123abc | abc def 123abc | Abc Def 123abc | ABC DEF 123ABC | 14 | 14 | 14 | 14
6869
ábc sßs ßss DÉF | ábc sßs ßss déf | Ábc Sßs ßss Déf | ÁBC SßS ßSS DÉF | 19 | 19 | 19 | 19
6970
DŽxxDŽ džxxDž Džxxdž | džxxdž džxxdž džxxdž | DŽxxdž DŽxxdž DŽxxdž | DŽXXDŽ DŽXXDŽ DŽXXDŽ | 20 | 20 | 20 | 20
71+
Λλ 1a １a | λλ 1a １a | Λλ 1a １A | ΛΛ 1A １A | 12 | 12 | 12 | 12
7072
ȺȺȺ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 6 | 9 | 8 | 6
7173
ⱥⱥⱥ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 9 | 9 | 8 | 6
7274
ⱥȺ | ⱥⱥ | Ⱥⱥ | ȺȺ | 5 | 6 | 5 | 4
73-
(6 rows)
75+
(7 rows)
7476

7577
DROP TABLE test_pg_c_utf8;
7678
-- negative test: Final_Sigma not used for builtin locale C.UTF-8
@@ -182,6 +184,7 @@ INSERT INTO test_pg_unicode_fast VALUES
182184
('abc DEF 123abc'),
183185
('ábc sßs ßss DÉF'),
184186
('DŽxxDŽ džxxDž Džxxdž'),
187+
(U&'Λλ 1a \FF11a'),
185188
('ȺȺȺ'),
186189
('ⱥⱥⱥ'),
187190
('ⱥȺ');
@@ -197,10 +200,11 @@ SELECT
197200
abc DEF 123abc | abc def 123abc | Abc Def 123abc | ABC DEF 123ABC | 14 | 14 | 14 | 14
198201
ábc sßs ßss DÉF | ábc sßs ßss déf | Ábc Sßs Ssss Déf | ÁBC SSSS SSSS DÉF | 19 | 19 | 19 | 19
199202
DŽxxDŽ džxxDž Džxxdž | džxxdž džxxdž džxxdž | Džxxdž Džxxdž Džxxdž | DŽXXDŽ DŽXXDŽ DŽXXDŽ | 20 | 20 | 20 | 20
203+
Λλ 1a １a | λλ 1a １a | Λλ 1a １a | ΛΛ 1A １A | 12 | 12 | 12 | 12
200204
ȺȺȺ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 6 | 9 | 8 | 6
201205
ⱥⱥⱥ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 9 | 9 | 8 | 6
202206
ⱥȺ | ⱥⱥ | Ⱥⱥ | ȺȺ | 5 | 6 | 5 | 4
203-
(6 rows)
207+
(7 rows)
204208

205209
DROP TABLE test_pg_unicode_fast;
206210
-- test Final_Sigma

‎src/test/regress/sql/collate.utf8.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ INSERT INTO test_pg_c_utf8 VALUES
4545
('abc DEF 123abc'),
4646
('ábc sßs ßss DÉF'),
4747
('DŽxxDŽ džxxDž Džxxdž'),
48+
(U&'Λλ 1a \FF11a'),
4849
('ȺȺȺ'),
4950
('ⱥⱥⱥ'),
5051
('ⱥȺ');
@@ -100,6 +101,7 @@ INSERT INTO test_pg_unicode_fast VALUES
100101
('abc DEF 123abc'),
101102
('ábc sßs ßss DÉF'),
102103
('DŽxxDŽ džxxDž Džxxdž'),
104+
(U&'Λλ 1a \FF11a'),
103105
('ȺȺȺ'),
104106
('ⱥⱥⱥ'),
105107
('ⱥȺ');

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp