Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 2ab0796

Browse files
committed
Fix char2wchar/wchar2char to support collations properly.
These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale(). This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8. I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t not pg_wchar_t and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
1 parent bb85030 commit 2ab0796

File tree

12 files changed

+217
-144
lines changed

12 files changed

+217
-144
lines changed

‎configure

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18985,7 +18985,8 @@ fi
1898518985

1898618986

1898718987

18988-
for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs
18988+
18989+
for ac_func in cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l
1898918990
do
1899018991
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
1899118992
{ $as_echo "$as_me:$LINENO: checking for $ac_func" >&5

‎configure.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1187,7 +1187,7 @@ PGAC_VAR_INT_TIMEZONE
11871187
AC_FUNC_ACCEPT_ARGTYPES
11881188
PGAC_FUNC_GETTIMEOFDAY_1ARG
11891189

1190-
AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs])
1190+
AC_CHECK_FUNCS([cbrt dlopen fcvt fdatasync getifaddrs getpeereid getpeerucred getrlimit memmove poll pstat readlink scandir setproctitle setsid sigprocmask symlink sysconf towlower utime utimes waitpid wcstombs wcstombs_l])
11911191

11921192
AC_REPLACE_FUNCS(fseeko)
11931193
case $host_os in

‎src/backend/tsearch/ts_locale.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ t_isdigit(const char *ptr)
2929
intclen=pg_mblen(ptr);
3030
wchar_tcharacter[2];
3131
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
32+
pg_locale_tmylocale=0;/* TODO */
3233

3334
if (clen==1||lc_ctype_is_c(collation))
3435
returnisdigit(TOUCHAR(ptr));
3536

36-
char2wchar(character,2,ptr,clen,collation);
37+
char2wchar(character,2,ptr,clen,mylocale);
3738

3839
returniswdigit((wint_t)character[0]);
3940
}
@@ -44,11 +45,12 @@ t_isspace(const char *ptr)
4445
intclen=pg_mblen(ptr);
4546
wchar_tcharacter[2];
4647
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
48+
pg_locale_tmylocale=0;/* TODO */
4749

4850
if (clen==1||lc_ctype_is_c(collation))
4951
returnisspace(TOUCHAR(ptr));
5052

51-
char2wchar(character,2,ptr,clen,collation);
53+
char2wchar(character,2,ptr,clen,mylocale);
5254

5355
returniswspace((wint_t)character[0]);
5456
}
@@ -59,11 +61,12 @@ t_isalpha(const char *ptr)
5961
intclen=pg_mblen(ptr);
6062
wchar_tcharacter[2];
6163
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
64+
pg_locale_tmylocale=0;/* TODO */
6265

6366
if (clen==1||lc_ctype_is_c(collation))
6467
returnisalpha(TOUCHAR(ptr));
6568

66-
char2wchar(character,2,ptr,clen,collation);
69+
char2wchar(character,2,ptr,clen,mylocale);
6770

6871
returniswalpha((wint_t)character[0]);
6972
}
@@ -74,11 +77,12 @@ t_isprint(const char *ptr)
7477
intclen=pg_mblen(ptr);
7578
wchar_tcharacter[2];
7679
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
80+
pg_locale_tmylocale=0;/* TODO */
7781

7882
if (clen==1||lc_ctype_is_c(collation))
7983
returnisprint(TOUCHAR(ptr));
8084

81-
char2wchar(character,2,ptr,clen,collation);
85+
char2wchar(character,2,ptr,clen,mylocale);
8286

8387
returniswprint((wint_t)character[0]);
8488
}
@@ -246,6 +250,7 @@ lowerstr_with_len(const char *str, int len)
246250

247251
#ifdefUSE_WIDE_UPPER_LOWER
248252
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
253+
pg_locale_tmylocale=0;/* TODO */
249254
#endif
250255

251256
if (len==0)
@@ -272,7 +277,7 @@ lowerstr_with_len(const char *str, int len)
272277
*/
273278
wptr=wstr= (wchar_t*)palloc(sizeof(wchar_t)* (len+1));
274279

275-
wlen=char2wchar(wstr,len+1,str,len,collation);
280+
wlen=char2wchar(wstr,len+1,str,len,mylocale);
276281
Assert(wlen <=len);
277282

278283
while (*wptr)
@@ -287,7 +292,7 @@ lowerstr_with_len(const char *str, int len)
287292
len=pg_database_encoding_max_length()*wlen+1;
288293
out= (char*)palloc(len);
289294

290-
wlen=wchar2char(out,wstr,len,collation);
295+
wlen=wchar2char(out,wstr,len,mylocale);
291296

292297
pfree(wstr);
293298

‎src/backend/tsearch/wparser_def.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,21 +300,23 @@ TParserInit(char *str, int len)
300300
if (prs->charmaxlen>1)
301301
{
302302
Oidcollation=DEFAULT_COLLATION_OID;/* TODO */
303+
pg_locale_tmylocale=0;/* TODO */
303304

304305
prs->usewide= true;
305306
if (lc_ctype_is_c(collation))
306307
{
307308
/*
308309
* char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
309-
* be not equal to sizeof(wchar_t)
310+
* be different from sizeof(wchar_t)
310311
*/
311312
prs->pgwstr= (pg_wchar*)palloc(sizeof(pg_wchar)* (prs->lenstr+1));
312313
pg_mb2wchar_with_len(prs->str,prs->pgwstr,prs->lenstr);
313314
}
314315
else
315316
{
316317
prs->wstr= (wchar_t*)palloc(sizeof(wchar_t)* (prs->lenstr+1));
317-
char2wchar(prs->wstr,prs->lenstr+1,prs->str,prs->lenstr,collation);
318+
char2wchar(prs->wstr,prs->lenstr+1,prs->str,prs->lenstr,
319+
mylocale);
318320
}
319321
}
320322
else

‎src/backend/utils/adt/formatting.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,10 @@ str_numth(char *dest, char *num, int type)
14541454
returndest;
14551455
}
14561456

1457+
/*****************************************************************************
1458+
*upper/lower/initcap functions
1459+
*****************************************************************************/
1460+
14571461
/*
14581462
* If the system provides the needed functions for wide-character manipulation
14591463
* (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1527,7 +1531,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
15271531
/* Output workspace cannot have more codes than input bytes */
15281532
workspace= (wchar_t*)palloc((nbytes+1)*sizeof(wchar_t));
15291533

1530-
char2wchar(workspace,nbytes+1,buff,nbytes,collid);
1534+
char2wchar(workspace,nbytes+1,buff,nbytes,mylocale);
15311535

15321536
for (curr_char=0;workspace[curr_char]!=0;curr_char++)
15331537
{
@@ -1543,7 +1547,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
15431547
result_size=curr_char*pg_database_encoding_max_length()+1;
15441548
result=palloc(result_size);
15451549

1546-
wchar2char(result,workspace,result_size,collid);
1550+
wchar2char(result,workspace,result_size,mylocale);
15471551
pfree(workspace);
15481552
}
15491553
#endif/* USE_WIDE_UPPER_LOWER */
@@ -1648,7 +1652,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16481652
/* Output workspace cannot have more codes than input bytes */
16491653
workspace= (wchar_t*)palloc((nbytes+1)*sizeof(wchar_t));
16501654

1651-
char2wchar(workspace,nbytes+1,buff,nbytes,collid);
1655+
char2wchar(workspace,nbytes+1,buff,nbytes,mylocale);
16521656

16531657
for (curr_char=0;workspace[curr_char]!=0;curr_char++)
16541658
{
@@ -1664,7 +1668,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16641668
result_size=curr_char*pg_database_encoding_max_length()+1;
16651669
result=palloc(result_size);
16661670

1667-
wchar2char(result,workspace,result_size,collid);
1671+
wchar2char(result,workspace,result_size,mylocale);
16681672
pfree(workspace);
16691673
}
16701674
#endif/* USE_WIDE_UPPER_LOWER */
@@ -1781,7 +1785,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17811785
/* Output workspace cannot have more codes than input bytes */
17821786
workspace= (wchar_t*)palloc((nbytes+1)*sizeof(wchar_t));
17831787

1784-
char2wchar(workspace,nbytes+1,buff,nbytes,collid);
1788+
char2wchar(workspace,nbytes+1,buff,nbytes,mylocale);
17851789

17861790
for (curr_char=0;workspace[curr_char]!=0;curr_char++)
17871791
{
@@ -1809,7 +1813,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18091813
result_size=curr_char*pg_database_encoding_max_length()+1;
18101814
result=palloc(result_size);
18111815

1812-
wchar2char(result,workspace,result_size,collid);
1816+
wchar2char(result,workspace,result_size,mylocale);
18131817
pfree(workspace);
18141818
}
18151819
#endif/* USE_WIDE_UPPER_LOWER */

‎src/backend/utils/adt/pg_locale.c

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,3 +1030,176 @@ pg_newlocale_from_collation(Oid collid)
10301030

10311031
returncache_entry->locale;
10321032
}
1033+
1034+
1035+
/*
1036+
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1037+
* Therefore we keep them here rather than with the mbutils code.
1038+
*/
1039+
1040+
#ifdefUSE_WIDE_UPPER_LOWER
1041+
1042+
/*
1043+
* wchar2char --- convert wide characters to multibyte format
1044+
*
1045+
* This has the same API as the standard wcstombs_l() function; in particular,
1046+
* tolen is the maximum number of bytes to store at *to, and *from must be
1047+
* zero-terminated. The output will be zero-terminated iff there is room.
1048+
*/
1049+
size_t
1050+
wchar2char(char*to,constwchar_t*from,size_ttolen,pg_locale_tlocale)
1051+
{
1052+
size_tresult;
1053+
1054+
if (tolen==0)
1055+
return0;
1056+
1057+
#ifdefWIN32
1058+
1059+
/*
1060+
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1061+
* for some reason mbstowcs and wcstombs won't do this for us, so we use
1062+
* MultiByteToWideChar().
1063+
*/
1064+
if (GetDatabaseEncoding()==PG_UTF8)
1065+
{
1066+
result=WideCharToMultiByte(CP_UTF8,0,from,-1,to,tolen,
1067+
NULL,NULL);
1068+
/* A zero return is failure */
1069+
if (result <=0)
1070+
result=-1;
1071+
else
1072+
{
1073+
Assert(result <=tolen);
1074+
/* Microsoft counts the zero terminator in the result */
1075+
result--;
1076+
}
1077+
}
1078+
else
1079+
#endif/* WIN32 */
1080+
if (locale== (pg_locale_t)0)
1081+
{
1082+
/* Use wcstombs directly for the default locale */
1083+
result=wcstombs(to,from,tolen);
1084+
}
1085+
else
1086+
{
1087+
#ifdefHAVE_LOCALE_T
1088+
#ifdefHAVE_WCSTOMBS_L
1089+
/* Use wcstombs_l for nondefault locales */
1090+
result=wcstombs_l(to,from,tolen,locale);
1091+
#else/* !HAVE_WCSTOMBS_L */
1092+
/* We have to temporarily set the locale as current ... ugh */
1093+
locale_tsave_locale=uselocale(locale);
1094+
1095+
result=wcstombs(to,from,tolen);
1096+
1097+
uselocale(save_locale);
1098+
#endif/* HAVE_WCSTOMBS_L */
1099+
#else/* !HAVE_LOCALE_T */
1100+
/* Can't have locale != 0 without HAVE_LOCALE_T */
1101+
elog(ERROR,"wcstombs_l is not available");
1102+
result=0;/* keep compiler quiet */
1103+
#endif/* HAVE_LOCALE_T */
1104+
}
1105+
1106+
returnresult;
1107+
}
1108+
1109+
/*
1110+
* char2wchar --- convert multibyte characters to wide characters
1111+
*
1112+
* This has almost the API of mbstowcs_l(), except that *from need not be
1113+
* null-terminated; instead, the number of input bytes is specified as
1114+
* fromlen. Also, we ereport() rather than returning -1 for invalid
1115+
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
1116+
* The output will be zero-terminated iff there is room.
1117+
*/
1118+
size_t
1119+
char2wchar(wchar_t*to,size_ttolen,constchar*from,size_tfromlen,
1120+
pg_locale_tlocale)
1121+
{
1122+
size_tresult;
1123+
1124+
if (tolen==0)
1125+
return0;
1126+
1127+
#ifdefWIN32
1128+
/* See WIN32 "Unicode" comment above */
1129+
if (GetDatabaseEncoding()==PG_UTF8)
1130+
{
1131+
/* Win32 API does not work for zero-length input */
1132+
if (fromlen==0)
1133+
result=0;
1134+
else
1135+
{
1136+
result=MultiByteToWideChar(CP_UTF8,0,from,fromlen,to,tolen-1);
1137+
/* A zero return is failure */
1138+
if (result==0)
1139+
result=-1;
1140+
}
1141+
1142+
if (result!=-1)
1143+
{
1144+
Assert(result<tolen);
1145+
/* Append trailing null wchar (MultiByteToWideChar() does not) */
1146+
to[result]=0;
1147+
}
1148+
}
1149+
else
1150+
#endif/* WIN32 */
1151+
{
1152+
/* mbstowcs requires ending '\0' */
1153+
char*str=pnstrdup(from,fromlen);
1154+
1155+
if (locale== (pg_locale_t)0)
1156+
{
1157+
/* Use mbstowcs directly for the default locale */
1158+
result=mbstowcs(to,str,tolen);
1159+
}
1160+
else
1161+
{
1162+
#ifdefHAVE_LOCALE_T
1163+
#ifdefHAVE_WCSTOMBS_L
1164+
/* Use mbstowcs_l for nondefault locales */
1165+
result=mbstowcs_l(to,str,tolen,locale);
1166+
#else/* !HAVE_WCSTOMBS_L */
1167+
/* We have to temporarily set the locale as current ... ugh */
1168+
locale_tsave_locale=uselocale(locale);
1169+
1170+
result=mbstowcs(to,str,tolen);
1171+
1172+
uselocale(save_locale);
1173+
#endif/* HAVE_WCSTOMBS_L */
1174+
#else/* !HAVE_LOCALE_T */
1175+
/* Can't have locale != 0 without HAVE_LOCALE_T */
1176+
elog(ERROR,"mbstowcs_l is not available");
1177+
result=0;/* keep compiler quiet */
1178+
#endif/* HAVE_LOCALE_T */
1179+
}
1180+
1181+
pfree(str);
1182+
}
1183+
1184+
if (result==-1)
1185+
{
1186+
/*
1187+
* Invalid multibyte character encountered. We try to give a useful
1188+
* error message by letting pg_verifymbstr check the string. But it's
1189+
* possible that the string is OK to us, and not OK to mbstowcs ---
1190+
* this suggests that the LC_CTYPE locale is different from the
1191+
* database encoding. Give a generic error message if verifymbstr
1192+
* can't find anything wrong.
1193+
*/
1194+
pg_verifymbstr(from,fromlen, false);/* might not return */
1195+
/* but if it does ... */
1196+
ereport(ERROR,
1197+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1198+
errmsg("invalid multibyte character for locale"),
1199+
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1200+
}
1201+
1202+
returnresult;
1203+
}
1204+
1205+
#endif/* USE_WIDE_UPPER_LOWER */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp