Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a382997

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent da5f032, commit a382997

File tree

2 files changed

+117
-25
lines changed

2 files changed

+117
-25
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 113 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1644,6 +1644,87 @@ str_initcap(const char *buff, size_t nbytes)
16441644
returnresult;
16451645
}
16461646

1647+
/*
1648+
* ASCII-only lower function
1649+
*
1650+
* We pass the number of bytes so we can pass varlena and char*
1651+
* to this function. The result is a palloc'd, null-terminated string.
1652+
*/
1653+
char*
1654+
asc_tolower(constchar*buff,size_tnbytes)
1655+
{
1656+
char*result;
1657+
char*p;
1658+
1659+
if (!buff)
1660+
returnNULL;
1661+
1662+
result=pnstrdup(buff,nbytes);
1663+
1664+
for (p=result;*p;p++)
1665+
*p=pg_tolower((unsignedchar)*p);
1666+
1667+
returnresult;
1668+
}
1669+
1670+
/*
1671+
* ASCII-only upper function
1672+
*
1673+
* We pass the number of bytes so we can pass varlena and char*
1674+
* to this function. The result is a palloc'd, null-terminated string.
1675+
*/
1676+
char*
1677+
asc_toupper(constchar*buff,size_tnbytes)
1678+
{
1679+
char*result;
1680+
char*p;
1681+
1682+
if (!buff)
1683+
returnNULL;
1684+
1685+
result=pnstrdup(buff,nbytes);
1686+
1687+
for (p=result;*p;p++)
1688+
*p=pg_toupper((unsignedchar)*p);
1689+
1690+
returnresult;
1691+
}
1692+
1693+
/*
1694+
* ASCII-only initcap function
1695+
*
1696+
* We pass the number of bytes so we can pass varlena and char*
1697+
* to this function. The result is a palloc'd, null-terminated string.
1698+
*/
1699+
char*
1700+
asc_initcap(constchar*buff,size_tnbytes)
1701+
{
1702+
char*result;
1703+
char*p;
1704+
intwasalnum= false;
1705+
1706+
if (!buff)
1707+
returnNULL;
1708+
1709+
result=pnstrdup(buff,nbytes);
1710+
1711+
for (p=result;*p;p++)
1712+
{
1713+
charc;
1714+
1715+
if (wasalnum)
1716+
*p=c=pg_tolower((unsignedchar)*p);
1717+
else
1718+
*p=c=pg_toupper((unsignedchar)*p);
1719+
/* we don't trust isalnum() here */
1720+
wasalnum= ((c >='A'&&c <='Z')||
1721+
(c >='a'&&c <='z')||
1722+
(c >='0'&&c <='9'));
1723+
}
1724+
1725+
returnresult;
1726+
}
1727+
16471728
/* convenience routines for when the input is null-terminated */
16481729

16491730
staticchar*
@@ -1664,6 +1745,20 @@ str_initcap_z(const char *buff)
16641745
returnstr_initcap(buff,strlen(buff));
16651746
}
16661747

1748+
staticchar*
1749+
asc_tolower_z(constchar*buff)
1750+
{
1751+
returnasc_tolower(buff,strlen(buff));
1752+
}
1753+
1754+
staticchar*
1755+
asc_toupper_z(constchar*buff)
1756+
{
1757+
returnasc_toupper(buff,strlen(buff));
1758+
}
1759+
1760+
/* asc_initcap_z is not currently needed */
1761+
16671762

16681763
/* ----------
16691764
* Skip TM / th in FROM_CHAR
@@ -2151,7 +2246,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
21512246
INVALID_FOR_INTERVAL;
21522247
if (tmtcTzn(in))
21532248
{
2154-
char*p=str_tolower_z(tmtcTzn(in));
2249+
/* We assume here that timezone names aren't localized */
2250+
char*p=asc_tolower_z(tmtcTzn(in));
21552251

21562252
strcpy(s,p);
21572253
pfree(p);
@@ -2198,7 +2294,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
21982294
strcpy(s,str_toupper_z(localized_full_months[tm->tm_mon-1]));
21992295
else
22002296
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2201-
str_toupper_z(months_full[tm->tm_mon-1]));
2297+
asc_toupper_z(months_full[tm->tm_mon-1]));
22022298
s+=strlen(s);
22032299
break;
22042300
caseDCH_Month:
@@ -2208,7 +2304,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22082304
if (S_TM(n->suffix))
22092305
strcpy(s,str_initcap_z(localized_full_months[tm->tm_mon-1]));
22102306
else
2211-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2307+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2308+
months_full[tm->tm_mon-1]);
22122309
s+=strlen(s);
22132310
break;
22142311
caseDCH_month:
@@ -2218,10 +2315,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22182315
if (S_TM(n->suffix))
22192316
strcpy(s,str_tolower_z(localized_full_months[tm->tm_mon-1]));
22202317
else
2221-
{
2222-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2223-
*s=pg_tolower((unsignedchar)*s);
2224-
}
2318+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2319+
asc_tolower_z(months_full[tm->tm_mon-1]));
22252320
s+=strlen(s);
22262321
break;
22272322
caseDCH_MON:
@@ -2231,7 +2326,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22312326
if (S_TM(n->suffix))
22322327
strcpy(s,str_toupper_z(localized_abbrev_months[tm->tm_mon-1]));
22332328
else
2234-
strcpy(s,str_toupper_z(months[tm->tm_mon-1]));
2329+
strcpy(s,asc_toupper_z(months[tm->tm_mon-1]));
22352330
s+=strlen(s);
22362331
break;
22372332
caseDCH_Mon:
@@ -2251,10 +2346,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22512346
if (S_TM(n->suffix))
22522347
strcpy(s,str_tolower_z(localized_abbrev_months[tm->tm_mon-1]));
22532348
else
2254-
{
2255-
strcpy(s,months[tm->tm_mon-1]);
2256-
*s=pg_tolower((unsignedchar)*s);
2257-
}
2349+
strcpy(s,asc_tolower_z(months[tm->tm_mon-1]));
22582350
s+=strlen(s);
22592351
break;
22602352
caseDCH_MM:
@@ -2269,34 +2361,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
22692361
strcpy(s,str_toupper_z(localized_full_days[tm->tm_wday]));
22702362
else
22712363
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2272-
str_toupper_z(days[tm->tm_wday]));
2364+
asc_toupper_z(days[tm->tm_wday]));
22732365
s+=strlen(s);
22742366
break;
22752367
caseDCH_Day:
22762368
INVALID_FOR_INTERVAL;
22772369
if (S_TM(n->suffix))
22782370
strcpy(s,str_initcap_z(localized_full_days[tm->tm_wday]));
22792371
else
2280-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2372+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2373+
days[tm->tm_wday]);
22812374
s+=strlen(s);
22822375
break;
22832376
caseDCH_day:
22842377
INVALID_FOR_INTERVAL;
22852378
if (S_TM(n->suffix))
22862379
strcpy(s,str_tolower_z(localized_full_days[tm->tm_wday]));
22872380
else
2288-
{
2289-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2290-
*s=pg_tolower((unsignedchar)*s);
2291-
}
2381+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2382+
asc_tolower_z(days[tm->tm_wday]));
22922383
s+=strlen(s);
22932384
break;
22942385
caseDCH_DY:
22952386
INVALID_FOR_INTERVAL;
22962387
if (S_TM(n->suffix))
22972388
strcpy(s,str_toupper_z(localized_abbrev_days[tm->tm_wday]));
22982389
else
2299-
strcpy(s,str_toupper_z(days_short[tm->tm_wday]));
2390+
strcpy(s,asc_toupper_z(days_short[tm->tm_wday]));
23002391
s+=strlen(s);
23012392
break;
23022393
caseDCH_Dy:
@@ -2312,10 +2403,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
23122403
if (S_TM(n->suffix))
23132404
strcpy(s,str_tolower_z(localized_abbrev_days[tm->tm_wday]));
23142405
else
2315-
{
2316-
strcpy(s,days_short[tm->tm_wday]);
2317-
*s=pg_tolower((unsignedchar)*s);
2318-
}
2406+
strcpy(s,asc_tolower_z(days_short[tm->tm_wday]));
23192407
s+=strlen(s);
23202408
break;
23212409
caseDCH_DDD:
@@ -4422,12 +4510,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
44224510
caseNUM_rn:
44234511
if (IS_FILLMODE(Np->Num))
44244512
{
4425-
strcpy(Np->inout_p,str_tolower_z(Np->number_p));
4513+
strcpy(Np->inout_p,asc_tolower_z(Np->number_p));
44264514
Np->inout_p+=strlen(Np->inout_p)-1;
44274515
}
44284516
else
44294517
{
4430-
sprintf(Np->inout_p,"%15s",str_tolower_z(Np->number_p));
4518+
sprintf(Np->inout_p,"%15s",asc_tolower_z(Np->number_p));
44314519
Np->inout_p+=strlen(Np->inout_p)-1;
44324520
}
44334521
break;

‎src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@ extern char *str_tolower(const char *buff, size_t nbytes);
2525
externchar*str_toupper(constchar*buff,size_tnbytes);
2626
externchar*str_initcap(constchar*buff,size_tnbytes);
2727

28+
externchar*asc_tolower(constchar*buff,size_tnbytes);
29+
externchar*asc_toupper(constchar*buff,size_tnbytes);
30+
externchar*asc_initcap(constchar*buff,size_tnbytes);
31+
2832
externDatumtimestamp_to_char(PG_FUNCTION_ARGS);
2933
externDatumtimestamptz_to_char(PG_FUNCTION_ARGS);
3034
externDatuminterval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp