Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 81e2255

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent 3a77936 commit 81e2255

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1491,12 +1491,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14911491
/* C/POSIX collations use this path regardless of database encoding */
14921492
if (lc_ctype_is_c(collid))
14931493
{
1494-
char*p;
1495-
1496-
result=pnstrdup(buff,nbytes);
1497-
1498-
for (p=result;*p;p++)
1499-
*p=pg_ascii_tolower((unsignedchar)*p);
1494+
result=asc_tolower(buff,nbytes);
15001495
}
15011496
#ifdefUSE_WIDE_UPPER_LOWER
15021497
elseif (pg_database_encoding_max_length()>1)
@@ -1612,12 +1607,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16121607
/* C/POSIX collations use this path regardless of database encoding */
16131608
if (lc_ctype_is_c(collid))
16141609
{
1615-
char*p;
1616-
1617-
result=pnstrdup(buff,nbytes);
1618-
1619-
for (p=result;*p;p++)
1620-
*p=pg_ascii_toupper((unsignedchar)*p);
1610+
result=asc_toupper(buff,nbytes);
16211611
}
16221612
#ifdefUSE_WIDE_UPPER_LOWER
16231613
elseif (pg_database_encoding_max_length()>1)
@@ -1734,23 +1724,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17341724
/* C/POSIX collations use this path regardless of database encoding */
17351725
if (lc_ctype_is_c(collid))
17361726
{
1737-
char*p;
1738-
1739-
result=pnstrdup(buff,nbytes);
1740-
1741-
for (p=result;*p;p++)
1742-
{
1743-
charc;
1744-
1745-
if (wasalnum)
1746-
*p=c=pg_ascii_tolower((unsignedchar)*p);
1747-
else
1748-
*p=c=pg_ascii_toupper((unsignedchar)*p);
1749-
/* we don't trust isalnum() here */
1750-
wasalnum= ((c >='A'&&c <='Z')||
1751-
(c >='a'&&c <='z')||
1752-
(c >='0'&&c <='9'));
1753-
}
1727+
result=asc_initcap(buff,nbytes);
17541728
}
17551729
#ifdefUSE_WIDE_UPPER_LOWER
17561730
elseif (pg_database_encoding_max_length()>1)
@@ -1873,6 +1847,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18731847
returnresult;
18741848
}
18751849

1850+
/*
1851+
* ASCII-only lower function
1852+
*
1853+
* We pass the number of bytes so we can pass varlena and char*
1854+
* to this function. The result is a palloc'd, null-terminated string.
1855+
*/
1856+
char*
1857+
asc_tolower(constchar*buff,size_tnbytes)
1858+
{
1859+
char*result;
1860+
char*p;
1861+
1862+
if (!buff)
1863+
returnNULL;
1864+
1865+
result=pnstrdup(buff,nbytes);
1866+
1867+
for (p=result;*p;p++)
1868+
*p=pg_ascii_tolower((unsignedchar)*p);
1869+
1870+
returnresult;
1871+
}
1872+
1873+
/*
1874+
* ASCII-only upper function
1875+
*
1876+
* We pass the number of bytes so we can pass varlena and char*
1877+
* to this function. The result is a palloc'd, null-terminated string.
1878+
*/
1879+
char*
1880+
asc_toupper(constchar*buff,size_tnbytes)
1881+
{
1882+
char*result;
1883+
char*p;
1884+
1885+
if (!buff)
1886+
returnNULL;
1887+
1888+
result=pnstrdup(buff,nbytes);
1889+
1890+
for (p=result;*p;p++)
1891+
*p=pg_ascii_toupper((unsignedchar)*p);
1892+
1893+
returnresult;
1894+
}
1895+
1896+
/*
1897+
* ASCII-only initcap function
1898+
*
1899+
* We pass the number of bytes so we can pass varlena and char*
1900+
* to this function. The result is a palloc'd, null-terminated string.
1901+
*/
1902+
char*
1903+
asc_initcap(constchar*buff,size_tnbytes)
1904+
{
1905+
char*result;
1906+
char*p;
1907+
intwasalnum= false;
1908+
1909+
if (!buff)
1910+
returnNULL;
1911+
1912+
result=pnstrdup(buff,nbytes);
1913+
1914+
for (p=result;*p;p++)
1915+
{
1916+
charc;
1917+
1918+
if (wasalnum)
1919+
*p=c=pg_ascii_tolower((unsignedchar)*p);
1920+
else
1921+
*p=c=pg_ascii_toupper((unsignedchar)*p);
1922+
/* we don't trust isalnum() here */
1923+
wasalnum= ((c >='A'&&c <='Z')||
1924+
(c >='a'&&c <='z')||
1925+
(c >='0'&&c <='9'));
1926+
}
1927+
1928+
returnresult;
1929+
}
1930+
18761931
/* convenience routines for when the input is null-terminated */
18771932

18781933
staticchar*
@@ -1893,6 +1948,20 @@ str_initcap_z(const char *buff, Oid collid)
18931948
returnstr_initcap(buff,strlen(buff),collid);
18941949
}
18951950

1951+
staticchar*
1952+
asc_tolower_z(constchar*buff)
1953+
{
1954+
returnasc_tolower(buff,strlen(buff));
1955+
}
1956+
1957+
staticchar*
1958+
asc_toupper_z(constchar*buff)
1959+
{
1960+
returnasc_toupper(buff,strlen(buff));
1961+
}
1962+
1963+
/* asc_initcap_z is not currently needed */
1964+
18961965

18971966
/* ----------
18981967
* Skip TM / th in FROM_CHAR
@@ -2380,7 +2449,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
23802449
INVALID_FOR_INTERVAL;
23812450
if (tmtcTzn(in))
23822451
{
2383-
char*p=str_tolower_z(tmtcTzn(in),collid);
2452+
/* We assume here that timezone names aren't localized */
2453+
char*p=asc_tolower_z(tmtcTzn(in));
23842454

23852455
strcpy(s,p);
23862456
pfree(p);
@@ -2427,7 +2497,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24272497
strcpy(s,str_toupper_z(localized_full_months[tm->tm_mon-1],collid));
24282498
else
24292499
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2430-
str_toupper_z(months_full[tm->tm_mon-1],collid));
2500+
asc_toupper_z(months_full[tm->tm_mon-1]));
24312501
s+=strlen(s);
24322502
break;
24332503
caseDCH_Month:
@@ -2437,7 +2507,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24372507
if (S_TM(n->suffix))
24382508
strcpy(s,str_initcap_z(localized_full_months[tm->tm_mon-1],collid));
24392509
else
2440-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2510+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2511+
months_full[tm->tm_mon-1]);
24412512
s+=strlen(s);
24422513
break;
24432514
caseDCH_month:
@@ -2447,10 +2518,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24472518
if (S_TM(n->suffix))
24482519
strcpy(s,str_tolower_z(localized_full_months[tm->tm_mon-1],collid));
24492520
else
2450-
{
2451-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2452-
*s=pg_tolower((unsignedchar)*s);
2453-
}
2521+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2522+
asc_tolower_z(months_full[tm->tm_mon-1]));
24542523
s+=strlen(s);
24552524
break;
24562525
caseDCH_MON:
@@ -2460,7 +2529,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24602529
if (S_TM(n->suffix))
24612530
strcpy(s,str_toupper_z(localized_abbrev_months[tm->tm_mon-1],collid));
24622531
else
2463-
strcpy(s,str_toupper_z(months[tm->tm_mon-1],collid));
2532+
strcpy(s,asc_toupper_z(months[tm->tm_mon-1]));
24642533
s+=strlen(s);
24652534
break;
24662535
caseDCH_Mon:
@@ -2480,10 +2549,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24802549
if (S_TM(n->suffix))
24812550
strcpy(s,str_tolower_z(localized_abbrev_months[tm->tm_mon-1],collid));
24822551
else
2483-
{
2484-
strcpy(s,months[tm->tm_mon-1]);
2485-
*s=pg_tolower((unsignedchar)*s);
2486-
}
2552+
strcpy(s,asc_tolower_z(months[tm->tm_mon-1]));
24872553
s+=strlen(s);
24882554
break;
24892555
caseDCH_MM:
@@ -2498,34 +2564,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24982564
strcpy(s,str_toupper_z(localized_full_days[tm->tm_wday],collid));
24992565
else
25002566
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2501-
str_toupper_z(days[tm->tm_wday],collid));
2567+
asc_toupper_z(days[tm->tm_wday]));
25022568
s+=strlen(s);
25032569
break;
25042570
caseDCH_Day:
25052571
INVALID_FOR_INTERVAL;
25062572
if (S_TM(n->suffix))
25072573
strcpy(s,str_initcap_z(localized_full_days[tm->tm_wday],collid));
25082574
else
2509-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2575+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2576+
days[tm->tm_wday]);
25102577
s+=strlen(s);
25112578
break;
25122579
caseDCH_day:
25132580
INVALID_FOR_INTERVAL;
25142581
if (S_TM(n->suffix))
25152582
strcpy(s,str_tolower_z(localized_full_days[tm->tm_wday],collid));
25162583
else
2517-
{
2518-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2519-
*s=pg_tolower((unsignedchar)*s);
2520-
}
2584+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2585+
asc_tolower_z(days[tm->tm_wday]));
25212586
s+=strlen(s);
25222587
break;
25232588
caseDCH_DY:
25242589
INVALID_FOR_INTERVAL;
25252590
if (S_TM(n->suffix))
25262591
strcpy(s,str_toupper_z(localized_abbrev_days[tm->tm_wday],collid));
25272592
else
2528-
strcpy(s,str_toupper_z(days_short[tm->tm_wday],collid));
2593+
strcpy(s,asc_toupper_z(days_short[tm->tm_wday]));
25292594
s+=strlen(s);
25302595
break;
25312596
caseDCH_Dy:
@@ -2541,10 +2606,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25412606
if (S_TM(n->suffix))
25422607
strcpy(s,str_tolower_z(localized_abbrev_days[tm->tm_wday],collid));
25432608
else
2544-
{
2545-
strcpy(s,days_short[tm->tm_wday]);
2546-
*s=pg_tolower((unsignedchar)*s);
2547-
}
2609+
strcpy(s,asc_tolower_z(days_short[tm->tm_wday]));
25482610
s+=strlen(s);
25492611
break;
25502612
caseDCH_DDD:
@@ -4651,12 +4713,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46514713
caseNUM_rn:
46524714
if (IS_FILLMODE(Np->Num))
46534715
{
4654-
strcpy(Np->inout_p,str_tolower_z(Np->number_p,collid));
4716+
strcpy(Np->inout_p,asc_tolower_z(Np->number_p));
46554717
Np->inout_p+=strlen(Np->inout_p)-1;
46564718
}
46574719
else
46584720
{
4659-
sprintf(Np->inout_p,"%15s",str_tolower_z(Np->number_p,collid));
4721+
sprintf(Np->inout_p,"%15s",asc_tolower_z(Np->number_p));
46604722
Np->inout_p+=strlen(Np->inout_p)-1;
46614723
}
46624724
break;

‎src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
externchar*str_toupper(constchar*buff,size_tnbytes,Oidcollid);
2525
externchar*str_initcap(constchar*buff,size_tnbytes,Oidcollid);
2626

27+
externchar*asc_tolower(constchar*buff,size_tnbytes);
28+
externchar*asc_toupper(constchar*buff,size_tnbytes);
29+
externchar*asc_initcap(constchar*buff,size_tnbytes);
30+
2731
externDatumtimestamp_to_char(PG_FUNCTION_ARGS);
2832
externDatumtimestamptz_to_char(PG_FUNCTION_ARGS);
2933
externDatuminterval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp