Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 80b011e

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent c805659 commit 80b011e

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1492,12 +1492,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14921492
/* C/POSIX collations use this path regardless of database encoding */
14931493
if (lc_ctype_is_c(collid))
14941494
{
1495-
char*p;
1496-
1497-
result=pnstrdup(buff,nbytes);
1498-
1499-
for (p=result;*p;p++)
1500-
*p=pg_ascii_tolower((unsignedchar)*p);
1495+
result=asc_tolower(buff,nbytes);
15011496
}
15021497
#ifdefUSE_WIDE_UPPER_LOWER
15031498
elseif (pg_database_encoding_max_length()>1)
@@ -1617,12 +1612,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16171612
/* C/POSIX collations use this path regardless of database encoding */
16181613
if (lc_ctype_is_c(collid))
16191614
{
1620-
char*p;
1621-
1622-
result=pnstrdup(buff,nbytes);
1623-
1624-
for (p=result;*p;p++)
1625-
*p=pg_ascii_toupper((unsignedchar)*p);
1615+
result=asc_toupper(buff,nbytes);
16261616
}
16271617
#ifdefUSE_WIDE_UPPER_LOWER
16281618
elseif (pg_database_encoding_max_length()>1)
@@ -1743,23 +1733,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17431733
/* C/POSIX collations use this path regardless of database encoding */
17441734
if (lc_ctype_is_c(collid))
17451735
{
1746-
char*p;
1747-
1748-
result=pnstrdup(buff,nbytes);
1749-
1750-
for (p=result;*p;p++)
1751-
{
1752-
charc;
1753-
1754-
if (wasalnum)
1755-
*p=c=pg_ascii_tolower((unsignedchar)*p);
1756-
else
1757-
*p=c=pg_ascii_toupper((unsignedchar)*p);
1758-
/* we don't trust isalnum() here */
1759-
wasalnum= ((c >='A'&&c <='Z')||
1760-
(c >='a'&&c <='z')||
1761-
(c >='0'&&c <='9'));
1762-
}
1736+
result=asc_initcap(buff,nbytes);
17631737
}
17641738
#ifdefUSE_WIDE_UPPER_LOWER
17651739
elseif (pg_database_encoding_max_length()>1)
@@ -1886,6 +1860,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18861860
returnresult;
18871861
}
18881862

1863+
/*
1864+
* ASCII-only lower function
1865+
*
1866+
* We pass the number of bytes so we can pass varlena and char*
1867+
* to this function. The result is a palloc'd, null-terminated string.
1868+
*/
1869+
char*
1870+
asc_tolower(constchar*buff,size_tnbytes)
1871+
{
1872+
char*result;
1873+
char*p;
1874+
1875+
if (!buff)
1876+
returnNULL;
1877+
1878+
result=pnstrdup(buff,nbytes);
1879+
1880+
for (p=result;*p;p++)
1881+
*p=pg_ascii_tolower((unsignedchar)*p);
1882+
1883+
returnresult;
1884+
}
1885+
1886+
/*
1887+
* ASCII-only upper function
1888+
*
1889+
* We pass the number of bytes so we can pass varlena and char*
1890+
* to this function. The result is a palloc'd, null-terminated string.
1891+
*/
1892+
char*
1893+
asc_toupper(constchar*buff,size_tnbytes)
1894+
{
1895+
char*result;
1896+
char*p;
1897+
1898+
if (!buff)
1899+
returnNULL;
1900+
1901+
result=pnstrdup(buff,nbytes);
1902+
1903+
for (p=result;*p;p++)
1904+
*p=pg_ascii_toupper((unsignedchar)*p);
1905+
1906+
returnresult;
1907+
}
1908+
1909+
/*
1910+
* ASCII-only initcap function
1911+
*
1912+
* We pass the number of bytes so we can pass varlena and char*
1913+
* to this function. The result is a palloc'd, null-terminated string.
1914+
*/
1915+
char*
1916+
asc_initcap(constchar*buff,size_tnbytes)
1917+
{
1918+
char*result;
1919+
char*p;
1920+
intwasalnum= false;
1921+
1922+
if (!buff)
1923+
returnNULL;
1924+
1925+
result=pnstrdup(buff,nbytes);
1926+
1927+
for (p=result;*p;p++)
1928+
{
1929+
charc;
1930+
1931+
if (wasalnum)
1932+
*p=c=pg_ascii_tolower((unsignedchar)*p);
1933+
else
1934+
*p=c=pg_ascii_toupper((unsignedchar)*p);
1935+
/* we don't trust isalnum() here */
1936+
wasalnum= ((c >='A'&&c <='Z')||
1937+
(c >='a'&&c <='z')||
1938+
(c >='0'&&c <='9'));
1939+
}
1940+
1941+
returnresult;
1942+
}
1943+
18891944
/* convenience routines for when the input is null-terminated */
18901945

18911946
staticchar*
@@ -1906,6 +1961,20 @@ str_initcap_z(const char *buff, Oid collid)
19061961
returnstr_initcap(buff,strlen(buff),collid);
19071962
}
19081963

1964+
staticchar*
1965+
asc_tolower_z(constchar*buff)
1966+
{
1967+
returnasc_tolower(buff,strlen(buff));
1968+
}
1969+
1970+
staticchar*
1971+
asc_toupper_z(constchar*buff)
1972+
{
1973+
returnasc_toupper(buff,strlen(buff));
1974+
}
1975+
1976+
/* asc_initcap_z is not currently needed */
1977+
19091978

19101979
/* ----------
19111980
* Skip TM / th in FROM_CHAR
@@ -2418,7 +2487,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24182487
INVALID_FOR_INTERVAL;
24192488
if (tmtcTzn(in))
24202489
{
2421-
char*p=str_tolower_z(tmtcTzn(in),collid);
2490+
/* We assume here that timezone names aren't localized */
2491+
char*p=asc_tolower_z(tmtcTzn(in));
24222492

24232493
strcpy(s,p);
24242494
pfree(p);
@@ -2465,7 +2535,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24652535
strcpy(s,str_toupper_z(localized_full_months[tm->tm_mon-1],collid));
24662536
else
24672537
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2468-
str_toupper_z(months_full[tm->tm_mon-1],collid));
2538+
asc_toupper_z(months_full[tm->tm_mon-1]));
24692539
s+=strlen(s);
24702540
break;
24712541
caseDCH_Month:
@@ -2475,7 +2545,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24752545
if (S_TM(n->suffix))
24762546
strcpy(s,str_initcap_z(localized_full_months[tm->tm_mon-1],collid));
24772547
else
2478-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2548+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2549+
months_full[tm->tm_mon-1]);
24792550
s+=strlen(s);
24802551
break;
24812552
caseDCH_month:
@@ -2485,10 +2556,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24852556
if (S_TM(n->suffix))
24862557
strcpy(s,str_tolower_z(localized_full_months[tm->tm_mon-1],collid));
24872558
else
2488-
{
2489-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2490-
*s=pg_tolower((unsignedchar)*s);
2491-
}
2559+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2560+
asc_tolower_z(months_full[tm->tm_mon-1]));
24922561
s+=strlen(s);
24932562
break;
24942563
caseDCH_MON:
@@ -2498,7 +2567,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24982567
if (S_TM(n->suffix))
24992568
strcpy(s,str_toupper_z(localized_abbrev_months[tm->tm_mon-1],collid));
25002569
else
2501-
strcpy(s,str_toupper_z(months[tm->tm_mon-1],collid));
2570+
strcpy(s,asc_toupper_z(months[tm->tm_mon-1]));
25022571
s+=strlen(s);
25032572
break;
25042573
caseDCH_Mon:
@@ -2518,10 +2587,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25182587
if (S_TM(n->suffix))
25192588
strcpy(s,str_tolower_z(localized_abbrev_months[tm->tm_mon-1],collid));
25202589
else
2521-
{
2522-
strcpy(s,months[tm->tm_mon-1]);
2523-
*s=pg_tolower((unsignedchar)*s);
2524-
}
2590+
strcpy(s,asc_tolower_z(months[tm->tm_mon-1]));
25252591
s+=strlen(s);
25262592
break;
25272593
caseDCH_MM:
@@ -2536,34 +2602,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25362602
strcpy(s,str_toupper_z(localized_full_days[tm->tm_wday],collid));
25372603
else
25382604
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2539-
str_toupper_z(days[tm->tm_wday],collid));
2605+
asc_toupper_z(days[tm->tm_wday]));
25402606
s+=strlen(s);
25412607
break;
25422608
caseDCH_Day:
25432609
INVALID_FOR_INTERVAL;
25442610
if (S_TM(n->suffix))
25452611
strcpy(s,str_initcap_z(localized_full_days[tm->tm_wday],collid));
25462612
else
2547-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2613+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2614+
days[tm->tm_wday]);
25482615
s+=strlen(s);
25492616
break;
25502617
caseDCH_day:
25512618
INVALID_FOR_INTERVAL;
25522619
if (S_TM(n->suffix))
25532620
strcpy(s,str_tolower_z(localized_full_days[tm->tm_wday],collid));
25542621
else
2555-
{
2556-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2557-
*s=pg_tolower((unsignedchar)*s);
2558-
}
2622+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2623+
asc_tolower_z(days[tm->tm_wday]));
25592624
s+=strlen(s);
25602625
break;
25612626
caseDCH_DY:
25622627
INVALID_FOR_INTERVAL;
25632628
if (S_TM(n->suffix))
25642629
strcpy(s,str_toupper_z(localized_abbrev_days[tm->tm_wday],collid));
25652630
else
2566-
strcpy(s,str_toupper_z(days_short[tm->tm_wday],collid));
2631+
strcpy(s,asc_toupper_z(days_short[tm->tm_wday]));
25672632
s+=strlen(s);
25682633
break;
25692634
caseDCH_Dy:
@@ -2579,10 +2644,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25792644
if (S_TM(n->suffix))
25802645
strcpy(s,str_tolower_z(localized_abbrev_days[tm->tm_wday],collid));
25812646
else
2582-
{
2583-
strcpy(s,days_short[tm->tm_wday]);
2584-
*s=pg_tolower((unsignedchar)*s);
2585-
}
2647+
strcpy(s,asc_tolower_z(days_short[tm->tm_wday]));
25862648
s+=strlen(s);
25872649
break;
25882650
caseDCH_DDD:
@@ -4690,12 +4752,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46904752
caseNUM_rn:
46914753
if (IS_FILLMODE(Np->Num))
46924754
{
4693-
strcpy(Np->inout_p,str_tolower_z(Np->number_p,collid));
4755+
strcpy(Np->inout_p,asc_tolower_z(Np->number_p));
46944756
Np->inout_p+=strlen(Np->inout_p)-1;
46954757
}
46964758
else
46974759
{
4698-
sprintf(Np->inout_p,"%15s",str_tolower_z(Np->number_p,collid));
4760+
sprintf(Np->inout_p,"%15s",asc_tolower_z(Np->number_p));
46994761
Np->inout_p+=strlen(Np->inout_p)-1;
47004762
}
47014763
break;

‎src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
externchar*str_toupper(constchar*buff,size_tnbytes,Oidcollid);
2525
externchar*str_initcap(constchar*buff,size_tnbytes,Oidcollid);
2626

27+
externchar*asc_tolower(constchar*buff,size_tnbytes);
28+
externchar*asc_toupper(constchar*buff,size_tnbytes);
29+
externchar*asc_initcap(constchar*buff,size_tnbytes);
30+
2731
externDatumtimestamp_to_char(PG_FUNCTION_ARGS);
2832
externDatumtimestamptz_to_char(PG_FUNCTION_ARGS);
2933
externDatuminterval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp