Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 1db236c

Browse files
committed
Fix to_char() to use ASCII-only case-folding rules where appropriate.
formatting.c used locale-dependent case folding rules in some code paths where the result isn't supposed to be locale-dependent, for example to_char(timestamp, 'DAY'). Since the source data is always just ASCII in these cases, that usually didn't matter ... but it does matter in Turkish locales, which have unusual treatment of "i" and "I". To confuse matters even more, the misbehavior was only visible in UTF8 encoding, because in single-byte encodings we used pg_toupper/pg_tolower which don't have locale-specific behavior for ASCII characters. Fix by providing intentionally ASCII-only case-folding functions and using these where appropriate. Per bug #7913 from Adnan Dursun. Back-patch to all active branches, since it's been like this for a long time.
1 parent fa85230 commit 1db236c

File tree

2 files changed

+120
-54
lines changed

2 files changed

+120
-54
lines changed

‎src/backend/utils/adt/formatting.c

Lines changed: 116 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -1493,12 +1493,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
14931493
/* C/POSIX collations use this path regardless of database encoding */
14941494
if (lc_ctype_is_c(collid))
14951495
{
1496-
char*p;
1497-
1498-
result=pnstrdup(buff,nbytes);
1499-
1500-
for (p=result;*p;p++)
1501-
*p=pg_ascii_tolower((unsignedchar)*p);
1496+
result=asc_tolower(buff,nbytes);
15021497
}
15031498
#ifdefUSE_WIDE_UPPER_LOWER
15041499
elseif (pg_database_encoding_max_length()>1)
@@ -1618,12 +1613,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16181613
/* C/POSIX collations use this path regardless of database encoding */
16191614
if (lc_ctype_is_c(collid))
16201615
{
1621-
char*p;
1622-
1623-
result=pnstrdup(buff,nbytes);
1624-
1625-
for (p=result;*p;p++)
1626-
*p=pg_ascii_toupper((unsignedchar)*p);
1616+
result=asc_toupper(buff,nbytes);
16271617
}
16281618
#ifdefUSE_WIDE_UPPER_LOWER
16291619
elseif (pg_database_encoding_max_length()>1)
@@ -1744,23 +1734,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
17441734
/* C/POSIX collations use this path regardless of database encoding */
17451735
if (lc_ctype_is_c(collid))
17461736
{
1747-
char*p;
1748-
1749-
result=pnstrdup(buff,nbytes);
1750-
1751-
for (p=result;*p;p++)
1752-
{
1753-
charc;
1754-
1755-
if (wasalnum)
1756-
*p=c=pg_ascii_tolower((unsignedchar)*p);
1757-
else
1758-
*p=c=pg_ascii_toupper((unsignedchar)*p);
1759-
/* we don't trust isalnum() here */
1760-
wasalnum= ((c >='A'&&c <='Z')||
1761-
(c >='a'&&c <='z')||
1762-
(c >='0'&&c <='9'));
1763-
}
1737+
result=asc_initcap(buff,nbytes);
17641738
}
17651739
#ifdefUSE_WIDE_UPPER_LOWER
17661740
elseif (pg_database_encoding_max_length()>1)
@@ -1887,6 +1861,87 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18871861
returnresult;
18881862
}
18891863

1864+
/*
 * asc_tolower
 *
 * Fold a string to lower case using ASCII-only rules.
 *
 * The length is passed explicitly so that both varlena contents and
 * plain char * strings can be handed to this function.  Returns NULL
 * when buff is NULL; otherwise the result is a palloc'd,
 * null-terminated string.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *folded;
	char	   *cursor;

	if (buff == NULL)
		return NULL;

	folded = pnstrdup(buff, nbytes);

	/* walk the copy up to its terminating NUL, folding in place */
	cursor = folded;
	while (*cursor != '\0')
	{
		*cursor = pg_ascii_tolower((unsigned char) *cursor);
		cursor++;
	}

	return folded;
}
1886+
1887+
/*
 * asc_toupper
 *
 * Fold a string to upper case using ASCII-only rules.
 *
 * The length is passed explicitly so that both varlena contents and
 * plain char * strings can be handed to this function.  Returns NULL
 * when buff is NULL; otherwise the result is a palloc'd,
 * null-terminated string.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *folded;
	size_t		pos;

	if (buff == NULL)
		return NULL;

	folded = pnstrdup(buff, nbytes);

	/* stop at the first NUL in the copy, exactly like a pointer walk */
	for (pos = 0; folded[pos] != '\0'; pos++)
		folded[pos] = pg_ascii_toupper((unsigned char) folded[pos]);

	return folded;
}
1909+
1910+
/*
1911+
* ASCII-only initcap function
1912+
*
1913+
* We pass the number of bytes so we can pass varlena and char*
1914+
* to this function. The result is a palloc'd, null-terminated string.
1915+
*/
1916+
char*
1917+
asc_initcap(constchar*buff,size_tnbytes)
1918+
{
1919+
char*result;
1920+
char*p;
1921+
intwasalnum= false;
1922+
1923+
if (!buff)
1924+
returnNULL;
1925+
1926+
result=pnstrdup(buff,nbytes);
1927+
1928+
for (p=result;*p;p++)
1929+
{
1930+
charc;
1931+
1932+
if (wasalnum)
1933+
*p=c=pg_ascii_tolower((unsignedchar)*p);
1934+
else
1935+
*p=c=pg_ascii_toupper((unsignedchar)*p);
1936+
/* we don't trust isalnum() here */
1937+
wasalnum= ((c >='A'&&c <='Z')||
1938+
(c >='a'&&c <='z')||
1939+
(c >='0'&&c <='9'));
1940+
}
1941+
1942+
returnresult;
1943+
}
1944+
18901945
/* convenience routines for when the input is null-terminated */
18911946

18921947
staticchar*
@@ -1907,6 +1962,20 @@ str_initcap_z(const char *buff, Oid collid)
19071962
returnstr_initcap(buff,strlen(buff),collid);
19081963
}
19091964

1965+
/* asc_tolower() for a null-terminated input; length is taken via strlen() */
static char *
asc_tolower_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_tolower(buff, len);
}
1970+
1971+
/* asc_toupper() for a null-terminated input; length is taken via strlen() */
static char *
asc_toupper_z(const char *buff)
{
	size_t		len = strlen(buff);

	return asc_toupper(buff, len);
}
1976+
1977+
/* asc_initcap_z is not currently needed */
1978+
19101979

19111980
/* ----------
19121981
* Skip TM / th in FROM_CHAR
@@ -2419,7 +2488,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24192488
INVALID_FOR_INTERVAL;
24202489
if (tmtcTzn(in))
24212490
{
2422-
char*p=str_tolower_z(tmtcTzn(in),collid);
2491+
/* We assume here that timezone names aren't localized */
2492+
char*p=asc_tolower_z(tmtcTzn(in));
24232493

24242494
strcpy(s,p);
24252495
pfree(p);
@@ -2466,7 +2536,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24662536
strcpy(s,str_toupper_z(localized_full_months[tm->tm_mon-1],collid));
24672537
else
24682538
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2469-
str_toupper_z(months_full[tm->tm_mon-1],collid));
2539+
asc_toupper_z(months_full[tm->tm_mon-1]));
24702540
s+=strlen(s);
24712541
break;
24722542
caseDCH_Month:
@@ -2476,7 +2546,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24762546
if (S_TM(n->suffix))
24772547
strcpy(s,str_initcap_z(localized_full_months[tm->tm_mon-1],collid));
24782548
else
2479-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2549+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2550+
months_full[tm->tm_mon-1]);
24802551
s+=strlen(s);
24812552
break;
24822553
caseDCH_month:
@@ -2486,10 +2557,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24862557
if (S_TM(n->suffix))
24872558
strcpy(s,str_tolower_z(localized_full_months[tm->tm_mon-1],collid));
24882559
else
2489-
{
2490-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,months_full[tm->tm_mon-1]);
2491-
*s=pg_tolower((unsignedchar)*s);
2492-
}
2560+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2561+
asc_tolower_z(months_full[tm->tm_mon-1]));
24932562
s+=strlen(s);
24942563
break;
24952564
caseDCH_MON:
@@ -2499,7 +2568,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
24992568
if (S_TM(n->suffix))
25002569
strcpy(s,str_toupper_z(localized_abbrev_months[tm->tm_mon-1],collid));
25012570
else
2502-
strcpy(s,str_toupper_z(months[tm->tm_mon-1],collid));
2571+
strcpy(s,asc_toupper_z(months[tm->tm_mon-1]));
25032572
s+=strlen(s);
25042573
break;
25052574
caseDCH_Mon:
@@ -2519,10 +2588,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25192588
if (S_TM(n->suffix))
25202589
strcpy(s,str_tolower_z(localized_abbrev_months[tm->tm_mon-1],collid));
25212590
else
2522-
{
2523-
strcpy(s,months[tm->tm_mon-1]);
2524-
*s=pg_tolower((unsignedchar)*s);
2525-
}
2591+
strcpy(s,asc_tolower_z(months[tm->tm_mon-1]));
25262592
s+=strlen(s);
25272593
break;
25282594
caseDCH_MM:
@@ -2537,34 +2603,33 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25372603
strcpy(s,str_toupper_z(localized_full_days[tm->tm_wday],collid));
25382604
else
25392605
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2540-
str_toupper_z(days[tm->tm_wday],collid));
2606+
asc_toupper_z(days[tm->tm_wday]));
25412607
s+=strlen(s);
25422608
break;
25432609
caseDCH_Day:
25442610
INVALID_FOR_INTERVAL;
25452611
if (S_TM(n->suffix))
25462612
strcpy(s,str_initcap_z(localized_full_days[tm->tm_wday],collid));
25472613
else
2548-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2614+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2615+
days[tm->tm_wday]);
25492616
s+=strlen(s);
25502617
break;
25512618
caseDCH_day:
25522619
INVALID_FOR_INTERVAL;
25532620
if (S_TM(n->suffix))
25542621
strcpy(s,str_tolower_z(localized_full_days[tm->tm_wday],collid));
25552622
else
2556-
{
2557-
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,days[tm->tm_wday]);
2558-
*s=pg_tolower((unsignedchar)*s);
2559-
}
2623+
sprintf(s,"%*s",S_FM(n->suffix) ?0 :-9,
2624+
asc_tolower_z(days[tm->tm_wday]));
25602625
s+=strlen(s);
25612626
break;
25622627
caseDCH_DY:
25632628
INVALID_FOR_INTERVAL;
25642629
if (S_TM(n->suffix))
25652630
strcpy(s,str_toupper_z(localized_abbrev_days[tm->tm_wday],collid));
25662631
else
2567-
strcpy(s,str_toupper_z(days_short[tm->tm_wday],collid));
2632+
strcpy(s,asc_toupper_z(days_short[tm->tm_wday]));
25682633
s+=strlen(s);
25692634
break;
25702635
caseDCH_Dy:
@@ -2580,10 +2645,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid col
25802645
if (S_TM(n->suffix))
25812646
strcpy(s,str_tolower_z(localized_abbrev_days[tm->tm_wday],collid));
25822647
else
2583-
{
2584-
strcpy(s,days_short[tm->tm_wday]);
2585-
*s=pg_tolower((unsignedchar)*s);
2586-
}
2648+
strcpy(s,asc_tolower_z(days_short[tm->tm_wday]));
25872649
s+=strlen(s);
25882650
break;
25892651
caseDCH_DDD:
@@ -4670,12 +4732,12 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
46704732
caseNUM_rn:
46714733
if (IS_FILLMODE(Np->Num))
46724734
{
4673-
strcpy(Np->inout_p,str_tolower_z(Np->number_p,collid));
4735+
strcpy(Np->inout_p,asc_tolower_z(Np->number_p));
46744736
Np->inout_p+=strlen(Np->inout_p)-1;
46754737
}
46764738
else
46774739
{
4678-
sprintf(Np->inout_p,"%15s",str_tolower_z(Np->number_p,collid));
4740+
sprintf(Np->inout_p,"%15s",asc_tolower_z(Np->number_p));
46794741
Np->inout_p+=strlen(Np->inout_p)-1;
46804742
}
46814743
break;

‎src/include/utils/formatting.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,10 @@ extern char *str_tolower(const char *buff, size_t nbytes, Oid collid);
2424
externchar*str_toupper(constchar*buff,size_tnbytes,Oidcollid);
2525
externchar*str_initcap(constchar*buff,size_tnbytes,Oidcollid);
2626

27+
externchar*asc_tolower(constchar*buff,size_tnbytes);
28+
externchar*asc_toupper(constchar*buff,size_tnbytes);
29+
externchar*asc_initcap(constchar*buff,size_tnbytes);
30+
2731
externDatumtimestamp_to_char(PG_FUNCTION_ARGS);
2832
externDatumtimestamptz_to_char(PG_FUNCTION_ARGS);
2933
externDatuminterval_to_char(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp