Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 72fe6d2

Browse files
committed
Make collation not depend on setlocale().
Now that the result of pg_newlocale_from_collation() is always non-NULL, then we can move the collate_is_c and ctype_is_c flags into pg_locale_t. That simplifies the logic in lc_collate_is_c() and lc_ctype_is_c(), removing the dependence on setlocale().

This commit also eliminates the multi-stage initialization of the collation cache.

As long as we have catalog access, then it's now safe to call pg_newlocale_from_collation() without checking lc_collate_is_c() first.

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
1 parent 9b282a9 commit 72fe6d2

File tree

4 files changed

+81
-154
lines changed

4 files changed

+81
-154
lines changed

‎src/backend/utils/adt/pg_locale.c

Lines changed: 26 additions & 154 deletions
Original file line number | Diff line number | Diff line change
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
128128
typedefstruct
129129
{
130130
Oidcollid;/* hash key: pg_collation OID */
131-
boolcollate_is_c;/* is collation's LC_COLLATE C? */
132-
boolctype_is_c;/* is collation's LC_CTYPE C? */
133-
boolflags_valid;/* true if above flags are valid */
134131
pg_locale_tlocale;/* locale_t struct, or 0 if not valid */
135132

136133
/* needed for simplehash */
@@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
12251222
/*
12261223
* Cache mechanism for collation information.
12271224
*
1228-
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1229-
* (or POSIX), so we can optimize a few code paths in various places.
1230-
* For the built-in C and POSIX collations, we can know that without even
1231-
* doing a cache lookup, but we want to support aliases for C/POSIX too.
1232-
* For the "default" collation, there are separate static cache variables,
1233-
* since consulting the pg_collation catalog doesn't tell us what we need.
1234-
*
1235-
* Also, if a pg_locale_t has been requested for a collation, we cache that
1236-
* for the life of a backend.
1237-
*
1238-
* Note that some code relies on the flags not reporting false negatives
1239-
* (that is, saying it's not C when it is). For example, char2wchar()
1240-
* could fail if the locale is C, so str_tolower() shouldn't call it
1241-
* in that case.
1242-
*
12431225
* Note that we currently lack any way to flush the cache. Since we don't
12441226
* support ALTER COLLATION, this is OK. The worst case is that someone
12451227
* drops a collation, and a useless cache entry hangs around in existing
12461228
* backends.
12471229
*/
1248-
12491230
staticcollation_cache_entry*
1250-
lookup_collation_cache(Oidcollation,boolset_flags)
1231+
lookup_collation_cache(Oidcollation)
12511232
{
12521233
collation_cache_entry*cache_entry;
12531234
boolfound;
@@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
12711252
* Make sure cache entry is marked invalid, in case we fail before
12721253
* setting things.
12731254
*/
1274-
cache_entry->flags_valid= false;
12751255
cache_entry->locale=0;
12761256
}
12771257

1278-
if (set_flags&& !cache_entry->flags_valid)
1279-
{
1280-
/* Attempt to set the flags */
1281-
HeapTupletp;
1282-
Form_pg_collationcollform;
1283-
1284-
tp=SearchSysCache1(COLLOID,ObjectIdGetDatum(collation));
1285-
if (!HeapTupleIsValid(tp))
1286-
elog(ERROR,"cache lookup failed for collation %u",collation);
1287-
collform= (Form_pg_collation)GETSTRUCT(tp);
1288-
1289-
if (collform->collprovider==COLLPROVIDER_BUILTIN)
1290-
{
1291-
Datumdatum;
1292-
constchar*colllocale;
1293-
1294-
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_colllocale);
1295-
colllocale=TextDatumGetCString(datum);
1296-
1297-
cache_entry->collate_is_c= true;
1298-
cache_entry->ctype_is_c= (strcmp(colllocale,"C")==0);
1299-
}
1300-
elseif (collform->collprovider==COLLPROVIDER_LIBC)
1301-
{
1302-
Datumdatum;
1303-
constchar*collcollate;
1304-
constchar*collctype;
1305-
1306-
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_collcollate);
1307-
collcollate=TextDatumGetCString(datum);
1308-
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_collctype);
1309-
collctype=TextDatumGetCString(datum);
1310-
1311-
cache_entry->collate_is_c= ((strcmp(collcollate,"C")==0)||
1312-
(strcmp(collcollate,"POSIX")==0));
1313-
cache_entry->ctype_is_c= ((strcmp(collctype,"C")==0)||
1314-
(strcmp(collctype,"POSIX")==0));
1315-
}
1316-
else
1317-
{
1318-
cache_entry->collate_is_c= false;
1319-
cache_entry->ctype_is_c= false;
1320-
}
1321-
1322-
cache_entry->flags_valid= true;
1323-
1324-
ReleaseSysCache(tp);
1325-
}
1326-
13271258
returncache_entry;
13281259
}
13291260

@@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
13411272
if (!OidIsValid(collation))
13421273
return false;
13431274

1344-
/*
1345-
* If we're asked about the default collation, we have to inquire of the C
1346-
* library. Cache the result so we only have to compute it once.
1347-
*/
1348-
if (collation==DEFAULT_COLLATION_OID)
1349-
{
1350-
staticintresult=-1;
1351-
constchar*localeptr;
1352-
1353-
if (result >=0)
1354-
return (bool)result;
1355-
1356-
if (default_locale.provider==COLLPROVIDER_BUILTIN)
1357-
{
1358-
result= true;
1359-
return (bool)result;
1360-
}
1361-
elseif (default_locale.provider==COLLPROVIDER_ICU)
1362-
{
1363-
result= false;
1364-
return (bool)result;
1365-
}
1366-
elseif (default_locale.provider==COLLPROVIDER_LIBC)
1367-
{
1368-
localeptr=setlocale(LC_CTYPE,NULL);
1369-
if (!localeptr)
1370-
elog(ERROR,"invalid LC_CTYPE setting");
1371-
}
1372-
else
1373-
elog(ERROR,"unexpected collation provider '%c'",
1374-
default_locale.provider);
1375-
1376-
if (strcmp(localeptr,"C")==0)
1377-
result= true;
1378-
elseif (strcmp(localeptr,"POSIX")==0)
1379-
result= true;
1380-
else
1381-
result= false;
1382-
return (bool)result;
1383-
}
1384-
13851275
/*
13861276
* If we're asked about the built-in C/POSIX collations, we know that.
13871277
*/
@@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
13921282
/*
13931283
* Otherwise, we have to consult pg_collation, but we cache that.
13941284
*/
1395-
return(lookup_collation_cache(collation, true))->collate_is_c;
1285+
returnpg_newlocale_from_collation(collation)->collate_is_c;
13961286
}
13971287

13981288
/*
@@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
14081298
if (!OidIsValid(collation))
14091299
return false;
14101300

1411-
/*
1412-
* If we're asked about the default collation, we have to inquire of the C
1413-
* library. Cache the result so we only have to compute it once.
1414-
*/
1415-
if (collation==DEFAULT_COLLATION_OID)
1416-
{
1417-
staticintresult=-1;
1418-
constchar*localeptr;
1419-
1420-
if (result >=0)
1421-
return (bool)result;
1422-
1423-
if (default_locale.provider==COLLPROVIDER_BUILTIN)
1424-
{
1425-
localeptr=default_locale.info.builtin.locale;
1426-
}
1427-
elseif (default_locale.provider==COLLPROVIDER_ICU)
1428-
{
1429-
result= false;
1430-
return (bool)result;
1431-
}
1432-
elseif (default_locale.provider==COLLPROVIDER_LIBC)
1433-
{
1434-
localeptr=setlocale(LC_CTYPE,NULL);
1435-
if (!localeptr)
1436-
elog(ERROR,"invalid LC_CTYPE setting");
1437-
}
1438-
else
1439-
elog(ERROR,"unexpected collation provider '%c'",
1440-
default_locale.provider);
1441-
1442-
if (strcmp(localeptr,"C")==0)
1443-
result= true;
1444-
elseif (strcmp(localeptr,"POSIX")==0)
1445-
result= true;
1446-
else
1447-
result= false;
1448-
return (bool)result;
1449-
}
1450-
14511301
/*
14521302
* If we're asked about the built-in C/POSIX collations, we know that.
14531303
*/
@@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
14581308
/*
14591309
* Otherwise, we have to consult pg_collation, but we cache that.
14601310
*/
1461-
return(lookup_collation_cache(collation, true))->ctype_is_c;
1311+
returnpg_newlocale_from_collation(collation)->ctype_is_c;
14621312
}
14631313

14641314
/* simple subroutine for reporting errors from newlocale() */
@@ -1647,6 +1497,9 @@ init_database_collation(void)
16471497

16481498
builtin_validate_locale(dbform->encoding,datlocale);
16491499

1500+
default_locale.collate_is_c= true;
1501+
default_locale.ctype_is_c= (strcmp(datlocale,"C")==0);
1502+
16501503
default_locale.info.builtin.locale=MemoryContextStrdup(
16511504
TopMemoryContext,datlocale);
16521505
}
@@ -1658,6 +1511,9 @@ init_database_collation(void)
16581511
datum=SysCacheGetAttrNotNull(DATABASEOID,tup,Anum_pg_database_datlocale);
16591512
datlocale=TextDatumGetCString(datum);
16601513

1514+
default_locale.collate_is_c= false;
1515+
default_locale.ctype_is_c= false;
1516+
16611517
datum=SysCacheGetAttr(DATABASEOID,tup,Anum_pg_database_daticurules,&isnull);
16621518
if (!isnull)
16631519
icurules=TextDatumGetCString(datum);
@@ -1678,6 +1534,11 @@ init_database_collation(void)
16781534
datum=SysCacheGetAttrNotNull(DATABASEOID,tup,Anum_pg_database_datctype);
16791535
datctype=TextDatumGetCString(datum);
16801536

1537+
default_locale.collate_is_c= (strcmp(datcollate,"C")==0)||
1538+
(strcmp(datcollate,"POSIX")==0);
1539+
default_locale.ctype_is_c= (strcmp(datctype,"C")==0)||
1540+
(strcmp(datctype,"POSIX")==0);
1541+
16811542
make_libc_collator(datcollate,datctype,&default_locale);
16821543
}
16831544

@@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
17121573
if (collid==DEFAULT_COLLATION_OID)
17131574
return&default_locale;
17141575

1715-
cache_entry=lookup_collation_cache(collid, false);
1576+
cache_entry=lookup_collation_cache(collid);
17161577

17171578
if (cache_entry->locale==0)
17181579
{
@@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
17411602
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_colllocale);
17421603
locstr=TextDatumGetCString(datum);
17431604

1605+
result.collate_is_c= true;
1606+
result.ctype_is_c= (strcmp(locstr,"C")==0);
1607+
17441608
builtin_validate_locale(GetDatabaseEncoding(),locstr);
17451609

17461610
result.info.builtin.locale=MemoryContextStrdup(TopMemoryContext,
@@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
17561620
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_collctype);
17571621
collctype=TextDatumGetCString(datum);
17581622

1623+
result.collate_is_c= (strcmp(collcollate,"C")==0)||
1624+
(strcmp(collcollate,"POSIX")==0);
1625+
result.ctype_is_c= (strcmp(collctype,"C")==0)||
1626+
(strcmp(collctype,"POSIX")==0);
1627+
17591628
make_libc_collator(collcollate,collctype,&result);
17601629
}
17611630
elseif (collform->collprovider==COLLPROVIDER_ICU)
@@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
17661635
datum=SysCacheGetAttrNotNull(COLLOID,tp,Anum_pg_collation_colllocale);
17671636
iculocstr=TextDatumGetCString(datum);
17681637

1638+
result.collate_is_c= false;
1639+
result.ctype_is_c= false;
1640+
17691641
datum=SysCacheGetAttr(COLLOID,tp,Anum_pg_collation_collicurules,&isnull);
17701642
if (!isnull)
17711643
icurules=TextDatumGetCString(datum);

‎src/include/utils/pg_locale.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,25 @@ extern void cache_locale_time(void);
6969
/*
7070
* We use a discriminated union to hold either a locale_t or an ICU collator.
7171
* pg_locale_t is occasionally checked for truth, so make it a pointer.
72+
*
73+
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
74+
* (or POSIX), so we can optimize a few code paths in various places. For the
75+
* built-in C and POSIX collations, we can know that without even doing a
76+
* cache lookup, but we want to support aliases for C/POSIX too. For the
77+
* "default" collation, there are separate static cache variables, since
78+
* consulting the pg_collation catalog doesn't tell us what we need.
79+
*
80+
* Note that some code relies on the flags not reporting false negatives
81+
* (that is, saying it's not C when it is). For example, char2wchar()
82+
* could fail if the locale is C, so str_tolower() shouldn't call it
83+
* in that case.
7284
*/
7385
structpg_locale_struct
7486
{
7587
charprovider;
7688
booldeterministic;
89+
boolcollate_is_c;
90+
boolctype_is_c;
7791
union
7892
{
7993
struct

‎src/test/regress/expected/collate.utf8.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
99
\endif
1010
SET client_encoding TO UTF8;
1111
--
12+
-- Test builtin "C"
13+
--
14+
CREATE COLLATION regress_builtin_c (
15+
provider = builtin, locale = 'C');
16+
-- non-ASCII characters are unchanged
17+
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
18+
?column?
19+
----------
20+
t
21+
(1 row)
22+
23+
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
24+
?column?
25+
----------
26+
t
27+
(1 row)
28+
29+
-- non-ASCII characters are not alphabetic
30+
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
31+
?column?
32+
----------
33+
t
34+
(1 row)
35+
36+
DROP COLLATION regress_builtin_c;
37+
--
1238
-- Test PG_C_UTF8
1339
--
1440
CREATE COLLATION regress_pg_c_utf8 (

‎src/test/regress/sql/collate.utf8.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
1111

1212
SET client_encoding TO UTF8;
1313

14+
--
15+
-- Test builtin "C"
16+
--
17+
CREATE COLLATION regress_builtin_c (
18+
provider= builtin, locale='C');
19+
20+
-- non-ASCII characters are unchanged
21+
SELECTLOWER(U&'\00C1' COLLATE regress_builtin_c)= U&'\00C1';
22+
SELECTUPPER(U&'\00E1' COLLATE regress_builtin_c)= U&'\00E1';
23+
24+
-- non-ASCII characters are not alphabetic
25+
SELECT U&'\00C1\00E1' !~'[[:alpha:]]' COLLATE regress_builtin_c;
26+
27+
DROP COLLATION regress_builtin_c;
28+
1429
--
1530
-- Test PG_C_UTF8
1631
--

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp