Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commitb8f9a2a

Browse files
committed
Add support for collation attributes on older ICU versions
Starting in ICU 54, collation customization attributes can be specified in the locale string, for example "@colStrength=primary;colCaseLevel=yes". Add support for this for older ICU versions as well, by adding some minimal parsing of the attributes in the locale string and calling ucol_setAttribute() on them. This is essentially what newer ICU versions do internally in ucol_open(). This way we can offer this functionality in a consistent way in all ICU versions supported by PostgreSQL. Also add some tests for ICU collation customization. Reported-by: Daniel Verite <daniel@manitou-mail.org> Discussion: https://www.postgresql.org/message-id/0270ebd4-f67c-8774-1a5a-91adfb9bb41f@2ndquadrant.com
1 parent 042162d, commit b8f9a2a

File tree

3 files changed

+164
-0
lines changed

3 files changed

+164
-0
lines changed

‎src/backend/utils/adt/pg_locale.c

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include"catalog/pg_control.h"
5959
#include"mb/pg_wchar.h"
6060
#include"utils/builtins.h"
61+
#include"utils/formatting.h"
6162
#include"utils/hsearch.h"
6263
#include"utils/lsyscache.h"
6364
#include"utils/memutils.h"
@@ -132,6 +133,9 @@ static HTAB *collation_cache = NULL;
132133
staticchar*IsoLocaleName(constchar*);/* MSVC specific */
133134
#endif
134135

136+
#ifdefUSE_ICU
137+
staticvoidicu_set_collation_attributes(UCollator*collator,constchar*loc);
138+
#endif
135139

136140
/*
137141
* pg_perm_setlocale
@@ -1380,6 +1384,9 @@ pg_newlocale_from_collation(Oid collid)
13801384
(errmsg("could not open collator for locale \"%s\": %s",
13811385
collcollate,u_errorName(status))));
13821386

1387+
if (U_ICU_VERSION_MAJOR_NUM<54)
1388+
icu_set_collation_attributes(collator,collcollate);
1389+
13831390
/* We will leak this string if we get an error below :-( */
13841391
result.info.icu.locale=MemoryContextStrdup(TopMemoryContext,
13851392
collcollate);
@@ -1588,6 +1595,103 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
15881595
returnlen_result;
15891596
}
15901597

1598+
/*
1599+
* Parse collation attributes and apply them to the open collator. This takes
1600+
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1601+
* applies the key-value arguments.
1602+
*
1603+
* Starting with ICU version 54, the attributes are processed automatically by
1604+
* ucol_open(), so this is only necessary for emulating this behavior on older
1605+
* versions.
1606+
*/
1607+
pg_attribute_unused()
1608+
staticvoid
1609+
icu_set_collation_attributes(UCollator*collator,constchar*loc)
1610+
{
1611+
char*str=asc_tolower(loc,strlen(loc));
1612+
1613+
str=strchr(str,'@');
1614+
if (!str)
1615+
return;
1616+
str++;
1617+
1618+
for (char*token=strtok(str,";");token;token=strtok(NULL,";"))
1619+
{
1620+
char*e=strchr(token,'=');
1621+
1622+
if (e)
1623+
{
1624+
char*name;
1625+
char*value;
1626+
UColAttributeuattr=-1;
1627+
UColAttributeValueuvalue=-1;
1628+
UErrorCodestatus;
1629+
1630+
status=U_ZERO_ERROR;
1631+
1632+
*e='\0';
1633+
name=token;
1634+
value=e+1;
1635+
1636+
/*
1637+
* See attribute name and value lists in ICU i18n/coll.cpp
1638+
*/
1639+
if (strcmp(name,"colstrength")==0)
1640+
uattr=UCOL_STRENGTH;
1641+
elseif (strcmp(name,"colbackwards")==0)
1642+
uattr=UCOL_FRENCH_COLLATION;
1643+
elseif (strcmp(name,"colcaselevel")==0)
1644+
uattr=UCOL_CASE_LEVEL;
1645+
elseif (strcmp(name,"colcasefirst")==0)
1646+
uattr=UCOL_CASE_FIRST;
1647+
elseif (strcmp(name,"colalternate")==0)
1648+
uattr=UCOL_ALTERNATE_HANDLING;
1649+
elseif (strcmp(name,"colnormalization")==0)
1650+
uattr=UCOL_NORMALIZATION_MODE;
1651+
elseif (strcmp(name,"colnumeric")==0)
1652+
uattr=UCOL_NUMERIC_COLLATION;
1653+
/* ignore if unknown */
1654+
1655+
if (strcmp(value,"primary")==0)
1656+
uvalue=UCOL_PRIMARY;
1657+
elseif (strcmp(value,"secondary")==0)
1658+
uvalue=UCOL_SECONDARY;
1659+
elseif (strcmp(value,"tertiary")==0)
1660+
uvalue=UCOL_TERTIARY;
1661+
elseif (strcmp(value,"quaternary")==0)
1662+
uvalue=UCOL_QUATERNARY;
1663+
elseif (strcmp(value,"identical")==0)
1664+
uvalue=UCOL_IDENTICAL;
1665+
elseif (strcmp(value,"no")==0)
1666+
uvalue=UCOL_OFF;
1667+
elseif (strcmp(value,"yes")==0)
1668+
uvalue=UCOL_ON;
1669+
elseif (strcmp(value,"shifted")==0)
1670+
uvalue=UCOL_SHIFTED;
1671+
elseif (strcmp(value,"non-ignorable")==0)
1672+
uvalue=UCOL_NON_IGNORABLE;
1673+
elseif (strcmp(value,"lower")==0)
1674+
uvalue=UCOL_LOWER_FIRST;
1675+
elseif (strcmp(value,"upper")==0)
1676+
uvalue=UCOL_UPPER_FIRST;
1677+
else
1678+
status=U_ILLEGAL_ARGUMENT_ERROR;
1679+
1680+
if (uattr!=-1&&uvalue!=-1)
1681+
ucol_setAttribute(collator,uattr,uvalue,&status);
1682+
1683+
/*
1684+
* Pretend the error came from ucol_open(), for consistent error
1685+
* message across ICU versions.
1686+
*/
1687+
if (U_FAILURE(status))
1688+
ereport(ERROR,
1689+
(errmsg("could not open collator for locale \"%s\": %s",
1690+
loc,u_errorName(status))));
1691+
}
1692+
}
1693+
}
1694+
15911695
#endif/* USE_ICU */
15921696

15931697
/*

‎src/test/regress/expected/collate.icu.utf8.out

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,45 @@ select textrange_en_us('A','Z') @> 'b'::text;
11001100

11011101
drop type textrange_c;
11021102
drop type textrange_en_us;
1103+
-- test ICU collation customization
1104+
CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
1105+
SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
1106+
?column? | ?column?
1107+
----------+----------
1108+
t | t
1109+
(1 row)
1110+
1111+
CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
1112+
SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
1113+
?column? | ?column?
1114+
----------+----------
1115+
t | t
1116+
(1 row)
1117+
1118+
CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
1119+
CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
1120+
SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
1121+
?column? | ?column?
1122+
----------+----------
1123+
t | t
1124+
(1 row)
1125+
1126+
CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
1127+
SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
1128+
?column? | ?column?
1129+
----------+----------
1130+
t | t
1131+
(1 row)
1132+
1133+
CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
1134+
SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
1135+
?column? | ?column?
1136+
----------+----------
1137+
t | t
1138+
(1 row)
1139+
1140+
CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
1141+
ERROR: could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUMENT_ERROR
11031142
-- cleanup
11041143
SET client_min_messages TO warning;
11051144
DROP SCHEMA collate_tests CASCADE;

‎src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,27 @@ drop type textrange_c;
425425
droptype textrange_en_us;
426426

427427

428+
-- test ICU collation customization
429+
430+
CREATE COLLATION testcoll_ignore_accents (provider= icu, locale='@colStrength=primary;colCaseLevel=yes');
431+
SELECT'aaá'>'AAA' COLLATE"und-x-icu",'aaá'<'AAA' COLLATE testcoll_ignore_accents;
432+
433+
CREATE COLLATION testcoll_backwards (provider= icu, locale='@colBackwards=yes');
434+
SELECT'coté'<'côte' COLLATE"und-x-icu",'coté'>'côte' COLLATE testcoll_backwards;
435+
436+
CREATE COLLATION testcoll_lower_first (provider= icu, locale='@colCaseFirst=lower');
437+
CREATE COLLATION testcoll_upper_first (provider= icu, locale='@colCaseFirst=upper');
438+
SELECT'aaa'<'AAA' COLLATE testcoll_lower_first,'aaa'>'AAA' COLLATE testcoll_upper_first;
439+
440+
CREATE COLLATION testcoll_shifted (provider= icu, locale='@colAlternate=shifted');
441+
SELECT'de-luge'<'deanza' COLLATE"und-x-icu",'de-luge'>'deanza' COLLATE testcoll_shifted;
442+
443+
CREATE COLLATION testcoll_numeric (provider= icu, locale='@colNumeric=yes');
444+
SELECT'A-21'>'A-123' COLLATE"und-x-icu",'A-21'<'A-123' COLLATE testcoll_numeric;
445+
446+
CREATE COLLATION testcoll_error1 (provider= icu, locale='@colNumeric=lower');
447+
448+
428449
-- cleanup
429450
SET client_min_messages TO warning;
430451
DROPSCHEMA collate_tests CASCADE;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp