99 * src/port/win32setlocale.c
1010 *
1111 *
12- * Windows has a problem with locale names that have a dot in the country
13- * name. For example:
12+ * The setlocale() function in Windows is broken in two ways. First, it
13+ * has a problem with locale names that have a dot in the country name. For
14+ * example:
1415 *
1516 * "Chinese (Traditional)_Hong Kong S.A.R..950"
1617 *
17- * For some reason, setlocale() doesn't accept that. Fortunately, Windows'
18- * setlocale() accepts various alternative names for such countries, so we
19- * provide a wrapper setlocale() function that maps the troublemaking locale
20- * names to accepted aliases.
18+ * For some reason, setlocale() doesn't accept that as an argument, even
19+ * though setlocale(LC_ALL, NULL) returns exactly that. Fortunately, it accepts
20+ * various alternative names for such countries, so to work around the broken
21+ * setlocale() function, we map the troublemaking locale names to accepted
22+ * aliases before calling setlocale().
23+ *
24+ * The second problem is that the locale name for "Norwegian (Bokmål)"
25+ * contains a non-ASCII character. That's problematic, because it's not clear
26+ * what encoding the locale name itself is supposed to be in, when you
27+ * haven't yet set a locale. Also, it causes problems when the cluster
28+ * contains databases with different encodings, as the locale name is stored
29+ * in the pg_database system catalog. To work around that, when setlocale()
30+ * returns that locale name, map it to a pure-ASCII alias for the same
31+ * locale.
2132 *-------------------------------------------------------------------------
2233 */
2334
2738
2839struct locale_map
2940{
30- const char * locale_name_part ;/* string in locale name to replace */
31- const char * replacement ;/* string to replace it with */
41+ /*
42+ * String in locale name to replace. Can be a single string (in which case
43+ * locale_name_end is NULL), or separate start and end strings. If two
44+ * strings are given, the locale name must contain both, in that order, and
45+ * everything from the start match through the end match is replaced. This
46+ * is a poor man's regexp search, allowing replacement of "start.*end".
47+ */
48+ const char * locale_name_start ;
49+ const char * locale_name_end ;
50+
51+ const char * replacement ;/* string to replace the match with */
3252};
3353
34- static const struct locale_map locale_map_list []= {
54+ /*
55+ * Mappings applied before calling setlocale(), to the argument.
56+ */
57+ static const struct locale_map locale_map_argument []= {
3558/*
3659 * "HKG" is listed here:
3760 * http://msdn.microsoft.com/en-us/library/cdax410z%28v=vs.71%29.aspx
@@ -40,8 +63,8 @@ static const struct locale_map locale_map_list[] = {
4063 * "ARE" is the ISO-3166 three-letter code for U.A.E. It is not on the
4164 * above list, but seems to work anyway.
4265 */
43- {"Hong Kong S.A.R." ,"HKG" },
44- {"U.A.E." ,"ARE" },
66+ {"Hong Kong S.A.R." ,NULL , "HKG" },
67+ {"U.A.E." ,NULL , "ARE" },
4568
4669/*
4770 * The ISO-3166 country code for Macau S.A.R. is MAC, but Windows doesn't
@@ -56,60 +79,107 @@ static const struct locale_map locale_map_list[] = {
5679 *
5780 * Some versions of Windows spell it "Macau", others "Macao".
5881 */
59- {"Chinese (Traditional)_Macau S.A.R..950" ,"ZHM" },
60- {"Chinese_Macau S.A.R..950" ,"ZHM" },
61- {"Chinese (Traditional)_Macao S.A.R..950" ,"ZHM" },
62- {"Chinese_Macao S.A.R..950" ,"ZHM" }
82+ {"Chinese (Traditional)_Macau S.A.R..950" ,NULL ,"ZHM" },
83+ {"Chinese_Macau S.A.R..950" ,NULL ,"ZHM" },
84+ {"Chinese (Traditional)_Macao S.A.R..950" ,NULL ,"ZHM" },
85+ {"Chinese_Macao S.A.R..950" ,NULL ,"ZHM" },
86+ {NULL ,NULL ,NULL }
6387};
6488
65- char *
66- pgwin32_setlocale (int category ,const char * locale )
89+ /*
90+ * Mappings applied after calling setlocale(), to its return value.
91+ */
92+ static const struct locale_map locale_map_result []= {
93+ /*
94+ * The "Norwegian (Bokmål)" locale name contains the a-ring character.
95+ * Map it to a pure-ASCII alias.
96+ *
97+ * It's not clear what encoding setlocale() uses when it returns the
98+ * locale name, so to play it safe, we search for "Norwegian (Bokm*l)".
99+ */
100+ {"Norwegian (Bokm" ,"l)" ,"norwegian-bokmal" },
101+ {NULL ,NULL ,NULL }
102+ };
103+
104+ #define MAX_LOCALE_NAME_LEN 100
105+
106+ static char *
107+ map_locale (struct locale_map * map ,char * locale )
67108{
68- char * result ;
69- char * alias ;
109+ static char aliasbuf [MAX_LOCALE_NAME_LEN ];
70110int i ;
71111
72- if (locale == NULL )
73- return setlocale (category ,locale );
74-
75112/* Check if the locale name matches any of the problematic ones. */
76- alias = NULL ;
77- for (i = 0 ;i < lengthof (locale_map_list );i ++ )
113+ for (i = 0 ;map [i ].locale_name_start != NULL ;i ++ )
78114{
79- const char * needle = locale_map_list [i ].locale_name_part ;
80- const char * replacement = locale_map_list [i ].replacement ;
115+ const char * needle_start = map [i ].locale_name_start ;
116+ const char * needle_end = map [i ].locale_name_end ;
117+ const char * replacement = map [i ].replacement ;
81118char * match ;
119+ char * match_start = NULL ;
120+ char * match_end = NULL ;
82121
83- match = strstr (locale ,needle );
84- if (match != NULL )
122+ match = strstr (locale ,needle_start );
123+ if (match )
124+ {
125+ /*
126+ * Found a match for the first part. If this was a two-part
127+ * replacement, find the second part.
128+ */
129+ match_start = match ;
130+ if (needle_end )
131+ {
132+ match = strstr (match_start + strlen (needle_start ),needle_end );
133+ if (match )
134+ match_end = match + strlen (needle_end );
135+ else
136+ match_start = NULL ;
137+ }
138+ else
139+ match_end = match_start + strlen (needle_start );
140+ }
141+
142+ if (match_start )
85143{
86144/* Found a match. Replace the matched string. */
87- int matchpos = match - locale ;
145+ int matchpos = match_start - locale ;
88146int replacementlen = strlen (replacement );
89- char * rest = match + strlen ( needle ) ;
147+ char * rest = match_end ;
90148int restlen = strlen (rest );
91149
92- alias = malloc ( matchpos + replacementlen + restlen + 1 );
93- if (! alias )
150+ /* check that the result fits in the static buffer */
151+ if (matchpos + replacementlen + restlen + 1 > MAX_LOCALE_NAME_LEN )
94152return NULL ;
95153
96- memcpy (& alias [0 ],& locale [0 ],matchpos );
97- memcpy (& alias [matchpos ],replacement ,replacementlen );
98- memcpy ( & alias [ matchpos + replacementlen ], rest , restlen + 1 ); /* includes null
99- * terminator */
154+ memcpy (& aliasbuf [0 ],& locale [0 ],matchpos );
155+ memcpy (& aliasbuf [matchpos ],replacement ,replacementlen );
156+ /* includes null terminator */
157+ memcpy ( & aliasbuf [ matchpos + replacementlen ], rest , restlen + 1 );
100158
101- break ;
159+ return aliasbuf ;
102160}
103161}
104162
105- /* Call the real setlocale() function */
106- if (alias )
107- {
108- result = setlocale (category ,alias );
109- free (alias );
110- }
163+ /* no match, just return the original string */
164+ return locale ;
165+ }
166+
167+ char *
168+ pgwin32_setlocale (int category ,const char * locale )
169+ {
170+ char * argument ;
171+ char * result ;
172+
173+ if (locale == NULL )
174+ argument = NULL ;
111175else
112- result = setlocale (category ,locale );
176+ argument = map_locale (locale_map_argument ,locale );
177+
178+ /* Call the real setlocale() function */
179+ result = setlocale (category ,argument );
180+
181+ if (result )
182+ result = map_locale (locale_map_result ,result );
113183
114184return result ;
115185}