88 *
99 *
1010 * IDENTIFICATION
11- * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.3 2007/09/29 00:01:43 tgl Exp $
11+ * $PostgreSQL: pgsql/src/port/chklocale.c,v 1.4 2007/10/03 17:16:39 tgl Exp $
1212 *
1313 *-------------------------------------------------------------------------
1414 */
2727#include "mb/pg_wchar.h"
2828
2929
30- #if defined(HAVE_LANGINFO_H )&& defined(CODESET )
31-
3230/*
3331 * This table needs to recognize all the CODESET spellings for supported
3432 * backend encodings, as well as frontend-only encodings where possible
3533 * (the latter case is currently only needed for initdb to recognize
36- * error situations).
34+ * error situations). On Windows, we rely on entries for codepage
35+ * numbers (CPnnn).
3736 *
3837 * Note that we search the table with pg_strcasecmp(), so variant
3938 * capitalizations don't need their own entries.
@@ -49,23 +48,27 @@ static const struct encoding_match encoding_match_list[] = {
4948{PG_EUC_JP ,"eucJP" },
5049{PG_EUC_JP ,"IBM-eucJP" },
5150{PG_EUC_JP ,"sdeckanji" },
51+ {PG_EUC_JP ,"CP20932" },
5252
5353{PG_EUC_CN ,"EUC-CN" },
5454{PG_EUC_CN ,"eucCN" },
5555{PG_EUC_CN ,"IBM-eucCN" },
5656{PG_EUC_CN ,"GB2312" },
5757{PG_EUC_CN ,"dechanzi" },
58+ {PG_EUC_CN ,"CP20936" },
5859
5960{PG_EUC_KR ,"EUC-KR" },
6061{PG_EUC_KR ,"eucKR" },
6162{PG_EUC_KR ,"IBM-eucKR" },
6263{PG_EUC_KR ,"deckorean" },
6364{PG_EUC_KR ,"5601" },
65+ {PG_EUC_KR ,"CP51949" },/* or 20949 ? */
6466
6567{PG_EUC_TW ,"EUC-TW" },
6668{PG_EUC_TW ,"eucTW" },
6769{PG_EUC_TW ,"IBM-eucTW" },
6870{PG_EUC_TW ,"cns11643" },
71+ /* No codepage for EUC-TW ? */
6972
7073{PG_UTF8 ,"UTF-8" },
7174{PG_UTF8 ,"utf8" },
@@ -111,6 +114,7 @@ static const struct encoding_match encoding_match_list[] = {
111114{PG_LATIN10 ,"iso885916" },
112115
113116{PG_KOI8R ,"KOI8-R" },
117+ {PG_KOI8R ,"CP20866" },
114118
115119{PG_WIN1252 ,"CP1252" },
116120{PG_WIN1253 ,"CP1253" },
@@ -143,23 +147,56 @@ static const struct encoding_match encoding_match_list[] = {
143147
144148{PG_SJIS ,"SJIS" },
145149{PG_SJIS ,"PCK" },
150+ {PG_SJIS ,"CP932" },
146151
147152{PG_BIG5 ,"BIG5" },
148153{PG_BIG5 ,"BIG5HKSCS" },
154+ {PG_BIG5 ,"CP950" },
149155
150156{PG_GBK ,"GBK" },
157+ {PG_GBK ,"CP936" },
151158
152159{PG_UHC ,"UHC" },
153160
154161{PG_JOHAB ,"JOHAB" },
162+ {PG_JOHAB ,"CP1361" },
155163
156164{PG_GB18030 ,"GB18030" },
165+ {PG_GB18030 ,"CP54936" },
157166
158167{PG_SHIFT_JIS_2004 ,"SJIS_2004" },
159168
160169{PG_SQL_ASCII ,NULL }/* end marker */
161170};
162171
172+ #ifdef WIN32
/*
 * On Windows, use CP<codepage number> instead of the nl_langinfo() result.
 *
 * ctype is a locale name such as the one returned by setlocale().  The
 * text following the last '.' in that name is taken as the codepage and
 * is returned with "CP" prepended, e.g. "English_USA.1252" yields "CP1252".
 *
 * The result is a malloc'd string that the caller must free().  Returns
 * NULL if ctype is NULL, if it contains no '.', or if memory allocation
 * fails.
 */
static char *
win32_langinfo(const char *ctype)
{
	const char *codepage;
	char	   *r;
	size_t		ln;

	if (!ctype)
		return NULL;

	/*
	 * Locale format on Win32 is <Language>_<Country>.<CodePage> .
	 * For example, English_USA.1252.
	 */
	codepage = strrchr(ctype, '.');
	if (!codepage)
		return NULL;
	codepage++;					/* skip over the '.' itself */

	ln = strlen(codepage);
	r = malloc(ln + 3);			/* "CP" + codepage + terminating NUL */
	if (!r)
		return NULL;			/* out of memory; report as "unknown" */

	memcpy(r, "CP", 2);
	memcpy(r + 2, codepage, ln + 1);	/* copies the terminating NUL too */

	return r;
}
197+ #endif /* WIN32 */
198+
199+ #if (defined(HAVE_LANGINFO_H )&& defined(CODESET ))|| defined(WIN32 )
163200
164201/*
165202 * Given a setting for LC_CTYPE, return the Postgres ID of the associated
@@ -181,6 +218,7 @@ pg_get_encoding_from_locale(const char *ctype)
181218if (ctype )
182219{
183220char * save ;
221+ char * name ;
184222
185223save = setlocale (LC_CTYPE ,NULL );
186224if (!save )
@@ -190,15 +228,20 @@ pg_get_encoding_from_locale(const char *ctype)
190228if (!save )
191229return PG_SQL_ASCII ;/* out of memory; unlikely */
192230
193- if (!setlocale (LC_CTYPE ,ctype ))
231+ name = setlocale (LC_CTYPE ,ctype );
232+ if (!name )
194233{
195234free (save );
196235return PG_SQL_ASCII ;/* bogus ctype passed in? */
197236}
198237
238+ #ifndef WIN32
199239sys = nl_langinfo (CODESET );
200240if (sys )
201241sys = strdup (sys );
242+ #else
243+ sys = win32_langinfo (name );
244+ #endif
202245
203246setlocale (LC_CTYPE ,save );
204247free (save );
@@ -209,9 +252,13 @@ pg_get_encoding_from_locale(const char *ctype)
209252ctype = setlocale (LC_CTYPE ,NULL );
210253if (!ctype )
211254return PG_SQL_ASCII ;/* setlocale() broken? */
255+ #ifndef WIN32
212256sys = nl_langinfo (CODESET );
213257if (sys )
214258sys = strdup (sys );
259+ #else
260+ sys = win32_langinfo (ctype );
261+ #endif
215262}
216263
217264if (!sys )
@@ -268,7 +315,7 @@ pg_get_encoding_from_locale(const char *ctype)
268315return PG_SQL_ASCII ;
269316}
270317
271- #else /*! (HAVE_LANGINFO_H && CODESET) */
318+ #else /* (HAVE_LANGINFO_H && CODESET) || WIN32 */
272319
273320/*
274321 * stub if no platform support
@@ -279,4 +326,4 @@ pg_get_encoding_from_locale(const char *ctype)
279326return PG_SQL_ASCII ;
280327}
281328
282- #endif /* HAVE_LANGINFO_H && CODESET */
329+ #endif /*( HAVE_LANGINFO_H && CODESET) || WIN32 */