Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a837ed8

Browse files
committed
Detect locale/encoding mismatch in initdb, or pick a suitable encoding
automatically if none was specified.
1 parent bbcee54 commit a837ed8

File tree

5 files changed

+241
-17
lines changed

5 files changed

+241
-17
lines changed

‎configure

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6989,7 +6989,8 @@ done
69896989

69906990

69916991

6992-
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h
6992+
6993+
for ac_header in crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h
69936994
do
69946995
as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
69956996
if eval "test \"\${$as_ac_Header+set}\" = set"; then

‎configure.in

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
dnl Process this file with autoconf to produce a configure script.
2-
dnl $PostgreSQL: pgsql/configure.in,v 1.366 2004/07/10 01:24:29 momjian Exp $
2+
dnl $PostgreSQL: pgsql/configure.in,v 1.367 2004/07/14 17:55:09 petere Exp $
33
dnl
44
dnl Developers, please strive to achieve this order:
55
dnl
@@ -675,7 +675,7 @@ fi
675675
##
676676

677677
dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
678-
AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h])
678+
AC_CHECK_HEADERS([crypt.h dld.h endian.h fp_class.h getopt.h ieeefp.h langinfo.h poll.h pwd.h sys/ipc.h sys/poll.h sys/pstat.h sys/select.h sys/sem.h sys/socket.h sys/shm.h sys/time.h sys/un.h termios.h utime.h wchar.h wctype.h kernel/OS.h kernel/image.h SupportDefs.h])
679679

680680
# At least on IRIX, cpp test for netinet/tcp.h will fail unless
681681
# netinet/in.h is included first.

‎doc/src/sgml/ref/initdb.sgml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/ref/initdb.sgml,v 1.30 2004/06/24 19:26:54 tgl Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/ref/initdb.sgml,v 1.31 2004/07/14 17:55:09 petere Exp $
33
PostgreSQL documentation
44
-->
55

@@ -121,8 +121,8 @@ PostgreSQL documentation
121121
<para>
122122
Selects the encoding of the template database. This will also
123123
be the default encoding of any database you create later,
124-
unless you override it there. The default is
125-
<literal>SQL_ASCII</literal>. The character sets supported by
124+
unless you override it there. The default is derived from the locale, or
125+
<literal>SQL_ASCII</literal> if that does not work. The character sets supported by
126126
the <productname>PostgreSQL</productname> server are described
127127
in <xref linkend="multibyte-charset-supported">.
128128
</para>

‎src/bin/initdb/initdb.c

Lines changed: 231 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
* Portions Copyright (c) 1994, Regents of the University of California
4040
* Portions taken from FreeBSD.
4141
*
42-
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.42 2004/07/12 01:54:10 momjian Exp $
42+
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.43 2004/07/14 17:55:10 petere Exp $
4343
*
4444
*-------------------------------------------------------------------------
4545
*/
@@ -52,6 +52,9 @@
5252
#include<locale.h>
5353
#include<signal.h>
5454
#include<errno.h>
55+
#ifdefHAVE_LANGINFO_H
56+
# include<langinfo.h>
57+
#endif
5558

5659
#include"libpq/pqsignal.h"
5760
#include"mb/pg_wchar.h"
@@ -600,29 +603,221 @@ get_id(void)
600603
returnxstrdup(pw->pw_name);
601604
}
602605

606+
/*
 * Format an encoding id as a decimal string.
 *
 * The text is built in a local buffer and the value handed back is the
 * copy produced by xstrdup().
 */
static char *
encodingid_to_string(int enc)
{
	char		numbuf[20];

	snprintf(numbuf, sizeof(numbuf), "%d", enc);

	return xstrdup(numbuf);
}
614+
603615
/*
604616
* get the encoding id for a given encoding name
605617
*/
606618
staticchar*
607619
get_encoding_id(char*encoding_name)
608620
{
609621
intenc;
610-
charresult[20];
611622

612623
if (encoding_name&&*encoding_name)
613624
{
614625
if ((enc=pg_char_to_encoding(encoding_name)) >=0&&
615626
pg_valid_server_encoding(encoding_name) >=0)
616627
{
617-
sprintf(result,"%d",enc);
618-
returnxstrdup(result);
628+
returnencodingid_to_string(enc);
619629
}
620630
}
621631
fprintf(stderr,_("%s: \"%s\" is not a valid server encoding name\n"),
622632
progname,encoding_name ?encoding_name :"(null)");
623633
exit(1);
624634
}
625635

636+
#ifdefHAVE_LANGINFO_H
637+
/*
638+
* Checks whether the encoding selected for PostgreSQL and the
639+
* encoding used by the system locale match.
640+
*/
641+
642+
structencoding_match
643+
{
644+
enumpg_encpg_enc_code;
645+
char*system_enc_name;
646+
};
647+
648+
structencoding_matchencoding_match_list[]= {
649+
{PG_EUC_JP,"EUC-JP" },
650+
{PG_EUC_JP,"eucJP" },
651+
{PG_EUC_JP,"IBM-eucJP" },
652+
{PG_EUC_JP,"sdeckanji" },
653+
654+
{PG_EUC_CN,"EUC-CN" },
655+
{PG_EUC_CN,"eucCN" },
656+
{PG_EUC_CN,"IBM-eucCN" },
657+
{PG_EUC_CN,"GB2312" },
658+
{PG_EUC_CN,"dechanzi" },
659+
660+
{PG_EUC_KR,"EUC-KR" },
661+
{PG_EUC_KR,"eucKR" },
662+
{PG_EUC_KR,"IBM-eucKR" },
663+
{PG_EUC_KR,"deckorean" },
664+
{PG_EUC_KR,"5601" },
665+
666+
{PG_EUC_TW,"EUC-TW" },
667+
{PG_EUC_TW,"eucTW" },
668+
{PG_EUC_TW,"IBM-eucTW" },
669+
{PG_EUC_TW,"cns11643" },
670+
671+
#ifdefNOT_VERIFIED
672+
{PG_JOHAB,"???" },
673+
#endif
674+
675+
{PG_UTF8,"UTF-8" },
676+
{PG_UTF8,"utf8" },
677+
678+
{PG_LATIN1,"ISO-8859-1" },
679+
{PG_LATIN1,"ISO8859-1" },
680+
{PG_LATIN1,"iso88591" },
681+
682+
{PG_LATIN2,"ISO-8859-2" },
683+
{PG_LATIN2,"ISO8859-2" },
684+
{PG_LATIN2,"iso88592" },
685+
686+
{PG_LATIN3,"ISO-8859-3" },
687+
{PG_LATIN3,"ISO8859-3" },
688+
{PG_LATIN3,"iso88593" },
689+
690+
{PG_LATIN4,"ISO-8859-4" },
691+
{PG_LATIN4,"ISO8859-4" },
692+
{PG_LATIN4,"iso88594" },
693+
694+
{PG_LATIN5,"ISO-8859-9" },
695+
{PG_LATIN5,"ISO8859-9" },
696+
{PG_LATIN5,"iso88599" },
697+
698+
{PG_LATIN6,"ISO-8859-10" },
699+
{PG_LATIN6,"ISO8859-10" },
700+
{PG_LATIN6,"iso885910" },
701+
702+
{PG_LATIN7,"ISO-8859-13" },
703+
{PG_LATIN7,"ISO8859-13" },
704+
{PG_LATIN7,"iso885913" },
705+
706+
{PG_LATIN8,"ISO-8859-14" },
707+
{PG_LATIN8,"ISO8859-14" },
708+
{PG_LATIN8,"iso885914" },
709+
710+
{PG_LATIN9,"ISO-8859-15" },
711+
{PG_LATIN9,"ISO8859-15" },
712+
{PG_LATIN9,"iso885915" },
713+
714+
{PG_LATIN10,"ISO-8859-16" },
715+
{PG_LATIN10,"ISO8859-16" },
716+
{PG_LATIN10,"iso885916" },
717+
718+
{PG_WIN1256,"CP1256" },
719+
{PG_TCVN,"CP1258" },
720+
#ifdefNOT_VERIFIED
721+
{PG_WIN874,"???" },
722+
#endif
723+
{PG_KOI8R,"KOI8-R" },
724+
{PG_WIN1251,"CP1251" },
725+
{PG_ALT,"CP866" },
726+
727+
{PG_ISO_8859_5,"ISO-8859-5" },
728+
{PG_ISO_8859_5,"ISO8859-5" },
729+
{PG_ISO_8859_5,"iso88595" },
730+
731+
{PG_ISO_8859_6,"ISO-8859-6" },
732+
{PG_ISO_8859_6,"ISO8859-6" },
733+
{PG_ISO_8859_6,"iso88596" },
734+
735+
{PG_ISO_8859_7,"ISO-8859-7" },
736+
{PG_ISO_8859_7,"ISO8859-7" },
737+
{PG_ISO_8859_7,"iso88597" },
738+
739+
{PG_ISO_8859_8,"ISO-8859-8" },
740+
{PG_ISO_8859_8,"ISO8859-8" },
741+
{PG_ISO_8859_8,"iso88598" },
742+
743+
{PG_SQL_ASCII,NULL }/* end marker */
744+
};
745+
746+
/*
 * Report the codeset name that the system associates with the locale
 * "ctype".
 *
 * LC_CTYPE is switched to "ctype" for the duration of the
 * nl_langinfo(CODESET) call and then put back to its previous value.
 *
 * Returns an xstrdup()'d string that the caller must free(), or NULL if
 * the current LC_CTYPE setting cannot be queried, if "ctype" is not
 * accepted by setlocale(), or if nl_langinfo() yields no name.  Callers
 * must be prepared for the NULL result.
 */
static char *
get_encoding_from_locale(const char *ctype)
{
	char	   *save;
	char	   *sys;

	save = setlocale(LC_CTYPE, NULL);
	if (!save)
		return NULL;
	/* later setlocale() calls may overwrite the returned string */
	save = xstrdup(save);

	/*
	 * If the switch fails the locale is left unchanged, and asking for
	 * CODESET would describe the old locale rather than the requested
	 * one.  Report failure instead of a misleading answer.
	 */
	if (setlocale(LC_CTYPE, ctype) == NULL)
	{
		free(save);
		return NULL;
	}

	/* copy before restoring: setlocale() may invalidate the result */
	sys = nl_langinfo(CODESET);
	if (sys)
		sys = xstrdup(sys);

	setlocale(LC_CTYPE, save);
	free(save);

	return sys;
}
766+
767+
staticvoid
768+
check_encodings_match(intpg_enc,constchar*ctype)
769+
{
770+
char*sys;
771+
inti;
772+
773+
sys=get_encoding_from_locale(ctype);
774+
775+
for (i=0;encoding_match_list[i].system_enc_name;i++)
776+
{
777+
if (pg_enc==encoding_match_list[i].pg_enc_code
778+
&&strcasecmp(sys,encoding_match_list[i].system_enc_name)==0)
779+
{
780+
free(sys);
781+
return;
782+
}
783+
}
784+
785+
fprintf(stderr,
786+
_("%s: warning: encoding mismatch\n"),progname);
787+
fprintf(stderr,
788+
_("The encoding you selected (%s) and the encoding that the selected\n"
789+
"locale uses (%s) are not known to match. This may lead to\n"
790+
"misbehavior in various character string processing functions. To fix\n"
791+
"this situation, rerun %s and either do not specify an encoding\n"
792+
"explicitly, or choose a matching combination.\n"),
793+
pg_encoding_to_char(pg_enc),sys,progname);
794+
795+
free(sys);
796+
return;
797+
}
798+
799+
staticint
800+
find_matching_encoding(constchar*ctype)
801+
{
802+
char*sys;
803+
inti;
804+
805+
sys=get_encoding_from_locale(ctype);
806+
807+
for (i=0;encoding_match_list[i].system_enc_name;i++)
808+
{
809+
if (strcasecmp(sys,encoding_match_list[i].system_enc_name)==0)
810+
{
811+
free(sys);
812+
returnencoding_match_list[i].pg_enc_code;
813+
}
814+
}
815+
816+
free(sys);
817+
return-1;
818+
}
819+
#endif/* HAVE_LANGINFO_H */
820+
626821
/*
627822
* get short version of VERSION
628823
*/
@@ -2027,13 +2222,11 @@ main(int argc, char *argv[])
20272222
fprintf(stderr,
20282223
"VERSION=%s\n"
20292224
"PGDATA=%s\nshare_path=%s\nPGPATH=%s\n"
2030-
"ENCODING=%s\nENCODINGID=%s\n"
20312225
"POSTGRES_SUPERUSERNAME=%s\nPOSTGRES_BKI=%s\n"
20322226
"POSTGRES_DESCR=%s\nPOSTGRESQL_CONF_SAMPLE=%s\n"
20332227
"PG_HBA_SAMPLE=%s\nPG_IDENT_SAMPLE=%s\n",
20342228
PG_VERSION,
20352229
pg_data,share_path,bin_path,
2036-
encoding,encodingid,
20372230
effective_user,bki_file,
20382231
desc_file,conf_file,
20392232
hba_file,ident_file);
@@ -2051,21 +2244,20 @@ main(int argc, char *argv[])
20512244
check_input(features_file);
20522245
check_input(system_views_file);
20532246

2247+
setlocales();
2248+
20542249
printf(_("The files belonging to this database system will be owned "
20552250
"by user \"%s\".\n"
20562251
"This user must also own the server process.\n\n"),
20572252
effective_user);
20582253

2059-
setlocales();
2060-
20612254
if (strcmp(lc_ctype,lc_collate)==0&&
20622255
strcmp(lc_ctype,lc_time)==0&&
20632256
strcmp(lc_ctype,lc_numeric)==0&&
20642257
strcmp(lc_ctype,lc_monetary)==0&&
20652258
strcmp(lc_ctype,lc_messages)==0)
20662259
{
2067-
printf(_("The database cluster will be initialized with locale %s.\n\n"),
2068-
lc_ctype);
2260+
printf(_("The database cluster will be initialized with locale %s.\n"),lc_ctype);
20692261
}
20702262
else
20712263
{
@@ -2075,7 +2267,7 @@ main(int argc, char *argv[])
20752267
" MESSAGES: %s\n"
20762268
" MONETARY: %s\n"
20772269
" NUMERIC: %s\n"
2078-
" TIME: %s\n\n"),
2270+
" TIME: %s\n"),
20792271
lc_collate,
20802272
lc_ctype,
20812273
lc_messages,
@@ -2084,6 +2276,34 @@ main(int argc, char *argv[])
20842276
lc_time);
20852277
}
20862278

2279+
#ifdefHAVE_LANGINFO_H
2280+
if (strcmp(lc_ctype,"C")!=0&&strcmp(lc_ctype,"POSIX")!=0)
2281+
{
2282+
if (strlen(encoding)==0)
2283+
{
2284+
inttmp;
2285+
tmp=find_matching_encoding(lc_ctype);
2286+
if (tmp==-1)
2287+
{
2288+
fprintf(stderr,_("%s: could not find suitable encoding for locale \"%s\"\n"),progname,lc_ctype);
2289+
fprintf(stderr,_("Rerun %s with the -E option.\n"),progname);
2290+
fprintf(stderr,_("Try \"%s --help\" for more information.\n"),progname);
2291+
exit(1);
2292+
}
2293+
else
2294+
{
2295+
encodingid=encodingid_to_string(tmp);
2296+
printf(_("The default database encoding has accordingly been set to %s.\n"),
2297+
pg_encoding_to_char(tmp));
2298+
}
2299+
}
2300+
else
2301+
check_encodings_match(atoi(encodingid),lc_ctype);
2302+
}
2303+
#endif/* HAVE_LANGINFO_H */
2304+
2305+
printf("\n");
2306+
20872307
umask(077);
20882308

20892309
/*

‎src/include/pg_config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@
200200
/* Define to 1 if `enc_part2' is member of `krb5_ticket'. */
201201
#undef HAVE_KRB5_TICKET_ENC_PART2
202202

203+
/* Define to 1 if you have the <langinfo.h> header file. */
204+
#undef HAVE_LANGINFO_H
205+
203206
/* Define to 1 if you have the `bind' library (-lbind). */
204207
#undef HAVE_LIBBIND
205208

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp