22 * conversion between client encoding and server internal encoding
33 * (currently mule internal code (mic) is used)
44 * Tatsuo Ishii
5- * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
5+ * $Id: conv.c,v 1.6 1999/03/24 07:02:16 ishii Exp $
66 */
77#include <stdio.h>
88#include <string.h>
@@ -588,6 +588,262 @@ mic2ascii(unsigned char *mic, unsigned char *p, int len)
588588* p = '\0' ;
589589}
590590
591+ /*
592+ * Cyrillic support
593+ * currently supported Cyrillic encodings:
594+ *
595+ * KOI8-R (this is the charset for the mule internal code
596+ *for Cyrillic)
597+ * ISO-8859-5
598+ * Microsoft's CP1251(windows-1251)
599+ * Alternativny Variant (MS-DOS CP866)
600+ */
601+
602+ /* koi2mic: KOI8-R to Mule internal code */
603+ static void
604+ koi2mic (unsignedchar * l ,unsignedchar * p ,int len )
605+ {
606+ latin2mic (l ,p ,len ,LC_KOI8_R );
607+ }
608+
609+ /* mic2koi: Mule internal code to KOI8-R */
610+ static void
611+ mic2koi (unsignedchar * mic ,unsignedchar * p ,int len )
612+ {
613+ mic2latin (mic ,p ,len ,LC_KOI8_R );
614+ }
615+
/*
 * latin2mic_with_table: table-driven conversion of a single byte
 * charset string into the mule internal code.
 *
 * The table has one entry per local code point, beginning with code
 * point 128 (0x80); entry [c - 128] is the mule internal code point
 * that corresponds to local byte c.  An entry of zero means "no
 * mapping", in which case a single blank is written instead.
 *
 * Bytes below 128 are copied through unchanged.  Every mapped byte is
 * written as two bytes: the leading character followed by the table
 * entry.  Conversion stops after len source bytes or at the first NUL
 * byte, whichever comes first, and the output is always NUL terminated.
 */
static void
latin2mic_with_table(
	unsigned char *l,		/* local charset string (source) */
	unsigned char *p,		/* pointer to store mule internal code
							 * (destination) */
	int len,				/* length of l */
	int lc,					/* leading character of p */
	unsigned char *tab		/* code conversion table */
)
{
	unsigned char	src;
	unsigned char	mapped;

	for (; len > 0; len--)
	{
		src = *l++;
		if (src == '\0')
			break;

		/* 7-bit bytes need no translation */
		if (src < 128)
		{
			*p++ = src;
			continue;
		}

		mapped = tab[src - 128];
		if (mapped == 0)
		{
			*p++ = ' ';		/* cannot convert */
			continue;
		}

		*p++ = lc;
		*p++ = mapped;
	}

	*p = '\0';
}
651+
/*
 * mic2latin_with_table: a generic single byte charset encoding
 * conversion from the mule internal code to a local charset
 * with an encoding conversion table.
 *
 * The table is ordered according to the second byte of the mule
 * internal code starting from 128 (0x80); entry [c - 128] holds the
 * corresponding code point for the local code.  An entry of zero means
 * "no mapping" and is written as a single blank.
 *
 * Bytes below 128 are copied through unchanged.  A byte equal to lc
 * introduces a two byte character whose second byte is looked up in
 * the table.  Any other byte with the high bit set is written as a
 * blank.  Conversion stops after len source bytes or at the first NUL
 * byte, whichever comes first, and the output is always NUL terminated.
 *
 * A leading character that is not followed by a valid second byte
 * (input exhausted, NUL terminator, or a byte below 128) is written as
 * a blank, and the following byte, if any, is then processed normally.
 */
static void
mic2latin_with_table(
	unsigned char *mic,		/* mule internal code (source) */
	unsigned char *p,		/* local code (destination) */
	int len,				/* length of mic */
	int lc,					/* leading character */
	unsigned char *tab		/* code conversion table */
)
{
	unsigned char	c1,
					c2;

	while (len-- > 0 && (c1 = *mic++))
	{
		if (c1 < 128)
		{
			*p++ = c1;
		}
		else if (c1 == lc)
		{
			/*
			 * Only consume the second byte when it is inside the
			 * source and has the high bit set.  Otherwise the table
			 * index would be negative, and a NUL terminator would be
			 * skipped so that the loop runs past the end of the input.
			 */
			if (len > 0 && *mic >= 128)
			{
				c2 = tab[*mic++ - 128];
				len--;
				if (c2)
					*p++ = c2;
				else
					*p++ = ' ';		/* cannot convert */
			}
			else
			{
				*p++ = ' ';			/* truncated or malformed character */
			}
		}
		else
		{
			*p++ = ' ';				/* bogus character */
		}
	}
	*p = '\0';
}
691+
692+ /* iso2mic: ISO-8859-5 to Mule internal code */
693+ static void
694+ iso2mic (unsignedchar * l ,unsignedchar * p ,int len )
695+ {
696+ static char iso2koi []= {
697+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
698+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
699+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
700+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
701+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
702+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
703+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
704+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
705+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
706+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
707+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
708+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
709+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
710+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1 ,
711+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
712+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00
713+ };
714+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,iso2koi );
715+ }
716+
717+ /* mic2iso: Mule internal code to ISO8859-5 */
718+ static void
719+ mic2iso (unsignedchar * mic ,unsignedchar * p ,int len )
720+ {
721+ static char koi2iso []= {
722+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
723+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
724+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
725+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
726+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
727+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
728+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
729+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
730+ 0xee ,0xd0 ,0xd1 ,0xe6 ,0xd4 ,0xd5 ,0xe4 ,0xd3 ,
731+ 0xe5 ,0xd8 ,0xd9 ,0xda ,0xdb ,0xdc ,0xdd ,0xde ,
732+ 0xdf ,0xef ,0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xd6 ,0xd2 ,
733+ 0xec ,0xeb ,0xd7 ,0xe8 ,0xed ,0xe9 ,0xe7 ,0xea ,
734+ 0xce ,0xb0 ,0xb1 ,0xc6 ,0xb4 ,0xb5 ,0xc4 ,0xb3 ,
735+ 0xc5 ,0xb8 ,0xb9 ,0xba ,0xbb ,0xbc ,0xbd ,0xbe ,
736+ 0xbf ,0xcf ,0xc0 ,0xc1 ,0xc2 ,0xc3 ,0xb6 ,0xb2 ,
737+ 0xcc ,0xcb ,0xb7 ,0xc8 ,0xcd ,0xc9 ,0xc7 ,0xca
738+ };
739+
740+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2iso );
741+ }
742+
743+ /* win2mic: CP1251 to Mule internal code */
744+ static void
745+ win2mic (unsignedchar * l ,unsignedchar * p ,int len )
746+ {
747+ static char win2koi []= {
748+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
749+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
750+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
751+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
752+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
753+ 0xb3 ,0x00 ,0xb4 ,0x00 ,0x00 ,0x00 ,0x00 ,0xb7 ,
754+ 0x00 ,0x00 ,0xb6 ,0xa6 ,0xad ,0x00 ,0x00 ,0x00 ,
755+ 0xa3 ,0x00 ,0xa4 ,0x00 ,0x00 ,0x00 ,0x00 ,0xa7 ,
756+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
757+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
758+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
759+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
760+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
761+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
762+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
763+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1
764+ };
765+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,win2koi );
766+ }
767+
768+ /* mic2win: Mule internal code to CP1251 */
769+ static void
770+ mic2win (unsignedchar * mic ,unsignedchar * p ,int len )
771+ {
772+ static char koi2win []= {
773+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
774+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
775+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
776+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
777+ 0x00 ,0x00 ,0x00 ,0xb8 ,0xba ,0x00 ,0xb3 ,0xbf ,
778+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xb4 ,0x00 ,0x00 ,
779+ 0x00 ,0x00 ,0x00 ,0xa8 ,0xaa ,0x00 ,0xb2 ,0xaf ,
780+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xa5 ,0x00 ,0x00 ,
781+ 0xfe ,0xe0 ,0xe1 ,0xf6 ,0xe4 ,0xe5 ,0xf4 ,0xe3 ,
782+ 0xf5 ,0xe8 ,0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,
783+ 0xef ,0xff ,0xf0 ,0xf1 ,0xf2 ,0xf3 ,0xe6 ,0xe2 ,
784+ 0xfc ,0xfb ,0xe7 ,0xf8 ,0xfd ,0xf9 ,0xf7 ,0xfa ,
785+ 0xde ,0xc0 ,0xc1 ,0xd6 ,0xc4 ,0xc5 ,0xd4 ,0xc3 ,
786+ 0xd5 ,0xc8 ,0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,
787+ 0xcf ,0xdf ,0xd0 ,0xd1 ,0xd2 ,0xd3 ,0xc6 ,0xc2 ,
788+ 0xdc ,0xdb ,0xc7 ,0xd8 ,0xdd ,0xd9 ,0xd7 ,0xda
789+ };
790+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2win );
791+ }
792+
793+ /* alt2mic: CP866 to Mule internal code */
794+ static void
795+ alt2mic (unsignedchar * l ,unsignedchar * p ,int len )
796+ {
797+ static char alt2koi []= {
798+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
799+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
800+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
801+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
802+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
803+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
804+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
805+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
806+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
807+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
808+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
809+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
810+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
811+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1 ,
812+ 0xb3 ,0xa3 ,0xb4 ,0xa4 ,0xb7 ,0xa7 ,0x00 ,0x00 ,
813+ 0xb6 ,0xa6 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00
814+ };
815+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,alt2koi );
816+ }
817+
818+ /* mic2alt: Mule internal code to CP866 */
819+ static void
820+ mic2alt (unsignedchar * mic ,unsignedchar * p ,int len )
821+ {
822+ static char koi2alt []= {
823+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
824+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
825+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
826+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
827+ 0x00 ,0x00 ,0x00 ,0xf1 ,0xf3 ,0x00 ,0xf9 ,0xf5 ,
828+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xad ,0x00 ,0x00 ,
829+ 0x00 ,0x00 ,0x00 ,0xf0 ,0xf2 ,0x00 ,0xf8 ,0xf4 ,
830+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
831+ 0xee ,0xa0 ,0xa1 ,0xe6 ,0xa4 ,0xa5 ,0xe4 ,0xa3 ,
832+ 0xe5 ,0xa8 ,0xa9 ,0xaa ,0xab ,0xac ,0xad ,0xae ,
833+ 0xaf ,0xef ,0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xa6 ,0xa2 ,
834+ 0xec ,0xeb ,0xa7 ,0xe8 ,0xed ,0xe9 ,0xe7 ,0xea ,
835+ 0x9e ,0x80 ,0x81 ,0x96 ,0x84 ,0x85 ,0x94 ,0x83 ,
836+ 0x95 ,0x88 ,0x89 ,0x8a ,0x8b ,0x8c ,0x8d ,0x8e ,
837+ 0x8f ,0x9f ,0x90 ,0x91 ,0x92 ,0x93 ,0x86 ,0x82 ,
838+ 0x9c ,0x9b ,0x87 ,0x98 ,0x9d ,0x99 ,0x97 ,0x9a
839+ };
840+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2alt );
841+ }
842+
843+ /*
844+ * end of Cyrillic support
845+ */
846+
591847pg_encoding_conv_tbl pg_conv_tbl []= {
592848{SQL_ASCII ,"SQL_ASCII" ,0 ,ascii2mic ,mic2ascii },/* SQL/ACII */
593849{EUC_JP ,"EUC_JP" ,0 ,euc_jp2mic ,mic2euc_jp },/* EUC_JP */
@@ -600,7 +856,10 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
600856{LATIN2 ,"LATIN2" ,0 ,latin22mic ,mic2latin2 },/* ISO 8859 Latin 2 */
601857{LATIN3 ,"LATIN3" ,0 ,latin32mic ,mic2latin3 },/* ISO 8859 Latin 3 */
602858{LATIN4 ,"LATIN4" ,0 ,latin42mic ,mic2latin4 },/* ISO 8859 Latin 4 */
603- {LATIN5 ,"LATIN5" ,0 ,latin52mic ,mic2latin5 },/* ISO 8859 Latin 5 */
859+ {LATIN5 ,"LATIN5" ,0 ,iso2mic ,mic2iso },/* ISO 8859 Latin 5 */
860+ {KOI8 ,"KOI8" ,0 ,koi2mic ,mic2koi },/* KOI8-R */
861+ {WIN ,"WIN" ,0 ,win2mic ,mic2win },/* CP1251 */
862+ {ALT ,"ALT" ,0 ,alt2mic ,mic2alt },/* CP866 */
604863{SJIS ,"SJIS" ,1 ,sjis2mic ,mic2sjis },/* SJIS */
605864{BIG5 ,"BIG5" ,1 ,big52mic ,mic2big5 },/* Big5 */
606865{-1 ,"" ,0 ,0 ,0 }/* end mark */