2
2
* conversion between client encoding and server internal encoding
3
3
* (currently mule internal code (mic) is used)
4
4
* Tatsuo Ishii
5
- * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
5
+ * $Id: conv.c,v 1.6 1999/03/24 07:02:16 ishii Exp $
6
6
*/
7
7
#include <stdio.h>
8
8
#include <string.h>
@@ -588,6 +588,262 @@ mic2ascii(unsigned char *mic, unsigned char *p, int len)
588
588
* p = '\0' ;
589
589
}
590
590
591
+ /*
592
+ * Cyrillic support
593
+ * currently supported Cyrillic encodings:
594
+ *
595
+ * KOI8-R (this is the charset for the mule internal code
596
+ *for Cyrillic)
597
+ * ISO-8859-5
598
+ * Microsoft's CP1251(windows-1251)
599
+ * Alternativny Variant (MS-DOS CP866)
600
+ */
601
+
602
+ /* koi2mic: KOI8-R to Mule internal code */
603
+ static void
604
+ koi2mic (unsignedchar * l ,unsignedchar * p ,int len )
605
+ {
606
+ latin2mic (l ,p ,len ,LC_KOI8_R );
607
+ }
608
+
609
+ /* mic2koi: Mule internal code to KOI8-R */
610
+ static void
611
+ mic2koi (unsignedchar * mic ,unsignedchar * p ,int len )
612
+ {
613
+ mic2latin (mic ,p ,len ,LC_KOI8_R );
614
+ }
615
+
616
/*
 * latin2mic_with_table: generic single-byte charset conversion from a
 * local charset to the Mule internal code, driven by a conversion table.
 *
 *	l	- local charset string (source, read only)
 *	p	- buffer receiving the Mule internal code (destination)
 *	len	- maximum number of bytes to consume from l
 *	lc	- leading character emitted before each converted byte
 *	tab	- conversion table (read only)
 *
 * The table is ordered according to the local charset, starting from
 * 128 (0x80): the entry at index (c - 128) holds the Mule internal code
 * point for local byte c.  An entry of 0 means "no mapping", in which
 * case a single space is written instead.
 *
 * Bytes below 128 are copied through unchanged.  Conversion stops after
 * len source bytes or at the first NUL byte, whichever comes first, and
 * the output is always NUL-terminated.  Every high-bit source byte that
 * has a mapping produces two output bytes, so the destination must have
 * room for up to 2 * len + 1 bytes.
 */
static void
latin2mic_with_table(
					 const unsigned char *l,
					 unsigned char *p,
					 int len,
					 int lc,
					 const unsigned char *tab
)
{
	unsigned char c1,
				c2;

	while (len-- > 0 && (c1 = *l++))
	{
		if (c1 < 128)
		{
			/* plain ASCII is passed through as is */
			*p++ = c1;
			continue;
		}

		c2 = tab[c1 - 128];
		if (c2)
		{
			*p++ = (unsigned char) lc;
			*p++ = c2;
		}
		else
			*p++ = ' ';			/* cannot convert */
	}
	*p = '\0';
}
651
+
652
/*
 * mic2latin_with_table: generic single-byte charset conversion from the
 * Mule internal code to a local charset, driven by a conversion table.
 *
 *	mic	- Mule internal code string (source, read only)
 *	p	- buffer receiving the local code (destination)
 *	len	- maximum number of bytes to consume from mic
 *	lc	- leading character expected in front of each non-ASCII byte
 *	tab	- conversion table (read only)
 *
 * The table is ordered according to the second byte of the Mule internal
 * code, starting from 128 (0x80): the entry at index (c - 128) holds the
 * local code point for second byte c.  An entry of 0 means "no mapping",
 * in which case a single space is written instead.
 *
 * Bytes below 128 are copied through unchanged.  A high-bit byte that is
 * not the expected leading character is replaced by a space.  Conversion
 * stops after len source bytes or at the first NUL byte, whichever comes
 * first, and the output is always NUL-terminated.
 */
static void
mic2latin_with_table(
					 const unsigned char *mic,
					 unsigned char *p,
					 int len,
					 int lc,
					 const unsigned char *tab
)
{
	unsigned char c1,
				c2;

	while (len-- > 0 && (c1 = *mic++))
	{
		if (c1 < 128)
		{
			*p++ = c1;
		}
		else if (c1 == lc)
		{
			/*
			 * The leading character must be followed by a byte with the
			 * high bit set.  If the input ends here (length exhausted or
			 * NUL terminator) or the next byte is plain ASCII, the
			 * sequence is malformed: emit a space and leave the next byte
			 * unconsumed so that the loop handles it normally.  Without
			 * this check tab[] would be indexed with a negative offset or
			 * the source would be read past its end.
			 */
			if (len <= 0 || *mic < 128)
			{
				*p++ = ' ';		/* truncated or malformed sequence */
				continue;
			}

			c1 = *mic++;
			len--;
			c2 = tab[c1 - 128];
			if (c2)
				*p++ = c2;
			else
				*p++ = ' ';		/* cannot convert */
		}
		else
		{
			*p++ = ' ';			/* bogus character */
		}
	}
	*p = '\0';
}
691
+
692
+ /* iso2mic: ISO-8859-5 to Mule internal code */
693
+ static void
694
+ iso2mic (unsignedchar * l ,unsignedchar * p ,int len )
695
+ {
696
+ static char iso2koi []= {
697
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
698
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
699
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
700
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
701
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
702
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
703
+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
704
+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
705
+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
706
+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
707
+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
708
+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
709
+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
710
+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1 ,
711
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
712
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00
713
+ };
714
+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,iso2koi );
715
+ }
716
+
717
+ /* mic2iso: Mule internal code to ISO8859-5 */
718
+ static void
719
+ mic2iso (unsignedchar * mic ,unsignedchar * p ,int len )
720
+ {
721
+ static char koi2iso []= {
722
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
723
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
724
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
725
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
726
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
727
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
728
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
729
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
730
+ 0xee ,0xd0 ,0xd1 ,0xe6 ,0xd4 ,0xd5 ,0xe4 ,0xd3 ,
731
+ 0xe5 ,0xd8 ,0xd9 ,0xda ,0xdb ,0xdc ,0xdd ,0xde ,
732
+ 0xdf ,0xef ,0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xd6 ,0xd2 ,
733
+ 0xec ,0xeb ,0xd7 ,0xe8 ,0xed ,0xe9 ,0xe7 ,0xea ,
734
+ 0xce ,0xb0 ,0xb1 ,0xc6 ,0xb4 ,0xb5 ,0xc4 ,0xb3 ,
735
+ 0xc5 ,0xb8 ,0xb9 ,0xba ,0xbb ,0xbc ,0xbd ,0xbe ,
736
+ 0xbf ,0xcf ,0xc0 ,0xc1 ,0xc2 ,0xc3 ,0xb6 ,0xb2 ,
737
+ 0xcc ,0xcb ,0xb7 ,0xc8 ,0xcd ,0xc9 ,0xc7 ,0xca
738
+ };
739
+
740
+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2iso );
741
+ }
742
+
743
+ /* win2mic: CP1251 to Mule internal code */
744
+ static void
745
+ win2mic (unsignedchar * l ,unsignedchar * p ,int len )
746
+ {
747
+ static char win2koi []= {
748
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
749
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
750
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
751
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
752
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
753
+ 0xb3 ,0x00 ,0xb4 ,0x00 ,0x00 ,0x00 ,0x00 ,0xb7 ,
754
+ 0x00 ,0x00 ,0xb6 ,0xa6 ,0xad ,0x00 ,0x00 ,0x00 ,
755
+ 0xa3 ,0x00 ,0xa4 ,0x00 ,0x00 ,0x00 ,0x00 ,0xa7 ,
756
+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
757
+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
758
+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
759
+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
760
+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
761
+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
762
+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
763
+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1
764
+ };
765
+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,win2koi );
766
+ }
767
+
768
+ /* mic2win: Mule internal code to CP1251 */
769
+ static void
770
+ mic2win (unsignedchar * mic ,unsignedchar * p ,int len )
771
+ {
772
+ static char koi2win []= {
773
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
774
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
775
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
776
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
777
+ 0x00 ,0x00 ,0x00 ,0xb8 ,0xba ,0x00 ,0xb3 ,0xbf ,
778
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xb4 ,0x00 ,0x00 ,
779
+ 0x00 ,0x00 ,0x00 ,0xa8 ,0xaa ,0x00 ,0xb2 ,0xaf ,
780
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xa5 ,0x00 ,0x00 ,
781
+ 0xfe ,0xe0 ,0xe1 ,0xf6 ,0xe4 ,0xe5 ,0xf4 ,0xe3 ,
782
+ 0xf5 ,0xe8 ,0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,
783
+ 0xef ,0xff ,0xf0 ,0xf1 ,0xf2 ,0xf3 ,0xe6 ,0xe2 ,
784
+ 0xfc ,0xfb ,0xe7 ,0xf8 ,0xfd ,0xf9 ,0xf7 ,0xfa ,
785
+ 0xde ,0xc0 ,0xc1 ,0xd6 ,0xc4 ,0xc5 ,0xd4 ,0xc3 ,
786
+ 0xd5 ,0xc8 ,0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,
787
+ 0xcf ,0xdf ,0xd0 ,0xd1 ,0xd2 ,0xd3 ,0xc6 ,0xc2 ,
788
+ 0xdc ,0xdb ,0xc7 ,0xd8 ,0xdd ,0xd9 ,0xd7 ,0xda
789
+ };
790
+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2win );
791
+ }
792
+
793
+ /* alt2mic: CP866 to Mule internal code */
794
+ static void
795
+ alt2mic (unsignedchar * l ,unsignedchar * p ,int len )
796
+ {
797
+ static char alt2koi []= {
798
+ 0xe1 ,0xe2 ,0xf7 ,0xe7 ,0xe4 ,0xe5 ,0xf6 ,0xfa ,
799
+ 0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,0xf0 ,
800
+ 0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xe6 ,0xe8 ,0xe3 ,0xfe ,
801
+ 0xfb ,0xfd ,0xff ,0xf9 ,0xf8 ,0xfc ,0xe0 ,0xf1 ,
802
+ 0xc1 ,0xc2 ,0xd7 ,0xc7 ,0xc4 ,0xc5 ,0xd6 ,0xda ,
803
+ 0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,0xd0 ,
804
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
805
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
806
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
807
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
808
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
809
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
810
+ 0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xc6 ,0xc8 ,0xc3 ,0xde ,
811
+ 0xdb ,0xdd ,0xdf ,0xd9 ,0xd8 ,0xdc ,0xc0 ,0xd1 ,
812
+ 0xb3 ,0xa3 ,0xb4 ,0xa4 ,0xb7 ,0xa7 ,0x00 ,0x00 ,
813
+ 0xb6 ,0xa6 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00
814
+ };
815
+ latin2mic_with_table (l ,p ,len ,LC_KOI8_R ,alt2koi );
816
+ }
817
+
818
+ /* mic2alt: Mule internal code to CP866 */
819
+ static void
820
+ mic2alt (unsignedchar * mic ,unsignedchar * p ,int len )
821
+ {
822
+ static char koi2alt []= {
823
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
824
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
825
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
826
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,
827
+ 0x00 ,0x00 ,0x00 ,0xf1 ,0xf3 ,0x00 ,0xf9 ,0xf5 ,
828
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xad ,0x00 ,0x00 ,
829
+ 0x00 ,0x00 ,0x00 ,0xf0 ,0xf2 ,0x00 ,0xf8 ,0xf4 ,
830
+ 0x00 ,0x00 ,0x00 ,0x00 ,0x00 ,0xbd ,0x00 ,0x00 ,
831
+ 0xee ,0xa0 ,0xa1 ,0xe6 ,0xa4 ,0xa5 ,0xe4 ,0xa3 ,
832
+ 0xe5 ,0xa8 ,0xa9 ,0xaa ,0xab ,0xac ,0xad ,0xae ,
833
+ 0xaf ,0xef ,0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xa6 ,0xa2 ,
834
+ 0xec ,0xeb ,0xa7 ,0xe8 ,0xed ,0xe9 ,0xe7 ,0xea ,
835
+ 0x9e ,0x80 ,0x81 ,0x96 ,0x84 ,0x85 ,0x94 ,0x83 ,
836
+ 0x95 ,0x88 ,0x89 ,0x8a ,0x8b ,0x8c ,0x8d ,0x8e ,
837
+ 0x8f ,0x9f ,0x90 ,0x91 ,0x92 ,0x93 ,0x86 ,0x82 ,
838
+ 0x9c ,0x9b ,0x87 ,0x98 ,0x9d ,0x99 ,0x97 ,0x9a
839
+ };
840
+ mic2latin_with_table (mic ,p ,len ,LC_KOI8_R ,koi2alt );
841
+ }
842
+
843
+ /*
844
+ * end of Cyrillic support
845
+ */
846
+
591
847
pg_encoding_conv_tbl pg_conv_tbl []= {
592
848
{SQL_ASCII ,"SQL_ASCII" ,0 ,ascii2mic ,mic2ascii },/* SQL/ACII */
593
849
{EUC_JP ,"EUC_JP" ,0 ,euc_jp2mic ,mic2euc_jp },/* EUC_JP */
@@ -600,7 +856,10 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
600
856
{LATIN2 ,"LATIN2" ,0 ,latin22mic ,mic2latin2 },/* ISO 8859 Latin 2 */
601
857
{LATIN3 ,"LATIN3" ,0 ,latin32mic ,mic2latin3 },/* ISO 8859 Latin 3 */
602
858
{LATIN4 ,"LATIN4" ,0 ,latin42mic ,mic2latin4 },/* ISO 8859 Latin 4 */
603
- {LATIN5 ,"LATIN5" ,0 ,latin52mic ,mic2latin5 },/* ISO 8859 Latin 5 */
859
+ {LATIN5 ,"LATIN5" ,0 ,iso2mic ,mic2iso },/* ISO 8859 Latin 5 */
860
+ {KOI8 ,"KOI8" ,0 ,koi2mic ,mic2koi },/* KOI8-R */
861
+ {WIN ,"WIN" ,0 ,win2mic ,mic2win },/* CP1251 */
862
+ {ALT ,"ALT" ,0 ,alt2mic ,mic2alt },/* CP866 */
604
863
{SJIS ,"SJIS" ,1 ,sjis2mic ,mic2sjis },/* SJIS */
605
864
{BIG5 ,"BIG5" ,1 ,big52mic ,mic2big5 },/* Big5 */
606
865
{-1 ,"" ,0 ,0 ,0 }/* end mark */