66 * Portions Copyright (c) 1994, Regents of the University of California
77 *
88 * IDENTIFICATION
9- * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
9+ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
1010 *
1111 *-------------------------------------------------------------------------
1212 */
@@ -58,23 +58,21 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
5858static void mic2sjis (unsignedchar * mic ,unsignedchar * p ,int len );
5959static void euc_jp2mic (unsignedchar * euc ,unsignedchar * p ,int len );
6060static void mic2euc_jp (unsignedchar * mic ,unsignedchar * p ,int len );
/* Direct EUC_JP <-> SJIS converters (no intermediate MIC buffer). */
static void euc_jp2sjis(unsigned char *euc, unsigned char *p, int len);
static void sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len);
6163
/*
 * euc_jp_to_sjis: conversion entry point for EUC_JP ---> SJIS.
 *
 * Arguments (fetched through the PG_GETARG_* macros):
 *   0 - source encoding id, asserted to be PG_EUC_JP
 *   1 - destination encoding id, asserted to be PG_SJIS
 *   2 - source string
 *   3 - destination buffer
 *   4 - source length, asserted to be >= 0
 *
 * The lines marked '-' converted through a palloc'd intermediate buffer
 * (euc_jp2mic followed by mic2sjis); the line marked '+' replaces that
 * with a single direct call to euc_jp2sjis.
 */
6264Datum
6365euc_jp_to_sjis (PG_FUNCTION_ARGS )
6466{
6567unsignedchar * src = PG_GETARG_CSTRING (2 );
6668unsignedchar * dest = PG_GETARG_CSTRING (3 );
6769int len = PG_GETARG_INT32 (4 );
68- unsignedchar * buf ;
6970
7071Assert (PG_GETARG_INT32 (0 )== PG_EUC_JP );
7172Assert (PG_GETARG_INT32 (1 )== PG_SJIS );
7273Assert (len >=0 );
7374
74- buf = palloc (len * ENCODING_GROWTH_RATE );
75- euc_jp2mic (src ,buf ,len );
76- mic2sjis (buf ,dest ,strlen (buf ));
77- pfree (buf );
75+ euc_jp2sjis (src ,dest ,len );
7876
7977PG_RETURN_VOID ();
8078}
@@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
8583unsignedchar * src = PG_GETARG_CSTRING (2 );
8684unsignedchar * dest = PG_GETARG_CSTRING (3 );
8785int len = PG_GETARG_INT32 (4 );
88- unsignedchar * buf ;
8986
9087Assert (PG_GETARG_INT32 (0 )== PG_SJIS );
9188Assert (PG_GETARG_INT32 (1 )== PG_EUC_JP );
9289Assert (len >=0 );
9390
94- buf = palloc (len * ENCODING_GROWTH_RATE );
95- sjis2mic (src ,buf ,len );
96- mic2euc_jp (buf ,dest ,strlen (buf ));
97- pfree (buf );
91+ sjis2euc_jp (src ,dest ,len );
9892
9993PG_RETURN_VOID ();
10094}
@@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
454448}
455449* p = '\0' ;
456450}
451+
452+ /*
453+ * EUC_JP -> SJIS
454+ */
455+ static void
456+ euc_jp2sjis (unsignedchar * euc ,unsignedchar * p ,int len )
457+ {
458+ int c1 ,
459+ c2 ,
460+ k ;
461+ unsignedchar * euc_end = euc + len ;
462+
463+ while (euc_end >=euc && (c1 = * euc ++ ))
464+ {
465+ if (c1 < 0x80 )
466+ {
467+ /* should be ASCII */
468+ * p ++ = c1 ;
469+ }
470+ else if (c1 == SS2 )
471+ {
472+ /* hankaku kana? */
473+ * p ++ = * euc ++ ;
474+ }
475+ else if (c1 == SS3 )
476+ {
477+ /* JIS X0212 kanji? */
478+ c1 = * euc ++ ;
479+ c2 = * euc ++ ;
480+ k = c1 <<8 |c2 ;
481+ if (k >=0xf5a1 )
482+ {
483+ /* UDC2 */
484+ c1 -= 0x54 ;
485+ * p ++ = ((c1 - 0xa1 ) >>1 )+ ((c1 < 0xdf ) ?0x81 :0xc1 )+ 0x74 ;
486+ * p ++ = c2 - ((c1 & 1 ) ? ((c2 < 0xe0 ) ?0x61 :0x60 ) :2 );
487+ }
488+ else
489+ {
490+ int i ,k2 ;
491+
492+ /* IBM kanji */
493+ for (i = 0 ;;i ++ )
494+ {
495+ k2 = ibmkanji [i ].euc & 0xffff ;
496+ if (k2 == 0xffff )
497+ {
498+ * p ++ = PGSJISALTCODE >>8 ;
499+ * p ++ = PGSJISALTCODE & 0xff ;
500+ break ;
501+ }
502+ if (k2 == k )
503+ {
504+ k = ibmkanji [i ].sjis ;
505+ * p ++ = k >>8 ;
506+ * p ++ = k & 0xff ;
507+ break ;
508+ }
509+ }
510+ }
511+ }
512+ else
513+ {
514+ /* JIS X0208 kanji? */
515+ c2 = * euc ++ ;
516+ k = (c1 <<8 ) | (c2 & 0xff );
517+ if (k >=0xf5a1 )
518+ {
519+ /* UDC1 */
520+ c1 -= 0x54 ;
521+ * p ++ = ((c1 - 0xa1 ) >>1 )+ ((c1 < 0xdf ) ?0x81 :0xc1 )+ 0x6f ;
522+ }
523+ else
524+ * p ++ = ((c1 - 0xa1 ) >>1 )+ ((c1 < 0xdf ) ?0x81 :0xc1 );
525+ * p ++ = c2 - ((c1 & 1 ) ? ((c2 < 0xe0 ) ?0x61 :0x60 ) :2 );
526+ }
527+ }
528+ * p = '\0' ;
529+ }
530+
531+ /*
532+ * SJIS ---> EUC_JP
533+ */
534+ static void
535+ sjis2euc_jp (unsignedchar * sjis ,unsignedchar * p ,int len )
536+ {
537+ int c1 ,
538+ c2 ,
539+ i ,
540+ k ,
541+ k2 ;
542+ unsignedchar * sjis_end = sjis + len ;
543+
544+ while (sjis_end >=sjis && (c1 = * sjis ++ ))
545+ {
546+ if (c1 < 0x80 )
547+ {
548+ /* should be ASCII */
549+ * p ++ = c1 ;
550+ }
551+ else if (c1 >=0xa1 && c1 <=0xdf )
552+ {
553+ /* JIS X0201 (1 byte kana) */
554+ * p ++ = SS2 ;
555+ * p ++ = c1 ;
556+ }
557+ else
558+ {
559+ /*
560+ * JIS X0208, X0212, user defined extended characters
561+ */
562+ c2 = * sjis ++ ;
563+ k = (c1 <<8 )+ c2 ;
564+ if (k >=0xed40 && k < 0xf040 )
565+ {
566+ /* NEC selection IBM kanji */
567+ for (i = 0 ;;i ++ )
568+ {
569+ k2 = ibmkanji [i ].nec ;
570+ if (k2 == 0xffff )
571+ break ;
572+ if (k2 == k )
573+ {
574+ k = ibmkanji [i ].sjis ;
575+ c1 = (k >>8 )& 0xff ;
576+ c2 = k & 0xff ;
577+ }
578+ }
579+ }
580+
581+ if (k < 0xeb3f )
582+ {
583+ /* JIS X0208 */
584+ * p ++ = ((c1 & 0x3f ) <<1 )+ 0x9f + (c2 > 0x9e );
585+ * p ++ = c2 + ((c2 > 0x9e ) ?2 :0x60 )+ (c2 < 0x80 );
586+ }
587+ else if ((k >=0xeb40 && k < 0xf040 )|| (k >=0xfc4c && k <=0xfcfc ))
588+ {
589+ /* NEC selection IBM kanji - Other undecided justice */
590+ * p ++ = PGEUCALTCODE >>8 ;
591+ * p ++ = PGEUCALTCODE & 0xff ;
592+ }
593+ else if (k >=0xf040 && k < 0xf540 )
594+ {
595+ /*
596+ * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
597+ * 0x7e7e EUC 0xf5a1 - 0xfefe
598+ */
599+ c1 -= 0x6f ;
600+ * p ++ = ((c1 & 0x3f ) <<1 )+ 0xf3 + (c2 > 0x9e );
601+ * p ++ = c2 + ((c2 > 0x9e ) ?2 :0x60 )+ (c2 < 0x80 );
602+ }
603+ else if (k >=0xf540 && k < 0xfa40 )
604+ {
605+ /*
606+ * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
607+ * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
608+ */
609+ * p ++ = SS3 ;
610+ c1 -= 0x74 ;
611+ * p ++ = ((c1 & 0x3f ) <<1 )+ 0xf3 + (c2 > 0x9e );
612+ * p ++ = c2 + ((c2 > 0x9e ) ?2 :0x60 )+ (c2 < 0x80 );
613+ }
614+ else if (k >=0xfa40 )
615+ {
616+ /*
617+ * mapping IBM kanji to X0208 and X0212
618+ *
619+ */
620+ for (i = 0 ;;i ++ )
621+ {
622+ k2 = ibmkanji [i ].sjis ;
623+ if (k2 == 0xffff )
624+ break ;
625+ if (k2 == k )
626+ {
627+ k = ibmkanji [i ].euc ;
628+ if (k >=0x8f0000 )
629+ {
630+ * p ++ = SS3 ;
631+ * p ++ = 0x80 | ((k & 0xff00 ) >>8 );
632+ * p ++ = 0x80 | (k & 0xff );
633+ }
634+ else
635+ {
636+ * p ++ = 0x80 | (k >>8 );
637+ * p ++ = 0x80 | (k & 0xff );
638+ }
639+ }
640+ }
641+ }
642+ }
643+ }
644+ * p = '\0' ;
645+ }
646+