@@ -339,6 +339,54 @@ pg_euctw_dsplen(const unsigned char *s)
339339return len ;
340340}
341341
342+ /*
343+ * Convert pg_wchar to EUC_* encoding.
344+ * caller must allocate enough space for "to", including a trailing zero!
345+ * len: length of from.
346+ * "from" not necessarily null terminated.
347+ */
348+ static int
349+ pg_wchar2euc_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
350+ {
351+ int cnt = 0 ;
352+
353+ while (len > 0 && * from )
354+ {
355+ unsignedchar c ;
356+
357+ if ((c = * from >>24 ))
358+ {
359+ * to ++ = c ;
360+ * to ++ = (* from >>16 )& 0xff ;
361+ * to ++ = (* from >>8 )& 0xff ;
362+ * to ++ = * from & 0xff ;
363+ cnt += 4 ;
364+ }
365+ else if ((c = * from >>16 ))
366+ {
367+ * to ++ = c ;
368+ * to ++ = (* from >>8 )& 0xff ;
369+ * to ++ = * from & 0xff ;
370+ cnt += 3 ;
371+ }
372+ else if ((c = * from >>8 ))
373+ {
374+ * to ++ = c ;
375+ * to ++ = * from & 0xff ;
376+ cnt += 2 ;
377+ }
378+ else
379+ {
380+ * to ++ = * from ;
381+ cnt ++ ;
382+ }
383+ len -- ;
384+ }
385+ * to = 0 ;
386+ return cnt ;
387+ }
388+
389+
342390/*
343391 * JOHAB
344392 */
@@ -453,6 +501,30 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453501return utf8string ;
454502}
455503
504+ /*
505+ * Trivial conversion from pg_wchar to UTF-8.
506+ * caller should allocate enough space for "to"
507+ * len: length of from.
508+ * "from" not necessarily null terminated.
509+ */
510+ static int
511+ pg_wchar2utf_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
512+ {
513+ int cnt = 0 ;
514+
515+ while (len > 0 && * from )
516+ {
517+ int char_len ;
518+
519+ unicode_to_utf8 (* from ,to );
520+ char_len = pg_utf_mblen (to );
521+ len -- ;
522+ cnt += char_len ;
523+ to += char_len ;
524+ }
525+ * to = 0 ;
526+ return cnt ;
527+ }
456528
457529/*
458530 * Return the byte length of a UTF8 character pointed to by s
@@ -719,6 +791,75 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
719791return cnt ;
720792}
721793
794+ /*
795+ * convert pg_wchar to mule internal code
796+ * caller should allocate enough space for "to"
797+ * len: length of from.
798+ * "from" not necessarily null terminated.
799+ */
800+ static int
801+ pg_wchar2mule_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
802+ {
803+ int cnt = 0 ;
804+ unsignedchar lb ;
805+
806+ while (len > 0 && * from )
807+ {
808+ lb = (* from >>16 )& 0xff ;
809+ if (IS_LC1 (lb ))
810+ {
811+ * to ++ = lb ;
812+ * to ++ = * from & 0xff ;
813+ cnt += 2 ;
814+ }
815+ else if (IS_LC2 (lb ))
816+ {
817+ * to ++ = lb ;
818+ * to ++ = (* from >>8 )& 0xff ;
819+ * to ++ = * from & 0xff ;
820+ cnt += 3 ;
821+ }
822+ else if (IS_LCPRV1_A_RANGE (lb ))
823+ {
824+ * to ++ = LCPRV1_A ;
825+ * to ++ = lb ;
826+ * to ++ = * from & 0xff ;
827+ cnt += 3 ;
828+ }
829+ else if (IS_LCPRV1_B_RANGE (lb ))
830+ {
831+ * to ++ = LCPRV1_B ;
832+ * to ++ = lb ;
833+ * to ++ = * from & 0xff ;
834+ cnt += 3 ;
835+ }
836+ else if (IS_LCPRV2_A_RANGE (lb ))
837+ {
838+ * to ++ = LCPRV2_A ;
839+ * to ++ = lb ;
840+ * to ++ = (* from >>8 )& 0xff ;
841+ * to ++ = * from & 0xff ;
842+ cnt += 4 ;
843+ }
844+ else if (IS_LCPRV2_B_RANGE (lb ))
845+ {
846+ * to ++ = LCPRV2_B ;
847+ * to ++ = lb ;
848+ * to ++ = (* from >>8 )& 0xff ;
849+ * to ++ = * from & 0xff ;
850+ cnt += 4 ;
851+ }
852+ else
853+ {
854+ * to ++ = lb ;
855+ cnt += 1 ;
856+ }
857+ len -- ;
858+ }
859+ * to = 0 ;
860+ return cnt ;
861+ }
862+
722863int
723864pg_mule_mblen (const unsignedchar * s )
724865{
@@ -780,6 +921,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
780921return cnt ;
781922}
782923
924+ /*
925+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
926+ * high bits.
927+ * caller should allocate enough space for "to"
928+ * len: length of from.
929+ * "from" not necessarily null terminated.
930+ */
931+ static int
932+ pg_wchar2single_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
933+ {
934+ int cnt = 0 ;
935+
936+ while (len > 0 && * from )
937+ {
938+ * to ++ = * from ++ ;
939+ len -- ;
940+ cnt ++ ;
941+ }
942+ * to = 0 ;
943+ return cnt ;
944+ }
945+
783946static int
784947pg_latin1_mblen (const unsignedchar * s )
785948{
@@ -1556,48 +1719,48 @@ pg_eucjp_increment(unsigned char *charptr, int length)
15561719 *-------------------------------------------------------------------
15571720 */
/*
 * Per-encoding dispatch table; the trailing comment on each row names the
 * encoding it serves.
 *
 * This span is a diff: rows marked "-" are the previous entries and rows
 * marked "+" are their replacements.  The "+" rows insert one extra function
 * pointer in the second position, so each row reads, in order:
 *   1. multibyte -> pg_wchar converter (the *2wchar_with_len functions)
 *   2. pg_wchar -> multibyte converter (the pg_wchar2*_with_len functions)
 *   3. the *_mblen function
 *   4. the *_dsplen function
 *   5. the *_verifier function
 *   6. an integer, presumably the maximum bytes per character -- confirm
 *      against the pg_wchar_tbl declaration.
 * The last seven rows (PG_SJIS through PG_SHIFT_JIS_2004) carry 0 in the
 * converter slots, i.e. no wide-character conversion is provided for them.
 * NOTE(review): row order presumably has to match the encoding id
 * enumeration -- verify before reordering.
 */
15581721pg_wchar_tbl pg_wchar_table []= {
1559- {pg_ascii2wchar_with_len ,pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1560- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1561- {pg_euccn2wchar_with_len ,pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1562- {pg_euckr2wchar_with_len ,pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1563- {pg_euctw2wchar_with_len ,pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1564- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1565- {pg_utf2wchar_with_len ,pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1566- {pg_mule2wchar_with_len ,pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1567- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1568- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1569- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1570- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1571- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1572- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1573- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1574- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1575- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1576- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1577- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1578- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1579- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1580- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1581- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1582- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1583- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1584- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1585- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1586- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1587- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1588- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1589- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1590- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1591- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1592- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1593- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1594- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1595- {0 ,pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1596- {0 ,pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1597- {0 ,pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1598- {0 ,pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1599- {0 ,pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1600- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
1722+ {pg_ascii2wchar_with_len ,pg_wchar2single_with_len , pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1723+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1724+ {pg_euccn2wchar_with_len ,pg_wchar2euc_with_len , pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1725+ {pg_euckr2wchar_with_len ,pg_wchar2euc_with_len , pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1726+ {pg_euctw2wchar_with_len ,pg_wchar2euc_with_len , pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1727+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1728+ {pg_utf2wchar_with_len ,pg_wchar2utf_with_len , pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1729+ {pg_mule2wchar_with_len ,pg_wchar2mule_with_len , pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1730+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1731+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1732+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1733+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1734+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1735+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1736+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1737+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1738+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1739+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1740+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1741+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1742+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1743+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1744+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1745+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1746+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1747+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1748+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1749+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1750+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1751+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1752+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1753+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1754+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1755+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1756+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1757+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1758+ {0 ,0 , pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1759+ {0 ,0 , pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1760+ {0 ,0 , pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1761+ {0 ,0 , pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1762+ {0 ,0 , pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1763+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
16011764};
16021765
16031766/* returns the byte length of a word for mule internal code */