@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
9999* to |=* from ++ ;
100100len -= 2 ;
101101}
102- else
103- /* must be ASCII */
102+ else /* must be ASCII */
104103{
105104* to = * from ++ ;
106105len -- ;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
339338return len ;
340339}
341340
341+ /*
342+ * Convert pg_wchar to EUC_* encoding.
343+ * caller must allocate enough space for "to", including a trailing zero!
344+ * len: length of from.
345+ * "from" not necessarily null terminated.
346+ */
347+ static int
348+ pg_wchar2euc_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
349+ {
350+ int cnt = 0 ;
351+
352+ while (len > 0 && * from )
353+ {
354+ unsignedchar c ;
355+
356+ if ((c = (* from >>24 )))
357+ {
358+ * to ++ = c ;
359+ * to ++ = (* from >>16 )& 0xff ;
360+ * to ++ = (* from >>8 )& 0xff ;
361+ * to ++ = * from & 0xff ;
362+ cnt += 4 ;
363+ }
364+ else if ((c = (* from >>16 )))
365+ {
366+ * to ++ = c ;
367+ * to ++ = (* from >>8 )& 0xff ;
368+ * to ++ = * from & 0xff ;
369+ cnt += 3 ;
370+ }
371+ else if ((c = (* from >>8 )))
372+ {
373+ * to ++ = c ;
374+ * to ++ = * from & 0xff ;
375+ cnt += 2 ;
376+ }
377+ else
378+ {
379+ * to ++ = * from ;
380+ cnt ++ ;
381+ }
382+ from ++ ;
383+ len -- ;
384+ }
385+ * to = 0 ;
386+ return cnt ;
387+ }
388+
389+
342390/*
343391 * JOHAB
344392 */
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453501return utf8string ;
454502}
455503
504+ /*
505+ * Trivial conversion from pg_wchar to UTF-8.
506+ * caller should allocate enough space for "to"
507+ * len: length of from.
508+ * "from" not necessarily null terminated.
509+ */
510+ static int
511+ pg_wchar2utf_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
512+ {
513+ int cnt = 0 ;
514+
515+ while (len > 0 && * from )
516+ {
517+ int char_len ;
518+
519+ unicode_to_utf8 (* from ,to );
520+ char_len = pg_utf_mblen (to );
521+ cnt += char_len ;
522+ to += char_len ;
523+ from ++ ;
524+ len -- ;
525+ }
526+ * to = 0 ;
527+ return cnt ;
528+ }
456529
457530/*
458531 * Return the byte length of a UTF8 character pointed to by s
@@ -719,6 +792,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
719792return cnt ;
720793}
721794
795+ /*
796+ * convert pg_wchar to mule internal code
797+ * caller should allocate enough space for "to"
798+ * len: length of from.
799+ * "from" not necessarily null terminated.
800+ */
801+ static int
802+ pg_wchar2mule_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
803+ {
804+ int cnt = 0 ;
805+
806+ while (len > 0 && * from )
807+ {
808+ unsignedchar lb ;
809+
810+ lb = (* from >>16 )& 0xff ;
811+ if (IS_LC1 (lb ))
812+ {
813+ * to ++ = lb ;
814+ * to ++ = * from & 0xff ;
815+ cnt += 2 ;
816+ }
817+ else if (IS_LC2 (lb ))
818+ {
819+ * to ++ = lb ;
820+ * to ++ = (* from >>8 )& 0xff ;
821+ * to ++ = * from & 0xff ;
822+ cnt += 3 ;
823+ }
824+ else if (IS_LCPRV1_A_RANGE (lb ))
825+ {
826+ * to ++ = LCPRV1_A ;
827+ * to ++ = lb ;
828+ * to ++ = * from & 0xff ;
829+ cnt += 3 ;
830+ }
831+ else if (IS_LCPRV1_B_RANGE (lb ))
832+ {
833+ * to ++ = LCPRV1_B ;
834+ * to ++ = lb ;
835+ * to ++ = * from & 0xff ;
836+ cnt += 3 ;
837+ }
838+ else if (IS_LCPRV2_A_RANGE (lb ))
839+ {
840+ * to ++ = LCPRV2_A ;
841+ * to ++ = lb ;
842+ * to ++ = (* from >>8 )& 0xff ;
843+ * to ++ = * from & 0xff ;
844+ cnt += 4 ;
845+ }
846+ else if (IS_LCPRV2_B_RANGE (lb ))
847+ {
848+ * to ++ = LCPRV2_B ;
849+ * to ++ = lb ;
850+ * to ++ = (* from >>8 )& 0xff ;
851+ * to ++ = * from & 0xff ;
852+ cnt += 4 ;
853+ }
854+ else
855+ {
856+ * to ++ = * from & 0xff ;
857+ cnt += 1 ;
858+ }
859+ from ++ ;
860+ len -- ;
861+ }
862+ * to = 0 ;
863+ return cnt ;
864+ }
865+
722866int
723867pg_mule_mblen (const unsignedchar * s )
724868{
@@ -774,6 +918,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
774918return cnt ;
775919}
776920
921+ /*
922+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
923+ * high bits.
924+ * caller should allocate enough space for "to"
925+ * len: length of from.
926+ * "from" not necessarily null terminated.
927+ */
928+ static int
929+ pg_wchar2single_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
930+ {
931+ int cnt = 0 ;
932+
933+ while (len > 0 && * from )
934+ {
935+ * to ++ = * from ++ ;
936+ len -- ;
937+ cnt ++ ;
938+ }
939+ * to = 0 ;
940+ return cnt ;
941+ }
942+
777943static int
778944pg_latin1_mblen (const unsignedchar * s )
779945{
@@ -1550,48 +1716,48 @@ pg_eucjp_increment(unsigned char *charptr, int length)
15501716 *-------------------------------------------------------------------
15511717 */
/*
 * NOTE(review): this span is a diff hunk over the pg_wchar_table initializer.
 * Rows marked "-" are the old five-field form; rows marked "+" are the
 * replacement six-field form, which inserts a pg_wchar2*_with_len entry (or
 * 0 for the encodings whose first field is also 0) as the second field.
 * Fields are presumably: mb->wchar converter, wchar->mb converter, mblen,
 * dsplen, verifier, and a per-encoding byte-length limit -- confirm against
 * the pg_wchar_tbl declaration, which is not visible here.
 */
15521718pg_wchar_tbl pg_wchar_table []= {
1553- {pg_ascii2wchar_with_len ,pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1554- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1555- {pg_euccn2wchar_with_len ,pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1556- {pg_euckr2wchar_with_len ,pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1557- {pg_euctw2wchar_with_len ,pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1558- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1559- {pg_utf2wchar_with_len ,pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1560- {pg_mule2wchar_with_len ,pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1561- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1562- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1563- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1564- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1565- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1566- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1567- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1568- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1569- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1570- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1571- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1572- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1573- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1574- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1575- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1576- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1577- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1578- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1579- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1580- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1581- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1582- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1583- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1584- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1585- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1586- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1587- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1588- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1589- {0 ,pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1590- {0 ,pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1591- {0 ,pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1592- {0 ,pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1593- {0 ,pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1594- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
1719+ {pg_ascii2wchar_with_len ,pg_wchar2single_with_len , pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1720+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1721+ {pg_euccn2wchar_with_len ,pg_wchar2euc_with_len , pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1722+ {pg_euckr2wchar_with_len ,pg_wchar2euc_with_len , pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1723+ {pg_euctw2wchar_with_len ,pg_wchar2euc_with_len , pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1724+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1725+ {pg_utf2wchar_with_len ,pg_wchar2utf_with_len , pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1726+ {pg_mule2wchar_with_len ,pg_wchar2mule_with_len , pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1727+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1728+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1729+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1730+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1731+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1732+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1733+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1734+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1735+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1736+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1737+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1738+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1739+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1740+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1741+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1742+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1743+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1744+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1745+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1746+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1747+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1748+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1749+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1750+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1751+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1752+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1753+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1754+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1755+ {0 ,0 , pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1756+ {0 ,0 , pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1757+ {0 ,0 , pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1758+ {0 ,0 , pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1759+ {0 ,0 , pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1760+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
15951761};
15961762
15971763/* returns the byte length of a word for mule internal code */