@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
9999* to |=* from ++ ;
100100len -= 2 ;
101101}
102- else
103- /* must be ASCII */
102+ else /* must be ASCII */
104103{
105104* to = * from ++ ;
106105len -- ;
@@ -339,6 +338,55 @@ pg_euctw_dsplen(const unsigned char *s)
339338return len ;
340339}
341340
341+ /*
342+ * Convert pg_wchar to EUC_* encoding.
343+ * caller must allocate enough space for "to", including a trailing zero!
344+ * len: length of from.
345+ * "from" not necessarily null terminated.
346+ */
347+ static int
348+ pg_wchar2euc_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
349+ {
350+ int cnt = 0 ;
351+
352+ while (len > 0 && * from )
353+ {
354+ unsignedchar c ;
355+
356+ if ((c = (* from >>24 )))
357+ {
358+ * to ++ = c ;
359+ * to ++ = (* from >>16 )& 0xff ;
360+ * to ++ = (* from >>8 )& 0xff ;
361+ * to ++ = * from & 0xff ;
362+ cnt += 4 ;
363+ }
364+ else if ((c = (* from >>16 )))
365+ {
366+ * to ++ = c ;
367+ * to ++ = (* from >>8 )& 0xff ;
368+ * to ++ = * from & 0xff ;
369+ cnt += 3 ;
370+ }
371+ else if ((c = (* from >>8 )))
372+ {
373+ * to ++ = c ;
374+ * to ++ = * from & 0xff ;
375+ cnt += 2 ;
376+ }
377+ else
378+ {
379+ * to ++ = * from ;
380+ cnt ++ ;
381+ }
382+ from ++ ;
383+ len -- ;
384+ }
385+ * to = 0 ;
386+ return cnt ;
387+ }
388+
389+
342390/*
343391 * JOHAB
344392 */
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
453501return utf8string ;
454502}
455503
504+ /*
505+ * Trivial conversion from pg_wchar to UTF-8.
506+ * caller should allocate enough space for "to"
507+ * len: length of from.
508+ * "from" not necessarily null terminated.
509+ */
510+ static int
511+ pg_wchar2utf_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
512+ {
513+ int cnt = 0 ;
514+
515+ while (len > 0 && * from )
516+ {
517+ int char_len ;
518+
519+ unicode_to_utf8 (* from ,to );
520+ char_len = pg_utf_mblen (to );
521+ cnt += char_len ;
522+ to += char_len ;
523+ from ++ ;
524+ len -- ;
525+ }
526+ * to = 0 ;
527+ return cnt ;
528+ }
456529
457530/*
458531 * Return the byte length of a UTF8 character pointed to by s
@@ -717,6 +790,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
717790return cnt ;
718791}
719792
793+ /*
794+ * convert pg_wchar to mule internal code
795+ * caller should allocate enough space for "to"
796+ * len: length of from.
797+ * "from" not necessarily null terminated.
798+ */
799+ static int
800+ pg_wchar2mule_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
801+ {
802+ int cnt = 0 ;
803+
804+ while (len > 0 && * from )
805+ {
806+ unsignedchar lb ;
807+
808+ lb = (* from >>16 )& 0xff ;
809+ if (IS_LC1 (lb ))
810+ {
811+ * to ++ = lb ;
812+ * to ++ = * from & 0xff ;
813+ cnt += 2 ;
814+ }
815+ else if (IS_LC2 (lb ))
816+ {
817+ * to ++ = lb ;
818+ * to ++ = (* from >>8 )& 0xff ;
819+ * to ++ = * from & 0xff ;
820+ cnt += 3 ;
821+ }
822+ else if (IS_LCPRV1_A_RANGE (lb ))
823+ {
824+ * to ++ = LCPRV1_A ;
825+ * to ++ = lb ;
826+ * to ++ = * from & 0xff ;
827+ cnt += 3 ;
828+ }
829+ else if (IS_LCPRV1_B_RANGE (lb ))
830+ {
831+ * to ++ = LCPRV1_B ;
832+ * to ++ = lb ;
833+ * to ++ = * from & 0xff ;
834+ cnt += 3 ;
835+ }
836+ else if (IS_LCPRV2_A_RANGE (lb ))
837+ {
838+ * to ++ = LCPRV2_A ;
839+ * to ++ = lb ;
840+ * to ++ = (* from >>8 )& 0xff ;
841+ * to ++ = * from & 0xff ;
842+ cnt += 4 ;
843+ }
844+ else if (IS_LCPRV2_B_RANGE (lb ))
845+ {
846+ * to ++ = LCPRV2_B ;
847+ * to ++ = lb ;
848+ * to ++ = (* from >>8 )& 0xff ;
849+ * to ++ = * from & 0xff ;
850+ cnt += 4 ;
851+ }
852+ else
853+ {
854+ * to ++ = * from & 0xff ;
855+ cnt += 1 ;
856+ }
857+ from ++ ;
858+ len -- ;
859+ }
860+ * to = 0 ;
861+ return cnt ;
862+ }
863+
720864int
721865pg_mule_mblen (const unsignedchar * s )
722866{
@@ -772,6 +916,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
772916return cnt ;
773917}
774918
919+ /*
920+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
921+ * high bits.
922+ * caller should allocate enough space for "to"
923+ * len: length of from.
924+ * "from" not necessarily null terminated.
925+ */
926+ static int
927+ pg_wchar2single_with_len (const pg_wchar * from ,unsignedchar * to ,int len )
928+ {
929+ int cnt = 0 ;
930+
931+ while (len > 0 && * from )
932+ {
933+ * to ++ = * from ++ ;
934+ len -- ;
935+ cnt ++ ;
936+ }
937+ * to = 0 ;
938+ return cnt ;
939+ }
940+
775941static int
776942pg_latin1_mblen (const unsignedchar * s )
777943{
@@ -1339,48 +1505,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
13391505 *-------------------------------------------------------------------
13401506 */
13411507pg_wchar_tbl pg_wchar_table []= {
/*
 * One entry per encoding; the trailing comment on each entry names it.
 * The five-field entries list: multibyte-to-pg_wchar converter, mblen
 * routine, dsplen routine, verifier routine, and a trailing integer.
 * The six-field entries are the same except that a pg_wchar-to-multibyte
 * converter (one of the pg_wchar2*_with_len functions) is inserted as the
 * second field.  The entries from PG_SJIS through PG_SHIFT_JIS_2004 put 0
 * in the converter position(s).
 * NOTE(review): the trailing integer is presumably the maximum number of
 * bytes per character for the encoding -- confirm against the declaration
 * of pg_wchar_tbl.
 */
1342- {pg_ascii2wchar_with_len ,pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1343- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1344- {pg_euccn2wchar_with_len ,pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1345- {pg_euckr2wchar_with_len ,pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1346- {pg_euctw2wchar_with_len ,pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1347- {pg_eucjp2wchar_with_len ,pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1348- {pg_utf2wchar_with_len ,pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1349- {pg_mule2wchar_with_len ,pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1350- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1351- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1352- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1353- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1354- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1355- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1356- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1357- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1358- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1359- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1360- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1361- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1362- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1363- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1364- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1365- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1366- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1367- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1368- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1369- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1370- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1371- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1372- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1373- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1374- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1375- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1376- {pg_latin12wchar_with_len ,pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1377- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1378- {0 ,pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1379- {0 ,pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1380- {0 ,pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1381- {0 ,pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1382- {0 ,pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1383- {0 ,pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
1508+ {pg_ascii2wchar_with_len ,pg_wchar2single_with_len , pg_ascii_mblen ,pg_ascii_dsplen ,pg_ascii_verifier ,1 },/* PG_SQL_ASCII */
1509+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JP */
1510+ {pg_euccn2wchar_with_len ,pg_wchar2euc_with_len , pg_euccn_mblen ,pg_euccn_dsplen ,pg_euccn_verifier ,2 },/* PG_EUC_CN */
1511+ {pg_euckr2wchar_with_len ,pg_wchar2euc_with_len , pg_euckr_mblen ,pg_euckr_dsplen ,pg_euckr_verifier ,3 },/* PG_EUC_KR */
1512+ {pg_euctw2wchar_with_len ,pg_wchar2euc_with_len , pg_euctw_mblen ,pg_euctw_dsplen ,pg_euctw_verifier ,4 },/* PG_EUC_TW */
1513+ {pg_eucjp2wchar_with_len ,pg_wchar2euc_with_len , pg_eucjp_mblen ,pg_eucjp_dsplen ,pg_eucjp_verifier ,3 },/* PG_EUC_JIS_2004 */
1514+ {pg_utf2wchar_with_len ,pg_wchar2utf_with_len , pg_utf_mblen ,pg_utf_dsplen ,pg_utf8_verifier ,4 },/* PG_UTF8 */
1515+ {pg_mule2wchar_with_len ,pg_wchar2mule_with_len , pg_mule_mblen ,pg_mule_dsplen ,pg_mule_verifier ,4 },/* PG_MULE_INTERNAL */
1516+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN1 */
1517+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN2 */
1518+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN3 */
1519+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN4 */
1520+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN5 */
1521+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN6 */
1522+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN7 */
1523+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN8 */
1524+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN9 */
1525+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_LATIN10 */
1526+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1256 */
1527+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1258 */
1528+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN866 */
1529+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN874 */
1530+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8R */
1531+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1251 */
1532+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1252 */
1533+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-5 */
1534+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-6 */
1535+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-7 */
1536+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* ISO-8859-8 */
1537+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1250 */
1538+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1253 */
1539+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1254 */
1540+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1255 */
1541+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_WIN1257 */
1542+ {pg_latin12wchar_with_len ,pg_wchar2single_with_len , pg_latin1_mblen ,pg_latin1_dsplen ,pg_latin1_verifier ,1 },/* PG_KOI8U */
1543+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 },/* PG_SJIS */
1544+ {0 ,0 , pg_big5_mblen ,pg_big5_dsplen ,pg_big5_verifier ,2 },/* PG_BIG5 */
1545+ {0 ,0 , pg_gbk_mblen ,pg_gbk_dsplen ,pg_gbk_verifier ,2 },/* PG_GBK */
1546+ {0 ,0 , pg_uhc_mblen ,pg_uhc_dsplen ,pg_uhc_verifier ,2 },/* PG_UHC */
1547+ {0 ,0 , pg_gb18030_mblen ,pg_gb18030_dsplen ,pg_gb18030_verifier ,4 },/* PG_GB18030 */
1548+ {0 ,0 , pg_johab_mblen ,pg_johab_dsplen ,pg_johab_verifier ,3 },/* PG_JOHAB */
1549+ {0 ,0 , pg_sjis_mblen ,pg_sjis_dsplen ,pg_sjis_verifier ,2 }/* PG_SHIFT_JIS_2004 */
13841550};
13851551
13861552/* returns the byte length of a word for mule internal code */