Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit e1de3e0

Browse files
committed
Implement the following item in TODO:
* Reject character sequences that are not valid in their charset
1 parent d7f3cbc commit e1de3e0

File tree

3 files changed

+125
-56
lines changed

3 files changed

+125
-56
lines changed

‎src/backend/utils/mb/conv.c

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
* WIN1250 client encoding support contributed by Pavel Behal
77
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
88
*
9-
* $Id: conv.c,v 1.27 2001/09/06 04:57:29 ishii Exp $
9+
* $Id: conv.c,v 1.28 2001/09/11 04:50:36 ishii Exp $
1010
*
1111
*
1212
*/
@@ -1633,48 +1633,48 @@ big5_to_utf(unsigned char *euc, unsigned char *utf, int len)
16331633
*/
16341634
pg_enconv pg_enconv_tbl[] =
16351635
{
1636-
{PG_SQL_ASCII,ascii2mic,mic2ascii,ascii2utf,utf2ascii},
1637-
{PG_EUC_JP,euc_jp2mic,mic2euc_jp,euc_jp_to_utf,utf_to_euc_jp},
1638-
{PG_EUC_CN,euc_cn2mic,mic2euc_cn,euc_cn_to_utf,utf_to_euc_cn},
1639-
{PG_EUC_KR,euc_kr2mic,mic2euc_kr,euc_kr_to_utf,utf_to_euc_kr},
1640-
{PG_EUC_TW,euc_tw2mic,mic2euc_tw,euc_tw_to_utf,utf_to_euc_tw},
1641-
{PG_UTF8,0,0,0,0},
1642-
{PG_MULE_INTERNAL,0,0,0,0},
1643-
{PG_LATIN1,latin12mic,mic2latin1,latin1_to_utf,utf_to_latin1},
1644-
{PG_LATIN2,latin22mic,mic2latin2,latin2_to_utf,utf_to_latin2},
1645-
{PG_LATIN3,latin32mic,mic2latin3,latin3_to_utf,utf_to_latin3},
1646-
{PG_LATIN4,latin42mic,mic2latin4,latin4_to_utf,utf_to_latin4},
1647-
{PG_LATIN5,iso2mic,mic2iso,latin5_to_utf,utf_to_latin5},
1648-
{PG_KOI8R,koi8r2mic,mic2koi8r,KOI8R_to_utf,utf_to_KOI8R},
1649-
{PG_WIN1251,win12512mic,mic2win1251,WIN1251_to_utf,utf_to_WIN1251},
1650-
{PG_ALT,alt2mic,mic2alt,ALT_to_utf,utf_to_ALT},
1651-
{PG_SJIS,sjis2mic,mic2sjis,sjis_to_utf,utf_to_sjis},
1636+
{PG_SQL_ASCII,ascii2mic,mic2ascii,ascii2utf,utf2ascii},
1637+
{PG_EUC_JP,euc_jp2mic,mic2euc_jp,euc_jp_to_utf,utf_to_euc_jp},
1638+
{PG_EUC_CN,euc_cn2mic,mic2euc_cn,euc_cn_to_utf,utf_to_euc_cn},
1639+
{PG_EUC_KR,euc_kr2mic,mic2euc_kr,euc_kr_to_utf,utf_to_euc_kr},
1640+
{PG_EUC_TW,euc_tw2mic,mic2euc_tw,euc_tw_to_utf,utf_to_euc_tw},
1641+
{PG_UTF8,0,0,0,0},
1642+
{PG_MULE_INTERNAL,0,0,0,0},
1643+
{PG_LATIN1,latin12mic,mic2latin1,latin1_to_utf,utf_to_latin1},
1644+
{PG_LATIN2,latin22mic,mic2latin2,latin2_to_utf,utf_to_latin2},
1645+
{PG_LATIN3,latin32mic,mic2latin3,latin3_to_utf,utf_to_latin3},
1646+
{PG_LATIN4,latin42mic,mic2latin4,latin4_to_utf,utf_to_latin4},
1647+
{PG_LATIN5,iso2mic,mic2iso,latin5_to_utf,utf_to_latin5},
1648+
{PG_KOI8R,koi8r2mic,mic2koi8r,KOI8R_to_utf,utf_to_KOI8R},
1649+
{PG_WIN1251,win12512mic,mic2win1251,WIN1251_to_utf,utf_to_WIN1251},
1650+
{PG_ALT,alt2mic,mic2alt,ALT_to_utf,utf_to_ALT},
1651+
{PG_SJIS,sjis2mic,mic2sjis,sjis_to_utf,utf_to_sjis},
16521652
{PG_BIG5,big52mic,mic2big5,big5_to_utf,utf_to_big5},
1653-
{PG_WIN1250,win12502mic,mic2win1250,0,0},
1653+
{PG_WIN1250,win12502mic,mic2win1250,0,0},
16541654
};
16551655

16561656
#else
16571657

16581658
pg_enconv pg_enconv_tbl[] =
16591659
{
1660-
{PG_SQL_ASCII,ascii2mic,mic2ascii,0,0},
1661-
{PG_EUC_JP,euc_jp2mic,mic2euc_jp,0,0},
1662-
{PG_EUC_CN,euc_cn2mic,mic2euc_cn,0,0},
1663-
{PG_EUC_KR,euc_kr2mic,mic2euc_kr,0,0},
1664-
{PG_EUC_TW,euc_tw2mic,mic2euc_tw,0,0},
1665-
{PG_UTF8,0,0,0,0},
1666-
{PG_MULE_INTERNAL,0,0,0,0},
1667-
{PG_LATIN1,latin12mic,mic2latin1,0,0},
1668-
{PG_LATIN2,latin22mic,mic2latin2,0,0},
1669-
{PG_LATIN3,latin32mic,mic2latin3,0,0},
1670-
{PG_LATIN4,latin42mic,mic2latin4,0,0},
1671-
{PG_LATIN5,iso2mic,mic2iso,0,0},
1672-
{PG_KOI8R,koi8r2mic,mic2koi8r,0,0},
1673-
{PG_WIN1251,win12512mic,mic2win1251,0,0},
1674-
{PG_ALT,alt2mic,mic2alt,0,0},
1675-
{PG_SJIS,sjis2mic,mic2sjis,0,0},
1676-
{PG_BIG5,big52mic,mic2big5,0,0},
1677-
{PG_WIN1250,win12502mic,mic2win1250,0,0},
1660+
{PG_SQL_ASCII,ascii2mic,mic2ascii,0,0},
1661+
{PG_EUC_JP,euc_jp2mic,mic2euc_jp,0,0},
1662+
{PG_EUC_CN,euc_cn2mic,mic2euc_cn,0,0},
1663+
{PG_EUC_KR,euc_kr2mic,mic2euc_kr,0,0},
1664+
{PG_EUC_TW,euc_tw2mic,mic2euc_tw,0,0},
1665+
{PG_UTF8,0,0,0,0},
1666+
{PG_MULE_INTERNAL,0,0,0,0},
1667+
{PG_LATIN1,latin12mic,mic2latin1,0,0},
1668+
{PG_LATIN2,latin22mic,mic2latin2,0,0},
1669+
{PG_LATIN3,latin32mic,mic2latin3,0,0},
1670+
{PG_LATIN4,latin42mic,mic2latin4,0,0},
1671+
{PG_LATIN5,iso2mic,mic2iso,0,0},
1672+
{PG_KOI8R,koi8r2mic,mic2koi8r,0,0},
1673+
{PG_WIN1251,win12512mic,mic2win1251,0,0},
1674+
{PG_ALT,alt2mic,mic2alt,0,0},
1675+
{PG_SJIS,sjis2mic,mic2sjis,0,0},
1676+
{PG_BIG5,big52mic,mic2big5,0,0},
1677+
{PG_WIN1250,win12502mic,mic2win1250,0,0},
16781678
};
16791679

16801680
#endif/* UNICODE_CONVERSION */

‎src/backend/utils/mb/wchar.c

Lines changed: 84 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
/*
22
* conversion functions between pg_wchar and multi-byte streams.
33
* Tatsuo Ishii
4-
* $Id: wchar.c,v 1.19 2001/09/06 04:57:29 ishii Exp $
4+
* $Id: wchar.c,v 1.20 2001/09/11 04:50:36 ishii Exp $
55
*
66
* WIN1250 client encoding updated by Pavel Behal
77
*
@@ -458,24 +458,24 @@ pg_big5_mblen(const unsigned char *s)
458458
}
459459

460460
pg_wchar_tbl pg_wchar_table[] = {
461-
{pg_ascii2wchar_with_len,pg_ascii_mblen},/* 0; PG_SQL_ASCII */
462-
{pg_eucjp2wchar_with_len,pg_eucjp_mblen},/* 1; PG_EUC_JP */
463-
{pg_euccn2wchar_with_len,pg_euccn_mblen},/* 2; PG_EUC_CN */
464-
{pg_euckr2wchar_with_len,pg_euckr_mblen},/* 3; PG_EUC_KR */
465-
{pg_euctw2wchar_with_len,pg_euctw_mblen},/* 4; PG_EUC_TW */
466-
{pg_utf2wchar_with_len,pg_utf_mblen},/* 5; PG_UNICODE */
467-
{pg_mule2wchar_with_len,pg_mule_mblen},/* 6; PG_MULE_INTERNAL */
468-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 7; PG_LATIN1 */
469-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 8; PG_LATIN2 */
470-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 9; PG_LATIN3 */
471-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 10; PG_LATIN4 */
472-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 11; PG_LATIN5 */
473-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 12; PG_KOI8 */
474-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 13; PG_WIN1251 */
475-
{pg_latin12wchar_with_len,pg_latin1_mblen},/* 14; PG_ALT */
476-
{0,pg_sjis_mblen},/* 15; PG_SJIS */
477-
{0,pg_big5_mblen},/* 17; PG_BIG5 */
478-
{pg_latin12wchar_with_len,pg_latin1_mblen}/* 18; PG_WIN1250 */
461+
{pg_ascii2wchar_with_len,pg_ascii_mblen,1},/* 0; PG_SQL_ASCII */
462+
{pg_eucjp2wchar_with_len,pg_eucjp_mblen,3},/* 1; PG_EUC_JP */
463+
{pg_euccn2wchar_with_len,pg_euccn_mblen,3},/* 2; PG_EUC_CN */
464+
{pg_euckr2wchar_with_len,pg_euckr_mblen,3},/* 3; PG_EUC_KR */
465+
{pg_euctw2wchar_with_len,pg_euctw_mblen,3},/* 4; PG_EUC_TW */
466+
{pg_utf2wchar_with_len,pg_utf_mblen,3},/* 5; PG_UNICODE */
467+
{pg_mule2wchar_with_len,pg_mule_mblen,3},/* 6; PG_MULE_INTERNAL */
468+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 7; PG_LATIN1 */
469+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 8; PG_LATIN2 */
470+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 9; PG_LATIN3 */
471+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 10; PG_LATIN4 */
472+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 11; PG_LATIN5 */
473+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 12; PG_KOI8 */
474+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 13; PG_WIN1251 */
475+
{pg_latin12wchar_with_len,pg_latin1_mblen,1},/* 14; PG_ALT */
476+
{0,pg_sjis_mblen,2},/* 15; PG_SJIS */
477+
{0,pg_big5_mblen,2},/* 17; PG_BIG5 */
478+
{pg_latin12wchar_with_len,pg_latin1_mblen,1}/* 18; PG_WIN1250 */
479479
};
480480

481481
/* returns the byte length of a word for mule internal code */
@@ -498,3 +498,68 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr)
498498
((*pg_wchar_table[encoding].mblen) (mbstr)) :
499499
((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
500500
}
501+
502+
#ifndef FRONTEND
503+
/*
504+
* Verify mbstr to make sure that it has a valid character sequence.
505+
* mbstr is not necessarily NULL terminated. length of mbstr is
506+
* specified by len. If an error was found, returns an error message.
507+
* Note that the message is kept in a static buffer, the next invocation
508+
* might break the message.
509+
* If no error was found, this function returns NULL.
510+
*/
511+
char*
512+
pg_verifymbstr(const unsigned char *mbstr, int len)
513+
{
514+
int l;
515+
int i, j;
516+
static char buf[256];
517+
int slen = 0;
518+
519+
/* we do not check single byte encodings */
520+
if (pg_wchar_table[GetDatabaseEncoding()].maxmblen <=1)
521+
return NULL;
522+
523+
while (len>0&&*mbstr)
524+
{
525+
l=pg_mblen(mbstr);
526+
527+
/* multi-byte letter? */
528+
if (l>1)
529+
{
530+
for (i=1;i<l;i++)
531+
{
532+
if (i>len||*(mbstr+i)=='\0'||
533+
/* we assume that every multi-byte letter
534+
* consists of bytes with the 8th bit set
535+
*/
536+
((*(mbstr+i)&0x80)==0))
537+
{
538+
int remains = sizeof(buf);
539+
char *p = buf;
540+
541+
slen=snprintf(p,remains,"Invalid %s character sequence found (0x",
542+
GetDatabaseEncodingName());
543+
p+=slen;
544+
remains-=slen;
545+
546+
i= ((*(mbstr+i)&0x80)==0)?l:i;
547+
548+
for (j=0;j<i;j++)
549+
{
550+
slen=snprintf(p,remains,"%02x",
551+
*(mbstr+j));
552+
p+=slen;
553+
remains-=slen;
554+
}
555+
snprintf(p,remains,")");
556+
return(buf);
557+
}
558+
}
559+
}
560+
len-=l;
561+
mbstr+=l;
562+
}
563+
return NULL;
564+
}
565+
#endif

‎src/include/mb/pg_wchar.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* $Id: pg_wchar.h,v 1.29 2001/09/06 04:57:29 ishii Exp $ */
1+
/* $Id: pg_wchar.h,v 1.30 2001/09/11 04:50:36 ishii Exp $ */
22

33
#ifndef PG_WCHAR_H
44
#define PG_WCHAR_H
@@ -182,6 +182,8 @@ typedef struct
182182
int (*mb2wchar_with_len) (); /* convert a multi-byte
183183
* string to a wchar */
184184
int (*mblen) (); /* returns the length of a multi-byte word */
185+
int maxmblen; /* max bytes for a letter in this charset */
186+
185187
} pg_wchar_tbl;
186188

187189
extern pg_wchar_tbl pg_wchar_table[];
@@ -240,6 +242,8 @@ extern unsigned char *pg_server_to_client(unsigned char *, int);
240242
extern unsigned short BIG5toCNS(unsigned short, unsigned char *);
241243
extern unsigned short CNStoBIG5(unsigned short, unsigned char);
242244

245+
char *pg_verifymbstr(const unsigned char *, int);
246+
243247
#endif/* MULTIBYTE */
244248

245249
#endif/* PG_WCHAR_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp