Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 08e0b34

Browse files
committed
Back out fix for Unicode characters above 0x10000
1 parent 5d7a555, commit 08e0b34

File tree

3 files changed

+47
-84
lines changed

3 files changed

+47
-84
lines changed

‎doc/src/sgml/postgres.sgml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.65 2004/11/12 21:50:53 tgl Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.66 2004/12/03 01:20:14 momjian Exp $
33
-->
44

55
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V4.2//EN" [
@@ -179,6 +179,7 @@ $PostgreSQL: pgsql/doc/src/sgml/postgres.sgml,v 1.65 2004/11/12 21:50:53 tgl Exp
179179
&lobj;
180180
&ecpg;
181181
&infoschema;
182+
&external_projects;
182183

183184
</part>
184185

‎src/backend/utils/mb/wchar.c

Lines changed: 44 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
/*
22
* conversion functions between pg_wchar and multibyte streams.
33
* Tatsuo Ishii
4-
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.39 2004/12/02 22:37:13 momjian Exp $
4+
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.40 2004/12/03 01:20:20 momjian Exp $
55
*
66
* WIN1250 client encoding updated by Pavel Behal
77
*
@@ -343,31 +343,6 @@ pg_johab_dsplen(const unsigned char *s)
343343
return (pg_euc_dsplen(s));
344344
}
345345

346-
boolisLegalUTF8(constUTF8*source,intlen) {
347-
UTF8a;
348-
constUTF8*srcptr=source+len;
349-
if(!source|| (pg_utf_mblen(source)!=len))return false;
350-
switch (len) {
351-
default:return false;
352-
/* Everything else falls through when "true"... */
353-
case6:if ((a= (*--srcptr))<0x80||a>0xBF)return false;
354-
case5:if ((a= (*--srcptr))<0x80||a>0xBF)return false;
355-
case4:if ((a= (*--srcptr))<0x80||a>0xBF)return false;
356-
case3:if ((a= (*--srcptr))<0x80||a>0xBF)return false;
357-
case2:if ((a= (*--srcptr))>0xBF)return false;
358-
switch (*source) {
359-
/* no fall-through in this inner switch */
360-
case0xE0:if (a<0xA0)return false;break;
361-
case0xF0:if (a<0x90)return false;break;
362-
case0xF4:if (a>0x8F)return false;break;
363-
default:if (a<0x80)return false;
364-
}
365-
case1:if (*source >=0x80&&*source<0xC2)return false;
366-
if (*source>0xFD)return false;
367-
}
368-
return true;
369-
}
370-
371346
/*
372347
* convert UTF-8 string to pg_wchar (UCS-2)
373348
* caller should allocate enough space for "to"
@@ -423,27 +398,21 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
423398
* returns the byte length of a UTF-8 word pointed to by s
424399
*/
425400
int
426-
pg_utf_mblen(constUTF8*s)
401+
pg_utf_mblen(constunsignedchar*s)
427402
{
428403
intlen=1;
429404

430405
if ((*s&0x80)==0)
431406
len=1;
432407
elseif ((*s&0xe0)==0xc0)
433408
len=2;
434-
elseif ((*s&0xf0)==0xe0)
435-
len=3;
436-
elseif ((*s&0xf8)==0xf0)
437-
len=4;
438-
elseif ((*s&0xfc)==0xf8)
439-
len=5;
440-
elseif ((*s&0xfe)==0xfc)
441-
len=6;
409+
elseif ((*s&0xe0)==0xe0)
410+
len=3;
442411
return (len);
443412
}
444413

445414
staticint
446-
pg_utf_dsplen(constUTF8*s)
415+
pg_utf_dsplen(constunsignedchar*s)
447416
{
448417
return1;/* XXX fix me! */
449418
}
@@ -752,8 +721,8 @@ pg_wchar_tbl pg_wchar_table[] = {
752721
{pg_euckr2wchar_with_len,pg_euckr_mblen,pg_euckr_dsplen,3},/* 3; PG_EUC_KR */
753722
{pg_euctw2wchar_with_len,pg_euctw_mblen,pg_euctw_dsplen,3},/* 4; PG_EUC_TW */
754723
{pg_johab2wchar_with_len,pg_johab_mblen,pg_johab_dsplen,3},/* 5; PG_JOHAB */
755-
{pg_utf2wchar_with_len,pg_utf_mblen,pg_utf_dsplen,6},/* 6; PG_UNICODE */
756-
{pg_mule2wchar_with_len,pg_mule_mblen,pg_mule_dsplen,3},/* 7; PG_MULE_INTERNAL */
724+
{pg_utf2wchar_with_len,pg_utf_mblen,pg_utf_dsplen,3},/* 6; PG_UNICODE */
725+
{pg_mule2wchar_with_len,pg_mule_mblen,pg_mule_dsplen,3},/* 7; PG_MULE_INTERNAL */
757726
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 8; PG_LATIN1 */
758727
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 9; PG_LATIN2 */
759728
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 10; PG_LATIN3 */
@@ -775,11 +744,11 @@ pg_wchar_tbl pg_wchar_table[] = {
775744
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 26; ISO-8859-7 */
776745
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 27; ISO-8859-8 */
777746
{pg_latin12wchar_with_len,pg_latin1_mblen,pg_latin1_dsplen,1},/* 28; PG_WIN1250 */
778-
{0,pg_sjis_mblen,pg_sjis_dsplen,2},/* 29; PG_SJIS */
779-
{0,pg_big5_mblen,pg_big5_dsplen,2},/* 30; PG_BIG5 */
780-
{0,pg_gbk_mblen,pg_gbk_dsplen,2},/* 31; PG_GBK */
781-
{0,pg_uhc_mblen,pg_uhc_dsplen,2},/* 32; PG_UHC */
782-
{0,pg_gb18030_mblen,pg_gb18030_dsplen,2}/* 33; PG_GB18030 */
747+
{0,pg_sjis_mblen,pg_sjis_dsplen,2},/* 29; PG_SJIS */
748+
{0,pg_big5_mblen,pg_big5_dsplen,2},/* 30; PG_BIG5 */
749+
{0,pg_gbk_mblen,pg_gbk_dsplen,2},/* 31; PG_GBK */
750+
{0,pg_uhc_mblen,pg_uhc_dsplen,2},/* 32; PG_UHC */
751+
{0,pg_gb18030_mblen,pg_gb18030_dsplen,2}/* 33; PG_GB18030 */
783752
};
784753

785754
/* returns the byte length of a word for mule internal code */
@@ -853,48 +822,51 @@ pg_verifymbstr(const unsigned char *mbstr, int len, bool noError)
853822

854823
while (len>0&&*mbstr)
855824
{
825+
/* special UTF-8 check */
826+
if (encoding==PG_UTF8&& (*mbstr&0xf8)==0xf0)
827+
{
828+
if (noError)
829+
return false;
830+
ereport(ERROR,
831+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
832+
errmsg("Unicode characters greater than or equal to 0x10000 are not supported")));
833+
}
834+
856835
l=pg_mblen(mbstr);
857836

858-
/* special UTF-8 check */
859-
if (encoding==PG_UTF8) {
860-
if(!isLegalUTF8(mbstr,l)) {
861-
if (noError)return false;
862-
ereport(ERROR,(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),errmsg("Invalid UNICODE byte sequence detected near character %c",*mbstr)));
863-
}
864-
}else {
865-
for (i=1;i<l;i++)
837+
for (i=1;i<l;i++)
838+
{
839+
/*
840+
* we expect that every multibyte char consists of bytes
841+
* having the 8th bit set
842+
*/
843+
if (i >=len|| (mbstr[i]&0x80)==0)
866844
{
867-
/*
868-
* we expect that every multibyte char consists of bytes
869-
* having the 8th bit set
870-
*/
871-
if (i >=len|| (mbstr[i]&0x80)==0)
872-
{
873-
charbuf[8*2+1];
874-
char*p=buf;
875-
intj,
845+
charbuf[8*2+1];
846+
char*p=buf;
847+
intj,
876848
jlimit;
877849

878-
if (noError)
879-
return false;
850+
if (noError)
851+
return false;
880852

881-
jlimit=Min(l,len);
882-
jlimit=Min(jlimit,8);/* prevent buffer overrun */
853+
jlimit=Min(l,len);
854+
jlimit=Min(jlimit,8);/* prevent buffer overrun */
883855

884-
for (j=0;j<jlimit;j++)
885-
p+=sprintf(p,"%02x",mbstr[j]);
856+
for (j=0;j<jlimit;j++)
857+
p+=sprintf(p,"%02x",mbstr[j]);
886858

887-
ereport(ERROR,
888-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
889-
errmsg("invalid byte sequence for encoding \"%s\": 0x%s",
890-
GetDatabaseEncodingName(),buf)));
891-
}
859+
ereport(ERROR,
860+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
861+
errmsg("invalid byte sequence for encoding \"%s\": 0x%s",
862+
GetDatabaseEncodingName(),buf)));
892863
}
893-
894864
}
865+
895866
len-=l;
896867
mbstr+=l;
897868
}
869+
898870
return true;
899871
}
900872

‎src/include/mb/pg_wchar.h

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.54 2004/12/02 22:37:14 momjian Exp $ */
1+
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.55 2004/12/03 01:20:33 momjian Exp $ */
22

33
#ifndefPG_WCHAR_H
44
#definePG_WCHAR_H
@@ -17,14 +17,6 @@
1717
*/
1818
typedefunsignedintpg_wchar;
1919

20-
21-
/*
22-
* The UTF types
23-
*/
24-
typedefunsignedintUTF32;/* at least 32 bits */
25-
typedefunsigned shortUTF16;/* at least 16 bits */
26-
typedefunsignedcharUTF8;/* typically 8 bits */
27-
2820
/*
2921
* various definitions for EUC
3022
*/
@@ -348,6 +340,4 @@ extern void mic2latin(unsigned char *mic, unsigned char *p, int len, int lc);
348340
externvoidlatin2mic_with_table(unsignedchar*l,unsignedchar*p,intlen,intlc,unsignedchar*tab);
349341
externvoidmic2latin_with_table(unsignedchar*mic,unsignedchar*p,intlen,intlc,unsignedchar*tab);
350342

351-
externboolisLegalUTF8(constUTF8*source,intlen);
352-
353343
#endif/* PG_WCHAR_H */

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp