NotificationsYou must be signed in to change notification settings
Fork5
Star26

Commit0887fa1

committed

Get pg_utf_mblen(), pg_utf2wchar_with_len(), and utf2ucs() all on the same

page about the maximum UTF8 sequence length we support (4 bytes since 8.1, 3 before that). pg_utf2wchar_with_len never got updated to support 4-byte characters at all, and in any case had a buffer-overrun risk in that it could produce multiple pg_wchars from what mblen claims to be just one UTF8 character. The only reason we don't have a major security hole is that most callers allocate worst-case output buffers; the sole exception in released versions appears to be pre-8.2 iwchareq() (ie, ILIKE), which can be crashed due to zeroing out its return address --- but AFAICS that can't be exploited for anything more than a crash, due to inability to control what gets written there. Per report from James Russell and Michael Fuhr. Pre-8.1 the risk is much less, but I still think pg_utf2wchar_with_len's behavior given an incomplete final character risks buffer overrun, so back-patch that logic change anyway. This patch also makes sure that UTF8 sequences exceeding the supported length (whichever it is) are consistently treated as error cases, rather than being treated like a valid shorter sequence in some places.

1 parent 07cf99a commit 0887fa1 Copy full SHA for 0887fa1

File tree

1 file changed

+43

-17

lines changed

src/backend/utils/mb
- wchar.c

1 file changed

+43

-17

lines changed

`‎src/backend/utils/mb/wchar.c`

Lines changed: 43 additions & 17 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`/*`
`2`	`2`	`* conversion functions between pg_wchar and multibyte streams.`
`3`	`3`	`* Tatsuo Ishii`
`4`		`- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.58 2006/10/04 00:30:02 momjian Exp $`
	`4`	`+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.59 2007/01/24 17:12:17 tgl Exp $`
`5`	`5`	`*`
`6`	`6`	`* WIN1250 client encoding updated by Pavel Behal`
`7`	`7`	`*`
`@@ -364,46 +364,60 @@ pg_johab_dsplen(const unsigned char *s)`
`364`	`364`	`}`
`365`	`365`
`366`	`366`	`/*`
`367`		`- * convert UTF8 string to pg_wchar (UCS-2)`
`368`		`- * caller should allocate enough space for "to"`
	`367`	`+ * convert UTF8 string to pg_wchar (UCS-4)`
	`368`	`+ * caller must allocate enough space for "to", including a trailing zero!`
`369`	`369`	`* len: length of from.`
`370`	`370`	`* "from" not necessarily null terminated.`
`371`	`371`	`*/`
`372`	`372`	`staticint`
`373`	`373`	`pg_utf2wchar_with_len(constunsignedcharfrom,pg_wcharto,intlen)`
`374`	`374`	`{`
`375`		`-unsignedcharc1,`
`376`		`-c2,`
`377`		`-c3;`
`378`	`375`	`intcnt=0;`
	`376`	`+uint32c1,`
	`377`	`+c2,`
	`378`	`+c3,`
	`379`	`+c4;`
`379`	`380`
`380`	`381`	`while (len>0&&*from)`
`381`	`382`	`{`
`382`		`-if (!IS_HIGHBIT_SET(*from))`
	`383`	`+if ((*from&0x80)==0)`
`383`	`384`	`{`
`384`	`385`	`to=from++;`
`385`	`386`	`len--;`
`386`	`387`	`}`
`387`		`-elseif ((*from&0xe0)==0xc0&&len >=2)`
	`388`	`+elseif ((*from&0xe0)==0xc0)`
`388`	`389`	`{`
	`390`	`+if (len<2)`
	`391`	`+break;/* drop trailing incomplete char */`
`389`	`392`	`c1=*from++&0x1f;`
`390`	`393`	`c2=*from++&0x3f;`
`391`		`-*to=c1 <<6;`
`392`		`-*to \|=c2;`
	`394`	`+*to= (c1 <<6) \|c2;`
`393`	`395`	`len-=2;`
`394`	`396`	`}`
`395`		`-elseif ((*from&0xe0)==0xe0&&len >=3)`
	`397`	`+elseif ((*from&0xf0)==0xe0)`
`396`	`398`	`{`
	`399`	`+if (len<3)`
	`400`	`+break;/* drop trailing incomplete char */`
`397`	`401`	`c1=*from++&0x0f;`
`398`	`402`	`c2=*from++&0x3f;`
`399`	`403`	`c3=*from++&0x3f;`
`400`		`-*to=c1 <<12;`
`401`		`-*to \|=c2 <<6;`
`402`		`-*to \|=c3;`
	`404`	`+*to= (c1 <<12) \| (c2 <<6) \|c3;`
`403`	`405`	`len-=3;`
`404`	`406`	`}`
	`407`	`+elseif ((*from&0xf8)==0xf0)`
	`408`	`+{`
	`409`	`+if (len<4)`
	`410`	`+break;/* drop trailing incomplete char */`
	`411`	`+c1=*from++&0x07;`
	`412`	`+c2=*from++&0x3f;`
	`413`	`+c3=*from++&0x3f;`
	`414`	`+c4=*from++&0x3f;`
	`415`	`+*to= (c1 <<18) \| (c2 <<12) \| (c3 <<6) \|c4;`
	`416`	`+len-=4;`
	`417`	`+}`
`405`	`418`	`else`
`406`	`419`	`{`
	`420`	`+/* treat a bogus char as length 1; not ours to raise error */`
`407`	`421`	`to=from++;`
`408`	`422`	`len--;`
`409`	`423`	`}`
`@@ -415,12 +429,20 @@ pg_utf2wchar_with_len(const unsigned char from, pg_wchar to, int len)`
`415`	`429`	`}`
`416`	`430`
`417`	`431`	`/*`
`418`		`- * returns the byte length of a UTF8 character pointed to by s`
	`432`	`+ * Return the byte length of a UTF8 character pointed to by s`
	`433`	`+ *`
	`434`	`+ * Note: in the current implementation we do not support UTF8 sequences`
	`435`	`+ * of more than 4 bytes; hence do NOT return a value larger than 4.`
	`436`	`+ * We return "1" for any leading byte that is either flat-out illegal or`
	`437`	`+ * indicates a length larger than we support.`
	`438`	`+ *`
	`439`	`+ * pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps`
	`440`	`+ * other places would need to be fixed to change this.`
`419`	`441`	`*/`
`420`	`442`	`int`
`421`	`443`	`pg_utf_mblen(constunsignedchar*s)`
`422`	`444`	`{`
`423`		`-intlen=1;`
	`445`	`+intlen;`
`424`	`446`
`425`	`447`	`if ((*s&0x80)==0)`
`426`	`448`	`len=1;`
`@@ -430,10 +452,14 @@ pg_utf_mblen(const unsigned char *s)`
`430`	`452`	`len=3;`
`431`	`453`	`elseif ((*s&0xf8)==0xf0)`
`432`	`454`	`len=4;`
	`455`	`+#ifdefNOT_USED`
`433`	`456`	`elseif ((*s&0xfc)==0xf8)`
`434`	`457`	`len=5;`
`435`	`458`	`elseif ((*s&0xfe)==0xfc)`
`436`	`459`	`len=6;`
	`460`	`+#endif`
	`461`	`+else`
	`462`	`+len=1;`
`437`	`463`	`returnlen;`
`438`	`464`	`}`
`439`	`465`
`@@ -596,7 +622,7 @@ utf2ucs(const unsigned char *c)`
`596`	`622`	`return (pg_wchar) (((c[0]&0x0f) <<12) \|`
`597`	`623`	`((c[1]&0x3f) <<6) \|`
`598`	`624`	`(c[2]&0x3f));`
`599`		`-elseif ((*c&0xf0)==0xf0)`
	`625`	`+elseif ((*c&0xf8)==0xf0)`
`600`	`626`	`return (pg_wchar) (((c[0]&0x07) <<18) \|`
`601`	`627`	`((c[1]&0x3f) <<12) \|`
`602`	`628`	`((c[2]&0x3f) <<6) \|`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit0887fa1

File tree

1 file changed

1 file changed

`‎src/backend/utils/mb/wchar.c`

0 commit comments