NotificationsYou must be signed in to change notification settings
Fork5k
Star18k

Commit cbadeac

committed

With GB18030, prevent SIGSEGV from reading past end of allocation.

With GB18030 as source encoding, applications could crash the server via SQL functions convert() or convert_from(). Applications themselves could crash after passing unterminated GB18030 input to libpq functions PQescapeLiteral(), PQescapeIdentifier(), PQescapeStringConn(), or PQescapeString(). Extension code could crash by passing unterminated GB18030 input to jsonapi.h functions. All those functions have been intended to handle untrusted, unterminated input safely.

A crash required allocating the input such that the last byte of the allocation was the last byte of a virtual memory page. Some malloc() implementations take measures against that, making the SIGSEGV hard to reach. Back-patch to v13 (all supported versions).

Author: Noah Misch <noah@leadboat.com>
Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Backpatch-through: 13
Security: CVE-2025-4207

1 parent 7279e58, commit cbadeac (Copy full SHA for cbadeac)

File tree

7 files changed

+171

-17

lines changed

src
- backend/utils/mb
  - mbutils.c
- common
  - jsonapi.c
  - wchar.c
- include/mb
  - pg_wchar.h
- interfaces/libpq
  - fe-exec.c
  - fe-misc.c
- test/modules/test_escape
  - test_escape.c

7 files changed

+171

-17

lines changed

`‎src/backend/utils/mb/mbutils.c`

Lines changed: 13 additions & 5 deletions

Original file line number	Diff line number	Diff line change
`@@ -971,7 +971,7 @@ pg_mbcliplen(const char *mbstr, int len, int limit)`
`971`	`971`	`}`
`972`	`972`
`973`	`973`	`/*`
`974`		`- * pg_mbcliplen with specified encoding`
	`974`	`+ * pg_mbcliplen with specified encoding; string must be valid in encoding`
`975`	`975`	`*/`
`976`	`976`	`int`
`977`	`977`	`pg_encoding_mbcliplen(intencoding,constchar*mbstr,`
`@@ -1569,12 +1569,12 @@ check_encoding_conversion_args(int src_encoding,`
`1569`	`1569`	`* report_invalid_encoding: complain about invalid multibyte character`
`1570`	`1570`	`*`
`1571`	`1571`	`* note: len is remaining length of string, not length of character;`
`1572`		`- * len must be greater than zero, as we always examine the first byte.`
	`1572`	`+ * len must be greater than zero (or we'd neglect initializing "buf").`
`1573`	`1573`	`*/`
`1574`	`1574`	`void`
`1575`	`1575`	`report_invalid_encoding(intencoding,constchar*mbstr,intlen)`
`1576`	`1576`	`{`
`1577`		`-intl=pg_encoding_mblen(encoding,mbstr);`
	`1577`	`+intl=pg_encoding_mblen_or_incomplete(encoding,mbstr,len);`
`1578`	`1578`	`charbuf[8*5+1];`
`1579`	`1579`	`char*p=buf;`
`1580`	`1580`	`intj,`
`@@ -1601,18 +1601,26 @@ report_invalid_encoding(int encoding, const char *mbstr, int len)`
`1601`	`1601`	`* report_untranslatable_char: complain about untranslatable character`
`1602`	`1602`	`*`
`1603`	`1603`	`* note: len is remaining length of string, not length of character;`
`1604`		`- * len must be greater than zero, as we always examine the first byte.`
	`1604`	`+ * len must be greater than zero (or we'd neglect initializing "buf").`
`1605`	`1605`	`*/`
`1606`	`1606`	`void`
`1607`	`1607`	`report_untranslatable_char(intsrc_encoding,intdest_encoding,`
`1608`	`1608`	`constchar*mbstr,intlen)`
`1609`	`1609`	`{`
`1610`		`-intl=pg_encoding_mblen(src_encoding,mbstr);`
	`1610`	`+intl;`
`1611`	`1611`	`charbuf[8*5+1];`
`1612`	`1612`	`char*p=buf;`
`1613`	`1613`	`intj,`
`1614`	`1614`	`jlimit;`
`1615`	`1615`
	`1616`	`+/*`
	`1617`	`+ * We probably could use plain pg_encoding_mblen(), because`
	`1618`	`+ * gb18030_to_utf8() verifies before it converts. All conversions should.`
	`1619`	`+ * For src_encoding!=GB18030, len>0 meets pg_encoding_mblen() needs. Even`
	`1620`	`+ * so, be defensive, since a buggy conversion might pass invalid data.`
	`1621`	`+ * This is not a performance-critical path.`
	`1622`	`+ */`
	`1623`	`+l=pg_encoding_mblen_or_incomplete(src_encoding,mbstr,len);`
`1616`	`1624`	`jlimit=Min(l,len);`
`1617`	`1625`	`jlimit=Min(jlimit,8);/* prevent buffer overrun */`
`1618`	`1626`

`‎src/common/jsonapi.c`

Lines changed: 5 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -698,8 +698,11 @@ json_lex_string(JsonLexContext *lex)`
`698`	`698`	`} while (0)`
`699`	`699`	`#defineFAIL_AT_CHAR_END(code) \`
`700`	`700`	`do { \`
`701`		`-char *term = s + pg_encoding_mblen(lex->input_encoding, s); \`
`702`		`-lex->token_terminator = (term <= end) ? term : end; \`
	`701`	`+ptrdiff_tremaining = end - s; \`
	`702`	`+intcharlen; \`
	`703`	`+charlen = pg_encoding_mblen_or_incomplete(lex->input_encoding, \`
	`704`	`+ s, remaining); \`
	`705`	`+lex->token_terminator = (charlen <= remaining) ? s + charlen : end; \`
`703`	`706`	`return code; \`
`704`	`707`	`} while (0)`
`705`	`708`

`‎src/common/wchar.c`

Lines changed: 45 additions & 6 deletions

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,8 @@`
`12`	`12`	`*/`
`13`	`13`	`#include"c.h"`
`14`	`14`
	`15`	`+#include<limits.h>`
	`16`	`+`
`15`	`17`	`#include"mb/pg_wchar.h"`
`16`	`18`
`17`	`19`
`@@ -1597,10 +1599,27 @@ const pg_wchar_tbl pg_wchar_table[] = {`
`1597`	`1599`	`/*`
`1598`	`1600`	`* Returns the byte length of a multibyte character.`
`1599`	`1601`	`*`
`1600`		`- * Caution: when dealing with text that is not certainly valid in the`
`1601`		`- * specified encoding, the result may exceed the actual remaining`
`1602`		`- * string length. Callers that are not prepared to deal with that`
`1603`		`- * should use pg_encoding_mblen_bounded() instead.`
	`1602`	`+ * Choose "mblen" functions based on the input string characteristics.`
	`1603`	`+ * pg_encoding_mblen() can be used when ANY of these conditions are met:`
	`1604`	`+ *`
	`1605`	`+ * - The input string is zero-terminated`
	`1606`	`+ *`
	`1607`	`+ * - The input string is known to be valid in the encoding (e.g., string`
	`1608`	`+ * converted from database encoding)`
	`1609`	`+ *`
	`1610`	`+ * - The encoding is not GB18030 (e.g., when only database encodings are`
	`1611`	`+ * passed to 'encoding' parameter)`
	`1612`	`+ *`
	`1613`	`+ * encoding==GB18030 requires examining up to two bytes to determine character`
	`1614`	`+ * length. Therefore, callers satisfying none of those conditions must use`
	`1615`	`+ * pg_encoding_mblen_or_incomplete() instead, as access to mbstr[1] cannot be`
	`1616`	`+ * guaranteed to be within allocation bounds.`
	`1617`	`+ *`
	`1618`	`+ * When dealing with text that is not certainly valid in the specified`
	`1619`	`+ * encoding, the result may exceed the actual remaining string length.`
	`1620`	`+ * Callers that are not prepared to deal with that should use Min(remaining,`
	`1621`	`+ * pg_encoding_mblen_or_incomplete()). For zero-terminated strings, that and`
	`1622`	`+ * pg_encoding_mblen_bounded() are interchangeable.`
`1604`	`1623`	`*/`
`1605`	`1624`	`int`
`1606`	`1625`	`pg_encoding_mblen(intencoding,constchar*mbstr)`
`@@ -1611,8 +1630,28 @@ pg_encoding_mblen(int encoding, const char *mbstr)`
`1611`	`1630`	`}`
`1612`	`1631`
`1613`	`1632`	`/*`
`1614`		`- * Returns the byte length of a multibyte character; but not more than`
`1615`		`- * the distance to end of string.`
	`1633`	`+ * Returns the byte length of a multibyte character (possibly not`
	`1634`	`+ * zero-terminated), or INT_MAX if too few bytes remain to determine a length.`
	`1635`	`+ */`
	`1636`	`+int`
	`1637`	`+pg_encoding_mblen_or_incomplete(intencoding,constchar*mbstr,`
	`1638`	`+size_tremaining)`
	`1639`	`+{`
	`1640`	`+/*`
	`1641`	`+ * Define zero remaining as too few, even for single-byte encodings.`
	`1642`	`+ * pg_gb18030_mblen() reads one or two bytes; single-byte encodings read`
	`1643`	`+ * zero; others read one.`
	`1644`	`+ */`
	`1645`	`+if (remaining<1\|\|`
	`1646`	`+(encoding==PG_GB18030&&IS_HIGHBIT_SET(*mbstr)&&remaining<2))`
	`1647`	`+returnINT_MAX;`
	`1648`	`+returnpg_encoding_mblen(encoding,mbstr);`
	`1649`	`+}`
	`1650`	`+`
	`1651`	`+/*`
	`1652`	`+ * Returns the byte length of a multibyte character; but not more than the`
	`1653`	`+ * distance to the terminating zero byte. For input that might lack a`
	`1654`	`+ * terminating zero, use Min(remaining, pg_encoding_mblen_or_incomplete()).`
`1616`	`1655`	`*/`
`1617`	`1656`	`int`
`1618`	`1657`	`pg_encoding_mblen_bounded(intencoding,constchar*mbstr)`

`‎src/include/mb/pg_wchar.h`

Lines changed: 2 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -554,6 +554,8 @@ extern int pg_valid_server_encoding_id(int encoding);`
`554`	`554`	`*/`
`555`	`555`	`externvoidpg_encoding_set_invalid(intencoding,char*dst);`
`556`	`556`	`externintpg_encoding_mblen(intencoding,constchar*mbstr);`
	`557`	`+externintpg_encoding_mblen_or_incomplete(intencoding,constchar*mbstr,`
	`558`	`+size_tremaining);`
`557`	`559`	`externintpg_encoding_mblen_bounded(intencoding,constchar*mbstr);`
`558`	`560`	`externintpg_encoding_dsplen(intencoding,constchar*mbstr);`
`559`	`561`	`externintpg_encoding_verifymb(intencoding,constchar*mbstr,intlen);`

`‎src/interfaces/libpq/fe-exec.c`

Lines changed: 4 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -3373,7 +3373,8 @@ PQescapeStringInternal(PGconn *conn,`
`3373`	`3373`	`}`
`3374`	`3374`
`3375`	`3375`	`/* Slow path for possible multibyte characters */`
`3376`		`-charlen=pg_encoding_mblen(encoding,source);`
	`3376`	`+charlen=pg_encoding_mblen_or_incomplete(encoding,`
	`3377`	`+source,remaining);`
`3377`	`3378`
`3378`	`3379`	`if (remaining<charlen\|\|`
`3379`	`3380`	`pg_encoding_verifymbchar(encoding,source,charlen)==-1)`
`@@ -3513,7 +3514,8 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident)`
`3513`	`3514`	`intcharlen;`
`3514`	`3515`
`3515`	`3516`	`/* Slow path for possible multibyte characters */`
`3516`		`-charlen=pg_encoding_mblen(conn->client_encoding,s);`
	`3517`	`+charlen=pg_encoding_mblen_or_incomplete(conn->client_encoding,`
	`3518`	`+s,remaining);`
`3517`	`3519`
`3518`	`3520`	`if (charlen>remaining)`
`3519`	`3521`	`{`

`‎src/interfaces/libpq/fe-misc.c`

Lines changed: 3 additions & 2 deletions

Original file line number	Diff line number	Diff line change
`@@ -1227,8 +1227,9 @@ pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time)`
`1227`	`1227`	`*/`
`1228`	`1228`
`1229`	`1229`	`/*`
`1230`		`- * returns the byte length of the character beginning at s, using the`
`1231`		`- * specified encoding.`
	`1230`	`+ * Like pg_encoding_mblen(). Use this in callers that want the`
	`1231`	`+ * dynamically-linked libpq's stance on encodings, even if that means`
	`1232`	`+ * different behavior in different startups of the executable.`
`1232`	`1233`	`*/`
`1233`	`1234`	`int`
`1234`	`1235`	`PQmblen(constchar*s,intencoding)`

`‎src/test/modules/test_escape/test_escape.c`

Lines changed: 99 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@`
`12`	`12`	`#include<string.h>`
`13`	`13`	`#include<stdio.h>`
`14`	`14`
	`15`	`+#include"common/jsonapi.h"`
`15`	`16`	`#include"fe_utils/psqlscan.h"`
`16`	`17`	`#include"fe_utils/string_utils.h"`
`17`	`18`	`#include"getopt_long.h"`
`@@ -164,6 +165,91 @@ encoding_conflicts_ascii(int encoding)`
`164`	`165`	`}`
`165`	`166`
`166`	`167`
	`168`	`+/*`
	`169`	`+ * Confirm escaping doesn't read past the end of an allocation. Consider the`
	`170`	`+ * result of malloc(4096), in the absence of freelist entries satisfying the`
	`171`	`+ * allocation. On OpenBSD, reading one byte past the end of that object`
	`172`	`+ * yields SIGSEGV.`
	`173`	`+ *`
	`174`	`+ * Run this test before the program's other tests, so freelists are minimal.`
	`175`	`+ * len=4096 didn't SIGSEGV, likely due to free() calls in libpq. len=8192`
	`176`	`+ * did. Use 128 KiB, to somewhat insulate the outcome from distant new free()`
	`177`	`+ * calls and libc changes.`
	`178`	`+ */`
	`179`	`+staticvoid`
	`180`	`+test_gb18030_page_multiple(pe_test_config*tc)`
	`181`	`+{`
	`182`	`+PQExpBuffertestname;`
	`183`	`+size_tinput_len=0x20000;`
	`184`	`+char*input;`
	`185`	`+`
	`186`	`+/* prepare input */`
	`187`	`+input=pg_malloc(input_len);`
	`188`	`+memset(input,'-',input_len-1);`
	`189`	`+input[input_len-1]=0xfe;`
	`190`	`+`
	`191`	`+/* name to describe the test */`
	`192`	`+testname=createPQExpBuffer();`
	`193`	`+appendPQExpBuffer(testname,">repeat(%c, %zu)",input[0],input_len-1);`
	`194`	`+escapify(testname,input+input_len-1,1);`
	`195`	`+appendPQExpBuffer(testname,"< - GB18030 - PQescapeLiteral");`
	`196`	`+`
	`197`	`+/* test itself */`
	`198`	`+PQsetClientEncoding(tc->conn,"GB18030");`
	`199`	`+report_result(tc,PQescapeLiteral(tc->conn,input,input_len)==NULL,`
	`200`	`+testname->data,"",`
	`201`	`+"input validity vs escape success","ok");`
	`202`	`+`
	`203`	`+destroyPQExpBuffer(testname);`
	`204`	`+pg_free(input);`
	`205`	`+}`
	`206`	`+`
	`207`	`+/*`
	`208`	`+ * Confirm json parsing doesn't read past the end of an allocation. This`
	`209`	`+ * exercises wchar.c infrastructure like the true "escape" tests do, but this`
	`210`	`+ * isn't an "escape" test.`
	`211`	`+ */`
	`212`	`+staticvoid`
	`213`	`+test_gb18030_json(pe_test_config*tc)`
	`214`	`+{`
	`215`	`+PQExpBufferraw_buf;`
	`216`	`+PQExpBuffertestname;`
	`217`	`+constcharinput[]="{\"\\u\xFE";`
	`218`	`+size_tinput_len=sizeof(input)-1;`
	`219`	`+JsonLexContext*lex;`
	`220`	`+JsonSemActionsem= {0};/* no callbacks */`
	`221`	`+JsonParseErrorTypejson_error;`
	`222`	`+char*error_str;`
	`223`	`+`
	`224`	`+/* prepare input like test_one_vector_escape() does */`
	`225`	`+raw_buf=createPQExpBuffer();`
	`226`	`+appendBinaryPQExpBuffer(raw_buf,input,input_len);`
	`227`	`+appendPQExpBufferStr(raw_buf,NEVER_ACCESS_STR);`
	`228`	`+VALGRIND_MAKE_MEM_NOACCESS(&raw_buf->data[input_len],`
	`229`	`+raw_buf->len-input_len);`
	`230`	`+`
	`231`	`+/* name to describe the test */`
	`232`	`+testname=createPQExpBuffer();`
	`233`	`+appendPQExpBuffer(testname,">");`
	`234`	`+escapify(testname,input,input_len);`
	`235`	`+appendPQExpBuffer(testname,"< - GB18030 - pg_parse_json");`
	`236`	`+`
	`237`	`+/* test itself */`
	`238`	`+lex=makeJsonLexContextCstringLen(raw_buf->data,input_len,`
	`239`	`+PG_GB18030, false);`
	`240`	`+json_error=pg_parse_json(lex,&sem);`
	`241`	`+error_str=psprintf("JsonParseErrorType %d",json_error);`
	`242`	`+report_result(tc,json_error==JSON_UNICODE_ESCAPE_FORMAT,`
	`243`	`+testname->data,"",`
	`244`	`+"diagnosed",error_str);`
	`245`	`+`
	`246`	`+pfree(error_str);`
	`247`	`+pfree(lex);`
	`248`	`+destroyPQExpBuffer(testname);`
	`249`	`+destroyPQExpBuffer(raw_buf);`
	`250`	`+}`
	`251`	`+`
	`252`	`+`
`167`	`253`	`staticbool`
`168`	`254`	`escape_literal(PGconn*conn,PQExpBuffertarget,`
`169`	`255`	`constchar*unescaped,size_tunescaped_len,`
`@@ -454,8 +540,18 @@ static pe_test_vector pe_test_vectors[] =`
`454`	`540`	`* Testcases that are not null terminated for the specified input length.`
`455`	`541`	`* That's interesting to verify that escape functions don't read beyond`
`456`	`542`	`* the intended input length.`
	`543`	`+ *`
	`544`	`+ * One interesting special case is GB18030, which has the odd behaviour`
	`545`	`+ * needing to read beyond the first byte to determine the length of a`
	`546`	`+ * multi-byte character.`
`457`	`547`	`*/`
`458`	`548`	`TV_LEN("gbk","\x80",1),`
	`549`	`+TV_LEN("GB18030","\x80",1),`
	`550`	`+TV_LEN("GB18030","\x80\0",2),`
	`551`	`+TV_LEN("GB18030","\x80\x30",2),`
	`552`	`+TV_LEN("GB18030","\x80\x30\0",3),`
	`553`	`+TV_LEN("GB18030","\x80\x30\x30",3),`
	`554`	`+TV_LEN("GB18030","\x80\x30\x30\0",4),`
`459`	`555`	`TV_LEN("UTF-8","\xC3\xb6 ",1),`
`460`	`556`	`TV_LEN("UTF-8","\xC3\xb6 ",2),`
`461`	`557`	`};`
`@@ -864,6 +960,9 @@ main(int argc, char *argv[])`
`864`	`960`	`exit(1);`
`865`	`961`	`}`
`866`	`962`
	`963`	`+test_gb18030_page_multiple(&tc);`
	`964`	`+test_gb18030_json(&tc);`
	`965`	`+`
`867`	`966`	`for (inti=0;i<lengthof(pe_test_vectors);i++)`
`868`	`967`	`{`
`869`	`968`	`test_one_vector(&tc,&pe_test_vectors[i]);`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commitcbadeac

File tree

7 files changed

7 files changed

`‎src/backend/utils/mb/mbutils.c`

`‎src/common/jsonapi.c`

`‎src/common/wchar.c`

`‎src/include/mb/pg_wchar.h`

`‎src/interfaces/libpq/fe-exec.c`

`‎src/interfaces/libpq/fe-misc.c`

`‎src/test/modules/test_escape/test_escape.c`

0 commit comments