NotificationsYou must be signed in to change notification settings
Fork4.9k
Star17.7k

Commit5bc33cb

committed

Add pg_encoding_set_invalid()

There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte characters with bytes that will trigger an error when getting validated as part of a larger string.

Unfortunately, until now, for some encoding no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that.

As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function.

Author: Noah Misch <noah@leadboat.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Backpatch-through: 13
Security: CVE-2025-1094

1 parent04f31c8 commit5bc33cbCopy full SHA for 5bc33cb

File tree

7 files changed

+121

-2

lines changed

src
- common
  - wchar.c
- include/mb
  - pg_wchar.h
- test/regress
  - expected
    - conversion.out
  - input
    - create_function_0.source
  - output
    - create_function_0.source
  - regress.c
  - sql
    - conversion.sql

7 files changed

+121

-2

lines changed

`‎src/common/wchar.c`

Lines changed: 54 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,25 @@`
`15`	`15`	`#include"mb/pg_wchar.h"`
`16`	`16`
`17`	`17`
	`18`	`+/*`
	`19`	`+ * In today's multibyte encodings other than UTF8, this two-byte sequence`
	`20`	`+ * ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.`
	`21`	`+ *`
	`22`	`+ * For historical reasons, several verifychar implementations opt to reject`
	`23`	`+ * this pair specifically. Byte pair range constraints, in encoding`
	`24`	`+ * originator documentation, always excluded this pair. No core conversion`
	`25`	`+ * could translate it. However, longstanding verifychar implementations`
	`26`	`+ * accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate`
	`27`	`+ * pairs not valid per encoding originator documentation. To avoid tightening`
	`28`	`+ * core or non-core conversions in a security patch, we sought this one pair.`
	`29`	`+ *`
	`30`	`+ * PQescapeString() historically used spaces for BYTE1; many other values`
	`31`	`+ * could suffice for BYTE1.`
	`32`	`+ */`
	`33`	`+#defineNONUTF8_INVALID_BYTE0 (0x8d)`
	`34`	`+#defineNONUTF8_INVALID_BYTE1 (' ')`
	`35`	`+`
	`36`	`+`
`18`	`37`	`/*`
`19`	`38`	`* Operations on multi-byte encodings are driven by a table of helper`
`20`	`39`	`* functions.`
`@@ -1532,6 +1551,11 @@ pg_big5_verifychar(const unsigned char *s, int len)`
`1532`	`1551`	`if (len<l)`
`1533`	`1552`	`return-1;`
`1534`	`1553`
	`1554`	`+if (l==2&&`
	`1555`	`+s[0]==NONUTF8_INVALID_BYTE0&&`
	`1556`	`+s[1]==NONUTF8_INVALID_BYTE1)`
	`1557`	`+return-1;`
	`1558`	`+`
`1535`	`1559`	`while (--l>0)`
`1536`	`1560`	`{`
`1537`	`1561`	`if (*++s=='\0')`
`@@ -1581,6 +1605,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)`
`1581`	`1605`	`if (len<l)`
`1582`	`1606`	`return-1;`
`1583`	`1607`
	`1608`	`+if (l==2&&`
	`1609`	`+s[0]==NONUTF8_INVALID_BYTE0&&`
	`1610`	`+s[1]==NONUTF8_INVALID_BYTE1)`
	`1611`	`+return-1;`
	`1612`	`+`
`1584`	`1613`	`while (--l>0)`
`1585`	`1614`	`{`
`1586`	`1615`	`if (*++s=='\0')`
`@@ -1630,6 +1659,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)`
`1630`	`1659`	`if (len<l)`
`1631`	`1660`	`return-1;`
`1632`	`1661`
	`1662`	`+if (l==2&&`
	`1663`	`+s[0]==NONUTF8_INVALID_BYTE0&&`
	`1664`	`+s[1]==NONUTF8_INVALID_BYTE1)`
	`1665`	`+return-1;`
	`1666`	`+`
`1633`	`1667`	`while (--l>0)`
`1634`	`1668`	`{`
`1635`	`1669`	`if (*++s=='\0')`
`@@ -1858,6 +1892,19 @@ pg_utf8_islegal(const unsigned char *source, int length)`
`1858`	`1892`	`}`
`1859`	`1893`
`1860`	`1894`
	`1895`	`+/*`
	`1896`	`+ * Fills the provided buffer with two bytes such that:`
	`1897`	`+ * pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0`
	`1898`	`+ */`
	`1899`	`+void`
	`1900`	`+pg_encoding_set_invalid(intencoding,char*dst)`
	`1901`	`+{`
	`1902`	`+Assert(pg_encoding_max_length(encoding)>1);`
	`1903`	`+`
	`1904`	`+dst[0]= (encoding==PG_UTF8 ?0xc0 :NONUTF8_INVALID_BYTE0);`
	`1905`	`+dst[1]=NONUTF8_INVALID_BYTE1;`
	`1906`	`+}`
	`1907`	`+`
`1861`	`1908`	`/*`
`1862`	`1909`	`*-------------------------------------------------------------------`
`1863`	`1910`	`* encoding info table`
`@@ -1980,5 +2027,11 @@ pg_encoding_max_length(int encoding)`
`1980`	`2027`	`{`
`1981`	`2028`	`Assert(PG_VALID_ENCODING(encoding));`
`1982`	`2029`
`1983`		`-returnpg_wchar_table[encoding].maxmblen;`
	`2030`	`+/*`
	`2031`	`+ * Check for the encoding despite the assert, due to some mingw versions`
	`2032`	`+ * otherwise issuing bogus warnings.`
	`2033`	`+ */`
	`2034`	`+returnPG_VALID_ENCODING(encoding) ?`
	`2035`	`+pg_wchar_table[encoding].maxmblen :`
	`2036`	`+pg_wchar_table[PG_SQL_ASCII].maxmblen;`
`1984`	`2037`	`}`

`‎src/include/mb/pg_wchar.h`

Lines changed: 2 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -359,7 +359,7 @@ typedef struct pg_enc2name`
`359`	`359`	`#endif`
`360`	`360`	`}pg_enc2name;`
`361`	`361`
`362`		`-externconstpg_enc2namepg_enc2name_tbl[];`
	`362`	`+externPGDLLIMPORTconstpg_enc2namepg_enc2name_tbl[];`
`363`	`363`
`364`	`364`	`/*`
`365`	`365`	`* Encoding names for gettext`
`@@ -573,6 +573,7 @@ extern intpg_valid_server_encoding_id(int encoding);`
`573`	`573`	`* (in addition to the ones just above). The constant tables declared`
`574`	`574`	`* earlier in this file are also available from libpgcommon.`
`575`	`575`	`*/`
	`576`	`+externvoidpg_encoding_set_invalid(intencoding,char*dst);`
`576`	`577`	`externintpg_encoding_mblen(intencoding,constchar*mbstr);`
`577`	`578`	`externintpg_encoding_mblen_bounded(intencoding,constchar*mbstr);`
`578`	`579`	`externintpg_encoding_dsplen(intencoding,constchar*mbstr);`

`‎src/test/regress/expected/conversion.out`

Lines changed: 4 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,10 @@`
`1`	`1`	`--`
`2`	`2`	`-- create user defined conversion`
`3`	`3`	`--`
	`4`	`+SELECT FROM test_enc_setup();`
	`5`	`+--`
	`6`	`+(1 row)`
	`7`	`+`
`4`	`8`	`CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;`
`5`	`9`	`SET SESSION AUTHORIZATION regress_conversion_user;`
`6`	`10`	`CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;`

`‎src/test/regress/input/create_function_0.source`

Lines changed: 5 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -59,6 +59,11 @@ CREATE FUNCTION test_opclass_options_func(internal)`
`59`	`59`	`AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'`
`60`	`60`	`LANGUAGE C;`
`61`	`61`
	`62`	`+`
	`63`	`+CREATE FUNCTION test_enc_setup() RETURNS void`
	`64`	`+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'`
	`65`	`+ LANGUAGE C STRICT;`
	`66`	`+`
`62`	`67`	`CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)`
`63`	`68`	`AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'`
`64`	`69`	`LANGUAGE C STRICT;`

`‎src/test/regress/output/create_function_0.source`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,9 @@ CREATE FUNCTION test_opclass_options_func(internal)`
`46`	`46`	`RETURNS void`
`47`	`47`	`AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'`
`48`	`48`	`LANGUAGE C;`
	`49`	`+CREATE FUNCTION test_enc_setup() RETURNS void`
	`50`	`+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'`
	`51`	`+ LANGUAGE C STRICT;`
`49`	`52`	`CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)`
`50`	`53`	`AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'`
`51`	`54`	`LANGUAGE C STRICT;`

`‎src/test/regress/regress.c`

Lines changed: 50 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1089,6 +1089,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)`
`1089`	`1089`	`PG_RETURN_NULL();`
`1090`	`1090`	`}`
`1091`	`1091`
	`1092`	`+/* one-time tests for encoding infrastructure */`
	`1093`	`+PG_FUNCTION_INFO_V1(test_enc_setup);`
	`1094`	`+Datum`
	`1095`	`+test_enc_setup(PG_FUNCTION_ARGS)`
	`1096`	`+{`
	`1097`	`+/* Test pg_encoding_set_invalid() */`
	`1098`	`+for (inti=0;i<_PG_LAST_ENCODING_;i++)`
	`1099`	`+{`
	`1100`	`+charbuf[2],`
	`1101`	`+bigbuf[16];`
	`1102`	`+intlen,`
	`1103`	`+mblen,`
	`1104`	`+valid;`
	`1105`	`+`
	`1106`	`+if (pg_encoding_max_length(i)==1)`
	`1107`	`+continue;`
	`1108`	`+pg_encoding_set_invalid(i,buf);`
	`1109`	`+len=strnlen(buf,2);`
	`1110`	`+if (len!=2)`
	`1111`	`+elog(WARNING,`
	`1112`	`+"official invalid string for encoding \"%s\" has length %d",`
	`1113`	`+pg_enc2name_tbl[i].name,len);`
	`1114`	`+mblen=pg_encoding_mblen(i,buf);`
	`1115`	`+if (mblen!=2)`
	`1116`	`+elog(WARNING,`
	`1117`	`+"official invalid string for encoding \"%s\" has mblen %d",`
	`1118`	`+pg_enc2name_tbl[i].name,mblen);`
	`1119`	`+valid=pg_encoding_verifymbstr(i,buf,len);`
	`1120`	`+if (valid!=0)`
	`1121`	`+elog(WARNING,`
	`1122`	`+"official invalid string for encoding \"%s\" has valid prefix of length %d",`
	`1123`	`+pg_enc2name_tbl[i].name,valid);`
	`1124`	`+valid=pg_encoding_verifymbstr(i,buf,1);`
	`1125`	`+if (valid!=0)`
	`1126`	`+elog(WARNING,`
	`1127`	`+"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",`
	`1128`	`+pg_enc2name_tbl[i].name,valid);`
	`1129`	`+memset(bigbuf,' ',sizeof(bigbuf));`
	`1130`	`+bigbuf[0]=buf[0];`
	`1131`	`+bigbuf[1]=buf[1];`
	`1132`	`+valid=pg_encoding_verifymbstr(i,bigbuf,sizeof(bigbuf));`
	`1133`	`+if (valid!=0)`
	`1134`	`+elog(WARNING,`
	`1135`	`+"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",`
	`1136`	`+pg_enc2name_tbl[i].name,valid);`
	`1137`	`+}`
	`1138`	`+`
	`1139`	`+PG_RETURN_VOID();`
	`1140`	`+}`
	`1141`	`+`
`1092`	`1142`	`/*`
`1093`	`1143`	`* Call an encoding conversion or verification function.`
`1094`	`1144`	`*`

`‎src/test/regress/sql/conversion.sql`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,9 @@`
`1`	`1`	`--`
`2`	`2`	`-- create user defined conversion`
`3`	`3`	`--`
	`4`	`+`
	`5`	`+SELECTFROM test_enc_setup();`
	`6`	`+`
`4`	`7`	`CREATEUSERregress_conversion_user WITH NOCREATEDB NOCREATEROLE;`
`5`	`8`	`SET SESSION AUTHORIZATION regress_conversion_user;`
`6`	`9`	`CREATECONVERSIONmyconv FOR'LATIN1' TO'UTF8'FROM iso8859_1_to_utf8;`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Commit5bc33cb

File tree

7 files changed

7 files changed

`‎src/common/wchar.c`

`‎src/include/mb/pg_wchar.h`

`‎src/test/regress/expected/conversion.out`

`‎src/test/regress/input/create_function_0.source`

`‎src/test/regress/output/create_function_0.source`

`‎src/test/regress/regress.c`

`‎src/test/regress/sql/conversion.sql`

0 commit comments