Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 5bc33cb

Browse files
committed
Add pg_encoding_set_invalid()
There are cases where we cannot / do not want to error out for invalidly encoded input. In such cases it can be useful to replace e.g. an incomplete multi-byte character with bytes that will trigger an error when getting validated as part of a larger string.

Unfortunately, until now, for some encodings no such sequence existed. For those encodings this commit removes one previously accepted input combination - we consider that to be ok, as the chosen bytes are outside of the valid ranges for the encodings, we just previously failed to detect that.

As we cannot add a new field to pg_wchar_table without breaking ABI, this is implemented "in-line" in the newly added function.

Author: Noah Misch <noah@leadboat.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Backpatch-through: 13
Security: CVE-2025-1094
1 parent 04f31c8 commit 5bc33cb

File tree

7 files changed

+121
-2
lines changed

7 files changed

+121
-2
lines changed

‎src/common/wchar.c

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,25 @@
1515
#include"mb/pg_wchar.h"
1616

1717

18+
/*
19+
* In today's multibyte encodings other than UTF8, this two-byte sequence
20+
* ensures pg_encoding_mblen() == 2 && pg_encoding_verifymbstr() == 0.
21+
*
22+
* For historical reasons, several verifychar implementations opt to reject
23+
* this pair specifically. Byte pair range constraints, in encoding
24+
* originator documentation, always excluded this pair. No core conversion
25+
* could translate it. However, longstanding verifychar implementations
26+
* accepted any non-NUL byte. big5_to_euc_tw and big5_to_mic even translate
27+
* pairs not valid per encoding originator documentation. To avoid tightening
28+
* core or non-core conversions in a security patch, we sought this one pair.
29+
*
30+
* PQescapeString() historically used spaces for BYTE1; many other values
31+
* could suffice for BYTE1.
32+
*/
33+
#defineNONUTF8_INVALID_BYTE0 (0x8d)
34+
#defineNONUTF8_INVALID_BYTE1 (' ')
35+
36+
1837
/*
1938
* Operations on multi-byte encodings are driven by a table of helper
2039
* functions.
@@ -1532,6 +1551,11 @@ pg_big5_verifychar(const unsigned char *s, int len)
15321551
if (len<l)
15331552
return-1;
15341553

1554+
if (l==2&&
1555+
s[0]==NONUTF8_INVALID_BYTE0&&
1556+
s[1]==NONUTF8_INVALID_BYTE1)
1557+
return-1;
1558+
15351559
while (--l>0)
15361560
{
15371561
if (*++s=='\0')
@@ -1581,6 +1605,11 @@ pg_gbk_verifychar(const unsigned char *s, int len)
15811605
if (len<l)
15821606
return-1;
15831607

1608+
if (l==2&&
1609+
s[0]==NONUTF8_INVALID_BYTE0&&
1610+
s[1]==NONUTF8_INVALID_BYTE1)
1611+
return-1;
1612+
15841613
while (--l>0)
15851614
{
15861615
if (*++s=='\0')
@@ -1630,6 +1659,11 @@ pg_uhc_verifychar(const unsigned char *s, int len)
16301659
if (len<l)
16311660
return-1;
16321661

1662+
if (l==2&&
1663+
s[0]==NONUTF8_INVALID_BYTE0&&
1664+
s[1]==NONUTF8_INVALID_BYTE1)
1665+
return-1;
1666+
16331667
while (--l>0)
16341668
{
16351669
if (*++s=='\0')
@@ -1858,6 +1892,19 @@ pg_utf8_islegal(const unsigned char *source, int length)
18581892
}
18591893

18601894

1895+
/*
1896+
* Fills the provided buffer with two bytes such that:
1897+
* pg_encoding_mblen(dst) == 2 && pg_encoding_verifymbstr(dst) == 0
1898+
*/
1899+
void
1900+
pg_encoding_set_invalid(intencoding,char*dst)
1901+
{
1902+
Assert(pg_encoding_max_length(encoding)>1);
1903+
1904+
dst[0]= (encoding==PG_UTF8 ?0xc0 :NONUTF8_INVALID_BYTE0);
1905+
dst[1]=NONUTF8_INVALID_BYTE1;
1906+
}
1907+
18611908
/*
18621909
*-------------------------------------------------------------------
18631910
* encoding info table
@@ -1980,5 +2027,11 @@ pg_encoding_max_length(int encoding)
19802027
{
19812028
Assert(PG_VALID_ENCODING(encoding));
19822029

1983-
returnpg_wchar_table[encoding].maxmblen;
2030+
/*
2031+
* Check for the encoding despite the assert, due to some mingw versions
2032+
* otherwise issuing bogus warnings.
2033+
*/
2034+
returnPG_VALID_ENCODING(encoding) ?
2035+
pg_wchar_table[encoding].maxmblen :
2036+
pg_wchar_table[PG_SQL_ASCII].maxmblen;
19842037
}

‎src/include/mb/pg_wchar.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ typedef struct pg_enc2name
359359
#endif
360360
}pg_enc2name;
361361

362-
externconstpg_enc2namepg_enc2name_tbl[];
362+
externPGDLLIMPORTconstpg_enc2namepg_enc2name_tbl[];
363363

364364
/*
365365
* Encoding names for gettext
@@ -573,6 +573,7 @@ extern intpg_valid_server_encoding_id(int encoding);
573573
* (in addition to the ones just above). The constant tables declared
574574
* earlier in this file are also available from libpgcommon.
575575
*/
576+
externvoidpg_encoding_set_invalid(intencoding,char*dst);
576577
externintpg_encoding_mblen(intencoding,constchar*mbstr);
577578
externintpg_encoding_mblen_bounded(intencoding,constchar*mbstr);
578579
externintpg_encoding_dsplen(intencoding,constchar*mbstr);

‎src/test/regress/expected/conversion.out

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
--
22
-- create user defined conversion
33
--
4+
SELECT FROM test_enc_setup();
5+
--
6+
(1 row)
7+
48
CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
59
SET SESSION AUTHORIZATION regress_conversion_user;
610
CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;

‎src/test/regress/input/create_function_0.source

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ CREATE FUNCTION test_opclass_options_func(internal)
5959
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
6060
LANGUAGE C;
6161

62+
63+
CREATE FUNCTION test_enc_setup() RETURNS void
64+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
65+
LANGUAGE C STRICT;
66+
6267
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
6368
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
6469
LANGUAGE C STRICT;

‎src/test/regress/output/create_function_0.source

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ CREATE FUNCTION test_opclass_options_func(internal)
4646
RETURNS void
4747
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
4848
LANGUAGE C;
49+
CREATE FUNCTION test_enc_setup() RETURNS void
50+
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_setup'
51+
LANGUAGE C STRICT;
4952
CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
5053
AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
5154
LANGUAGE C STRICT;

‎src/test/regress/regress.c

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,6 +1089,56 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
10891089
PG_RETURN_NULL();
10901090
}
10911091

1092+
/* one-time tests for encoding infrastructure */
1093+
PG_FUNCTION_INFO_V1(test_enc_setup);
1094+
Datum
1095+
test_enc_setup(PG_FUNCTION_ARGS)
1096+
{
1097+
/* Test pg_encoding_set_invalid() */
1098+
for (inti=0;i<_PG_LAST_ENCODING_;i++)
1099+
{
1100+
charbuf[2],
1101+
bigbuf[16];
1102+
intlen,
1103+
mblen,
1104+
valid;
1105+
1106+
if (pg_encoding_max_length(i)==1)
1107+
continue;
1108+
pg_encoding_set_invalid(i,buf);
1109+
len=strnlen(buf,2);
1110+
if (len!=2)
1111+
elog(WARNING,
1112+
"official invalid string for encoding \"%s\" has length %d",
1113+
pg_enc2name_tbl[i].name,len);
1114+
mblen=pg_encoding_mblen(i,buf);
1115+
if (mblen!=2)
1116+
elog(WARNING,
1117+
"official invalid string for encoding \"%s\" has mblen %d",
1118+
pg_enc2name_tbl[i].name,mblen);
1119+
valid=pg_encoding_verifymbstr(i,buf,len);
1120+
if (valid!=0)
1121+
elog(WARNING,
1122+
"official invalid string for encoding \"%s\" has valid prefix of length %d",
1123+
pg_enc2name_tbl[i].name,valid);
1124+
valid=pg_encoding_verifymbstr(i,buf,1);
1125+
if (valid!=0)
1126+
elog(WARNING,
1127+
"first byte of official invalid string for encoding \"%s\" has valid prefix of length %d",
1128+
pg_enc2name_tbl[i].name,valid);
1129+
memset(bigbuf,' ',sizeof(bigbuf));
1130+
bigbuf[0]=buf[0];
1131+
bigbuf[1]=buf[1];
1132+
valid=pg_encoding_verifymbstr(i,bigbuf,sizeof(bigbuf));
1133+
if (valid!=0)
1134+
elog(WARNING,
1135+
"trailing data changed official invalid string for encoding \"%s\" to have valid prefix of length %d",
1136+
pg_enc2name_tbl[i].name,valid);
1137+
}
1138+
1139+
PG_RETURN_VOID();
1140+
}
1141+
10921142
/*
10931143
* Call an encoding conversion or verification function.
10941144
*

‎src/test/regress/sql/conversion.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
--
22
-- create user defined conversion
33
--
4+
5+
SELECTFROM test_enc_setup();
6+
47
CREATEUSERregress_conversion_user WITH NOCREATEDB NOCREATEROLE;
58
SET SESSION AUTHORIZATION regress_conversion_user;
69
CREATECONVERSIONmyconv FOR'LATIN1' TO'UTF8'FROM iso8859_1_to_utf8;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp