Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 8b528ca

Browse files
[3.10] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().
_PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal().
(cherry picked from commit 9f69a58)
(cherry picked from commit 6279eb8)
(cherry picked from commit a75953b)
(cherry picked from commit 0c33e5b)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 6322edd, commit 8b528ca

File tree

8 files changed

+165
-41
lines changed

8 files changed

+165
-41
lines changed

‎Include/cpython/bytesobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
2525
intuse_bytearray);
2626

2727
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
28+
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape2(constchar*,Py_ssize_t,
29+
constchar*,
30+
int*,constchar**);
31+
// Export for binary compatibility.
2832
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape(constchar*,Py_ssize_t,
2933
constchar*,constchar**);
3034

‎Include/cpython/unicodeobject.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
844844

845845
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
846846
chars.*/
847+
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
848+
constchar *string,/* Unicode-Escape encoded string*/
849+
Py_ssize_t length,/* size of string*/
850+
constchar *errors,/* error handling*/
851+
Py_ssize_t *consumed,/* bytes consumed*/
852+
int *first_invalid_escape_char,/* on return, if not -1, contain the first
853+
invalid escaped char (<= 0xff) or invalid
854+
octal escape (> 0xff) in string.*/
855+
constchar **first_invalid_escape_ptr);/* on return, if not NULL, may
856+
point to the first invalid escaped
857+
char in string.
858+
May be NULL if errors is not NULL.*/
859+
// Export for binary compatibility.
847860
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
848861
constchar *string,/* Unicode-Escape encoded string*/
849862
Py_ssize_t length,/* size of string*/

‎Lib/test/test_codeccallbacks.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importcodecs
22
importhtml.entities
33
importitertools
4+
importre
45
importsys
56
importunicodedata
67
importunittest
@@ -1124,7 +1125,7 @@ def test_bug828737(self):
11241125
text='abc<def>ghi'*n
11251126
text.translate(charmap)
11261127

1127-
deftest_mutatingdecodehandler(self):
1128+
deftest_mutating_decode_handler(self):
11281129
baddata= [
11291130
("ascii",b"\xff"),
11301131
("utf-7",b"++"),
@@ -1159,6 +1160,40 @@ def mutating(exc):
11591160
for (encoding,data)inbaddata:
11601161
self.assertEqual(data.decode(encoding,"test.mutating"),"\u4242")
11611162

1163+
deftest_mutating_decode_handler_unicode_escape(self):
1164+
decode=codecs.unicode_escape_decode
1165+
defmutating(exc):
1166+
ifisinstance(exc,UnicodeDecodeError):
1167+
r=data.get(exc.object[:exc.end])
1168+
ifrisnotNone:
1169+
exc.object=r[0]+exc.object[exc.end:]
1170+
return ('\u0404',r[1])
1171+
raiseAssertionError("don't know how to handle %r"%exc)
1172+
1173+
codecs.register_error('test.mutating2',mutating)
1174+
data= {
1175+
br'\x0': (b'\\',0),
1176+
br'\x3': (b'xxx\\',3),
1177+
br'\x5': (b'x\\',1),
1178+
}
1179+
defcheck(input,expected,msg):
1180+
withself.assertWarns(DeprecationWarning)ascm:
1181+
self.assertEqual(decode(input,'test.mutating2'), (expected,len(input)))
1182+
self.assertIn(msg,str(cm.warning))
1183+
1184+
check(br'\x0n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1185+
check(br'\x0z','\u0404\\z',r"invalid escape sequence '\z'")
1186+
1187+
check(br'\x3n\zr','\u0404\n\\zr',r"invalid escape sequence '\z'")
1188+
check(br'\x3zr','\u0404\\zr',r"invalid escape sequence '\z'")
1189+
check(br'\x3z5','\u0404\\z5',r"invalid escape sequence '\z'")
1190+
check(memoryview(br'\x3z5x')[:-1],'\u0404\\z5',r"invalid escape sequence '\z'")
1191+
check(memoryview(br'\x3z5xy')[:-2],'\u0404\\z5',r"invalid escape sequence '\z'")
1192+
1193+
check(br'\x5n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1194+
check(br'\x5z','\u0404\\z',r"invalid escape sequence '\z'")
1195+
check(memoryview(br'\x5zy')[:-1],'\u0404\\z',r"invalid escape sequence '\z'")
1196+
11621197
# issue32583
11631198
deftest_crashing_decode_handler(self):
11641199
# better generating one more character to fill the extra space slot

‎Lib/test/test_codecs.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,20 +1181,32 @@ def test_escape(self):
11811181
check(br"[\501]",b"[A]")
11821182
check(br"[\x41]",b"[A]")
11831183
check(br"[\x410]",b"[A0]")
1184+
1185+
deftest_warnings(self):
1186+
decode=codecs.escape_decode
1187+
check=coding_checker(self,decode)
11841188
foriinrange(97,123):
11851189
b=bytes([i])
11861190
ifbnotinb'abfnrtvx':
1187-
withself.assertWarns(DeprecationWarning):
1191+
withself.assertWarnsRegex(DeprecationWarning,
1192+
r"invalid escape sequence '\\%c'"%i):
11881193
check(b"\\"+b,b"\\"+b)
1189-
withself.assertWarns(DeprecationWarning):
1194+
withself.assertWarnsRegex(DeprecationWarning,
1195+
r"invalid escape sequence '\\%c'"% (i-32)):
11901196
check(b"\\"+b.upper(),b"\\"+b.upper())
1191-
withself.assertWarns(DeprecationWarning):
1197+
withself.assertWarnsRegex(DeprecationWarning,
1198+
r"invalid escape sequence '\\8'"):
11921199
check(br"\8",b"\\8")
11931200
withself.assertWarns(DeprecationWarning):
11941201
check(br"\9",b"\\9")
1195-
withself.assertWarns(DeprecationWarning):
1202+
withself.assertWarnsRegex(DeprecationWarning,
1203+
r"invalid escape sequence '\\\xfa'")ascm:
11961204
check(b"\\\xfa",b"\\\xfa")
11971205

1206+
withself.assertWarnsRegex(DeprecationWarning,
1207+
r"invalid escape sequence '\\z'"):
1208+
self.assertEqual(decode(br'\x\z','ignore'), (b'\\z',4))
1209+
11981210
deftest_errors(self):
11991211
decode=codecs.escape_decode
12001212
self.assertRaises(ValueError,decode,br"\x")
@@ -2408,20 +2420,31 @@ def test_escape_decode(self):
24082420
check(br"[\x410]","[A0]")
24092421
check(br"\u20ac","\u20ac")
24102422
check(br"\U0001d120","\U0001d120")
2423+
2424+
deftest_decode_warnings(self):
2425+
decode=codecs.unicode_escape_decode
2426+
check=coding_checker(self,decode)
24112427
foriinrange(97,123):
24122428
b=bytes([i])
24132429
ifbnotinb'abfnrtuvx':
2414-
withself.assertWarns(DeprecationWarning):
2430+
withself.assertWarnsRegex(DeprecationWarning,
2431+
r"invalid escape sequence '\\%c'"%i):
24152432
check(b"\\"+b,"\\"+chr(i))
24162433
ifb.upper()notinb'UN':
2417-
withself.assertWarns(DeprecationWarning):
2434+
withself.assertWarnsRegex(DeprecationWarning,
2435+
r"invalid escape sequence '\\%c'"% (i-32)):
24182436
check(b"\\"+b.upper(),"\\"+chr(i-32))
2419-
withself.assertWarns(DeprecationWarning):
2437+
withself.assertWarnsRegex(DeprecationWarning,
2438+
r"invalid escape sequence '\\8'"):
24202439
check(br"\8","\\8")
24212440
withself.assertWarns(DeprecationWarning):
24222441
check(br"\9","\\9")
2423-
withself.assertWarns(DeprecationWarning):
2442+
withself.assertWarnsRegex(DeprecationWarning,
2443+
r"invalid escape sequence '\\\xfa'")ascm:
24242444
check(b"\\\xfa","\\\xfa")
2445+
withself.assertWarnsRegex(DeprecationWarning,
2446+
r"invalid escape sequence '\\z'"):
2447+
self.assertEqual(decode(br'\x\z','ignore'), ('\\z',4))
24252448

24262449
deftest_decode_errors(self):
24272450
decode=codecs.unicode_escape_decode
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
2+
handler.

‎Objects/bytesobject.c

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,10 +1089,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
10891089
}
10901090

10911091
/* Unescape a backslash-escaped string. */
1092-
PyObject*_PyBytes_DecodeEscape(constchar*s,
1092+
PyObject*_PyBytes_DecodeEscape2(constchar*s,
10931093
Py_ssize_tlen,
10941094
constchar*errors,
1095-
constchar**first_invalid_escape)
1095+
int*first_invalid_escape_char,
1096+
constchar**first_invalid_escape_ptr)
10961097
{
10971098
intc;
10981099
char*p;
@@ -1106,7 +1107,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11061107
returnNULL;
11071108
writer.overallocate=1;
11081109

1109-
*first_invalid_escape=NULL;
1110+
*first_invalid_escape_char=-1;
1111+
*first_invalid_escape_ptr=NULL;
11101112

11111113
end=s+len;
11121114
while (s<end) {
@@ -1181,9 +1183,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11811183
break;
11821184

11831185
default:
1184-
if (*first_invalid_escape==NULL) {
1185-
*first_invalid_escape=s-1;/* Back up one char, since we've
1186-
already incremented s. */
1186+
if (*first_invalid_escape_char==-1) {
1187+
*first_invalid_escape_char= (unsignedchar)s[-1];
1188+
/* Back up one char, since we've already incremented s. */
1189+
*first_invalid_escape_ptr=s-1;
11871190
}
11881191
*p++='\\';
11891192
s--;
@@ -1197,21 +1200,36 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11971200
returnNULL;
11981201
}
11991202

1203+
// Export for binary compatibility.
1204+
PyObject*_PyBytes_DecodeEscape(constchar*s,
1205+
Py_ssize_tlen,
1206+
constchar*errors,
1207+
constchar**first_invalid_escape)
1208+
{
1209+
intfirst_invalid_escape_char;
1210+
return_PyBytes_DecodeEscape2(
1211+
s,len,errors,
1212+
&first_invalid_escape_char,
1213+
first_invalid_escape);
1214+
}
1215+
12001216
PyObject*PyBytes_DecodeEscape(constchar*s,
12011217
Py_ssize_tlen,
12021218
constchar*errors,
12031219
Py_ssize_tPy_UNUSED(unicode),
12041220
constchar*Py_UNUSED(recode_encoding))
12051221
{
1206-
constchar*first_invalid_escape;
1207-
PyObject*result=_PyBytes_DecodeEscape(s,len,errors,
1208-
&first_invalid_escape);
1222+
intfirst_invalid_escape_char;
1223+
constchar*first_invalid_escape_ptr;
1224+
PyObject*result=_PyBytes_DecodeEscape2(s,len,errors,
1225+
&first_invalid_escape_char,
1226+
&first_invalid_escape_ptr);
12091227
if (result==NULL)
12101228
returnNULL;
1211-
if (first_invalid_escape!=NULL) {
1229+
if (first_invalid_escape_char!=-1) {
12121230
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
12131231
"invalid escape sequence '\\%c'",
1214-
(unsignedchar)*first_invalid_escape)<0) {
1232+
first_invalid_escape_char)<0) {
12151233
Py_DECREF(result);
12161234
returnNULL;
12171235
}

‎Objects/unicodeobject.c

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6432,20 +6432,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
64326432
static_PyUnicode_Name_CAPI*ucnhash_capi=NULL;
64336433

64346434
PyObject*
6435-
_PyUnicode_DecodeUnicodeEscapeInternal(constchar*s,
6435+
_PyUnicode_DecodeUnicodeEscapeInternal2(constchar*s,
64366436
Py_ssize_tsize,
64376437
constchar*errors,
64386438
Py_ssize_t*consumed,
6439-
constchar**first_invalid_escape)
6439+
int*first_invalid_escape_char,
6440+
constchar**first_invalid_escape_ptr)
64406441
{
64416442
constchar*starts=s;
6443+
constchar*initial_starts=starts;
64426444
_PyUnicodeWriterwriter;
64436445
constchar*end;
64446446
PyObject*errorHandler=NULL;
64456447
PyObject*exc=NULL;
64466448

64476449
// so we can remember if we've seen an invalid escape char or not
6448-
*first_invalid_escape=NULL;
6450+
*first_invalid_escape_char=-1;
6451+
*first_invalid_escape_ptr=NULL;
64496452

64506453
if (size==0) {
64516454
if (consumed) {
@@ -6628,9 +6631,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
66286631
gotoerror;
66296632

66306633
default:
6631-
if (*first_invalid_escape==NULL) {
6632-
*first_invalid_escape=s-1;/* Back up one char, since we've
6633-
already incremented s. */
6634+
if (*first_invalid_escape_char==-1) {
6635+
*first_invalid_escape_char=c;
6636+
if (starts==initial_starts) {
6637+
/* Back up one char, since we've already incremented s. */
6638+
*first_invalid_escape_ptr=s-1;
6639+
}
66346640
}
66356641
WRITE_ASCII_CHAR('\\');
66366642
WRITE_CHAR(c);
@@ -6669,22 +6675,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
66696675
returnNULL;
66706676
}
66716677

6678+
// Export for binary compatibility.
6679+
PyObject*
6680+
_PyUnicode_DecodeUnicodeEscapeInternal(constchar*s,
6681+
Py_ssize_tsize,
6682+
constchar*errors,
6683+
Py_ssize_t*consumed,
6684+
constchar**first_invalid_escape)
6685+
{
6686+
intfirst_invalid_escape_char;
6687+
return_PyUnicode_DecodeUnicodeEscapeInternal2(
6688+
s,size,errors,consumed,
6689+
&first_invalid_escape_char,
6690+
first_invalid_escape);
6691+
}
6692+
66726693
PyObject*
66736694
_PyUnicode_DecodeUnicodeEscapeStateful(constchar*s,
66746695
Py_ssize_tsize,
66756696
constchar*errors,
66766697
Py_ssize_t*consumed)
66776698
{
6678-
constchar*first_invalid_escape;
6679-
PyObject*result=_PyUnicode_DecodeUnicodeEscapeInternal(s,size,errors,
6699+
intfirst_invalid_escape_char;
6700+
constchar*first_invalid_escape_ptr;
6701+
PyObject*result=_PyUnicode_DecodeUnicodeEscapeInternal2(s,size,errors,
66806702
consumed,
6681-
&first_invalid_escape);
6703+
&first_invalid_escape_char,
6704+
&first_invalid_escape_ptr);
66826705
if (result==NULL)
66836706
returnNULL;
6684-
if (first_invalid_escape!=NULL) {
6707+
if (first_invalid_escape_char!=-1) {
66856708
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
66866709
"invalid escape sequence '\\%c'",
6687-
(unsignedchar)*first_invalid_escape)<0) {
6710+
first_invalid_escape_char)<0) {
66886711
Py_DECREF(result);
66896712
returnNULL;
66906713
}

‎Parser/string_parser.c

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,15 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
114114
len=p-buf;
115115
s=buf;
116116

117-
constchar*first_invalid_escape;
118-
v=_PyUnicode_DecodeUnicodeEscapeInternal(s,len,NULL,NULL,&first_invalid_escape);
119-
120-
if (v!=NULL&&first_invalid_escape!=NULL) {
121-
if (warn_invalid_escape_sequence(parser,*first_invalid_escape,t)<0) {
122-
/* We have not decref u before because first_invalid_escape points
117+
intfirst_invalid_escape_char;
118+
constchar*first_invalid_escape_ptr;
119+
v=_PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len,NULL,NULL,
120+
&first_invalid_escape_char,
121+
&first_invalid_escape_ptr);
122+
123+
if (v!=NULL&&first_invalid_escape_ptr!=NULL) {
124+
if (warn_invalid_escape_sequence(parser,*first_invalid_escape_ptr,t)<0) {
125+
/* We have not decref u before because first_invalid_escape_ptr points
123126
inside u. */
124127
Py_XDECREF(u);
125128
Py_DECREF(v);
@@ -133,14 +136,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
133136
staticPyObject*
134137
decode_bytes_with_escapes(Parser*p,constchar*s,Py_ssize_tlen,Token*t)
135138
{
136-
constchar*first_invalid_escape;
137-
PyObject*result=_PyBytes_DecodeEscape(s,len,NULL,&first_invalid_escape);
139+
intfirst_invalid_escape_char;
140+
constchar*first_invalid_escape_ptr;
141+
PyObject*result=_PyBytes_DecodeEscape2(s,len,NULL,
142+
&first_invalid_escape_char,
143+
&first_invalid_escape_ptr);
138144
if (result==NULL) {
139145
returnNULL;
140146
}
141147

142-
if (first_invalid_escape!=NULL) {
143-
if (warn_invalid_escape_sequence(p,*first_invalid_escape,t)<0) {
148+
if (first_invalid_escape_ptr!=NULL) {
149+
if (warn_invalid_escape_sequence(p,*first_invalid_escape_ptr,t)<0) {
144150
Py_DECREF(result);
145151
returnNULL;
146152
}

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp