Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 0c33e5b

Browse files
[3.11] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58) (cherry picked from commit 6279eb8) (cherry picked from commit a75953b) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 461ca2c commit 0c33e5b

File tree

8 files changed

+198
-57
lines changed

8 files changed

+198
-57
lines changed

‎Include/cpython/bytesobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
2525
intuse_bytearray);
2626

2727
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
28+
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape2(constchar*,Py_ssize_t,
29+
constchar*,
30+
int*,constchar**);
31+
// Export for binary compatibility.
2832
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape(constchar*,Py_ssize_t,
2933
constchar*,constchar**);
3034

‎Include/cpython/unicodeobject.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -914,6 +914,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
914914
);
915915
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
916916
chars.*/
917+
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
918+
constchar *string,/* Unicode-Escape encoded string*/
919+
Py_ssize_t length,/* size of string*/
920+
constchar *errors,/* error handling*/
921+
Py_ssize_t *consumed,/* bytes consumed*/
922+
int *first_invalid_escape_char,/* on return, if not -1, contain the first
923+
invalid escaped char (<= 0xff) or invalid
924+
octal escape (> 0xff) in string.*/
925+
constchar **first_invalid_escape_ptr);/* on return, if not NULL, may
926+
point to the first invalid escaped
927+
char in string.
928+
May be NULL if errors is not NULL.*/
929+
// Export for binary compatibility.
917930
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
918931
constchar *string,/* Unicode-Escape encoded string*/
919932
Py_ssize_t length,/* size of string*/

‎Lib/test/test_codeccallbacks.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importcodecs
22
importhtml.entities
33
importitertools
4+
importre
45
importsys
56
importunicodedata
67
importunittest
@@ -1124,7 +1125,7 @@ def test_bug828737(self):
11241125
text='abc<def>ghi'*n
11251126
text.translate(charmap)
11261127

1127-
deftest_mutatingdecodehandler(self):
1128+
deftest_mutating_decode_handler(self):
11281129
baddata= [
11291130
("ascii",b"\xff"),
11301131
("utf-7",b"++"),
@@ -1159,6 +1160,42 @@ def mutating(exc):
11591160
for (encoding,data)inbaddata:
11601161
self.assertEqual(data.decode(encoding,"test.mutating"),"\u4242")
11611162

1163+
deftest_mutating_decode_handler_unicode_escape(self):
1164+
decode=codecs.unicode_escape_decode
1165+
defmutating(exc):
1166+
ifisinstance(exc,UnicodeDecodeError):
1167+
r=data.get(exc.object[:exc.end])
1168+
ifrisnotNone:
1169+
exc.object=r[0]+exc.object[exc.end:]
1170+
return ('\u0404',r[1])
1171+
raiseAssertionError("don't know how to handle %r"%exc)
1172+
1173+
codecs.register_error('test.mutating2',mutating)
1174+
data= {
1175+
br'\x0': (b'\\',0),
1176+
br'\x3': (b'xxx\\',3),
1177+
br'\x5': (b'x\\',1),
1178+
}
1179+
defcheck(input,expected,msg):
1180+
withself.assertWarns(DeprecationWarning)ascm:
1181+
self.assertEqual(decode(input,'test.mutating2'), (expected,len(input)))
1182+
self.assertIn(msg,str(cm.warning))
1183+
1184+
check(br'\x0n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1185+
check(br'\x0n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1186+
check(br'\x0z','\u0404\\z',r"invalid escape sequence '\z'")
1187+
1188+
check(br'\x3n\zr','\u0404\n\\zr',r"invalid escape sequence '\z'")
1189+
check(br'\x3zr','\u0404\\zr',r"invalid escape sequence '\z'")
1190+
check(br'\x3z5','\u0404\\z5',r"invalid escape sequence '\z'")
1191+
check(memoryview(br'\x3z5x')[:-1],'\u0404\\z5',r"invalid escape sequence '\z'")
1192+
check(memoryview(br'\x3z5xy')[:-2],'\u0404\\z5',r"invalid escape sequence '\z'")
1193+
1194+
check(br'\x5n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1195+
check(br'\x5n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1196+
check(br'\x5z','\u0404\\z',r"invalid escape sequence '\z'")
1197+
check(memoryview(br'\x5zy')[:-1],'\u0404\\z',r"invalid escape sequence '\z'")
1198+
11621199
# issue32583
11631200
deftest_crashing_decode_handler(self):
11641201
# better generating one more character to fill the extra space slot

‎Lib/test/test_codecs.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,23 +1198,39 @@ def test_escape(self):
11981198
check(br"[\1010]",b"[A0]")
11991199
check(br"[\x41]",b"[A]")
12001200
check(br"[\x410]",b"[A0]")
1201+
1202+
deftest_warnings(self):
1203+
decode=codecs.escape_decode
1204+
check=coding_checker(self,decode)
12011205
foriinrange(97,123):
12021206
b=bytes([i])
12031207
ifbnotinb'abfnrtvx':
1204-
withself.assertWarns(DeprecationWarning):
1208+
withself.assertWarnsRegex(DeprecationWarning,
1209+
r"invalid escape sequence '\\%c'"%i):
12051210
check(b"\\"+b,b"\\"+b)
1206-
withself.assertWarns(DeprecationWarning):
1211+
withself.assertWarnsRegex(DeprecationWarning,
1212+
r"invalid escape sequence '\\%c'"% (i-32)):
12071213
check(b"\\"+b.upper(),b"\\"+b.upper())
1208-
withself.assertWarns(DeprecationWarning):
1214+
withself.assertWarnsRegex(DeprecationWarning,
1215+
r"invalid escape sequence '\\8'"):
12091216
check(br"\8",b"\\8")
12101217
withself.assertWarns(DeprecationWarning):
12111218
check(br"\9",b"\\9")
1212-
withself.assertWarns(DeprecationWarning):
1219+
withself.assertWarnsRegex(DeprecationWarning,
1220+
r"invalid escape sequence '\\\xfa'")ascm:
12131221
check(b"\\\xfa",b"\\\xfa")
12141222
foriinrange(0o400,0o1000):
1215-
withself.assertWarns(DeprecationWarning):
1223+
withself.assertWarnsRegex(DeprecationWarning,
1224+
r"invalid octal escape sequence '\\%o'"%i):
12161225
check(rb'\%o'%i,bytes([i&0o377]))
12171226

1227+
withself.assertWarnsRegex(DeprecationWarning,
1228+
r"invalid escape sequence '\\z'"):
1229+
self.assertEqual(decode(br'\x\z','ignore'), (b'\\z',4))
1230+
withself.assertWarnsRegex(DeprecationWarning,
1231+
r"invalid octal escape sequence '\\501'"):
1232+
self.assertEqual(decode(br'\x\501','ignore'), (b'A',6))
1233+
12181234
deftest_errors(self):
12191235
decode=codecs.escape_decode
12201236
self.assertRaises(ValueError,decode,br"\x")
@@ -2487,24 +2503,40 @@ def test_escape_decode(self):
24872503
check(br"[\x410]","[A0]")
24882504
check(br"\u20ac","\u20ac")
24892505
check(br"\U0001d120","\U0001d120")
2506+
2507+
deftest_decode_warnings(self):
2508+
decode=codecs.unicode_escape_decode
2509+
check=coding_checker(self,decode)
24902510
foriinrange(97,123):
24912511
b=bytes([i])
24922512
ifbnotinb'abfnrtuvx':
2493-
withself.assertWarns(DeprecationWarning):
2513+
withself.assertWarnsRegex(DeprecationWarning,
2514+
r"invalid escape sequence '\\%c'"%i):
24942515
check(b"\\"+b,"\\"+chr(i))
24952516
ifb.upper()notinb'UN':
2496-
withself.assertWarns(DeprecationWarning):
2517+
withself.assertWarnsRegex(DeprecationWarning,
2518+
r"invalid escape sequence '\\%c'"% (i-32)):
24972519
check(b"\\"+b.upper(),"\\"+chr(i-32))
2498-
withself.assertWarns(DeprecationWarning):
2520+
withself.assertWarnsRegex(DeprecationWarning,
2521+
r"invalid escape sequence '\\8'"):
24992522
check(br"\8","\\8")
25002523
withself.assertWarns(DeprecationWarning):
25012524
check(br"\9","\\9")
2502-
withself.assertWarns(DeprecationWarning):
2525+
withself.assertWarnsRegex(DeprecationWarning,
2526+
r"invalid escape sequence '\\\xfa'")ascm:
25032527
check(b"\\\xfa","\\\xfa")
25042528
foriinrange(0o400,0o1000):
2505-
withself.assertWarns(DeprecationWarning):
2529+
withself.assertWarnsRegex(DeprecationWarning,
2530+
r"invalid octal escape sequence '\\%o'"%i):
25062531
check(rb'\%o'%i,chr(i))
25072532

2533+
withself.assertWarnsRegex(DeprecationWarning,
2534+
r"invalid escape sequence '\\z'"):
2535+
self.assertEqual(decode(br'\x\z','ignore'), ('\\z',4))
2536+
withself.assertWarnsRegex(DeprecationWarning,
2537+
r"invalid octal escape sequence '\\501'"):
2538+
self.assertEqual(decode(br'\x\501','ignore'), ('\u0141',6))
2539+
25082540
deftest_decode_errors(self):
25092541
decode=codecs.unicode_escape_decode
25102542
forc,din (b'x',2), (b'u',4), (b'U',4):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
2+
handler.

‎Objects/bytesobject.c

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,10 +1057,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
10571057
}
10581058

10591059
/* Unescape a backslash-escaped string. */
1060-
PyObject*_PyBytes_DecodeEscape(constchar*s,
1060+
PyObject*_PyBytes_DecodeEscape2(constchar*s,
10611061
Py_ssize_tlen,
10621062
constchar*errors,
1063-
constchar**first_invalid_escape)
1063+
int*first_invalid_escape_char,
1064+
constchar**first_invalid_escape_ptr)
10641065
{
10651066
intc;
10661067
char*p;
@@ -1074,7 +1075,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
10741075
returnNULL;
10751076
writer.overallocate=1;
10761077

1077-
*first_invalid_escape=NULL;
1078+
*first_invalid_escape_char=-1;
1079+
*first_invalid_escape_ptr=NULL;
10781080

10791081
end=s+len;
10801082
while (s<end) {
@@ -1112,9 +1114,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11121114
c= (c<<3)+*s++-'0';
11131115
}
11141116
if (c>0377) {
1115-
if (*first_invalid_escape==NULL) {
1116-
*first_invalid_escape=s-3;/* Back up 3 chars, since we've
1117-
already incremented s. */
1117+
if (*first_invalid_escape_char==-1) {
1118+
*first_invalid_escape_char=c;
1119+
/* Back up 3 chars, since we've already incremented s. */
1120+
*first_invalid_escape_ptr=s-3;
11181121
}
11191122
}
11201123
*p++=c;
@@ -1155,9 +1158,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11551158
break;
11561159

11571160
default:
1158-
if (*first_invalid_escape==NULL) {
1159-
*first_invalid_escape=s-1;/* Back up one char, since we've
1160-
already incremented s. */
1161+
if (*first_invalid_escape_char==-1) {
1162+
*first_invalid_escape_char= (unsignedchar)s[-1];
1163+
/* Back up one char, since we've already incremented s. */
1164+
*first_invalid_escape_ptr=s-1;
11611165
}
11621166
*p++='\\';
11631167
s--;
@@ -1171,23 +1175,39 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11711175
returnNULL;
11721176
}
11731177

1178+
// Export for binary compatibility.
1179+
PyObject*_PyBytes_DecodeEscape(constchar*s,
1180+
Py_ssize_tlen,
1181+
constchar*errors,
1182+
constchar**first_invalid_escape)
1183+
{
1184+
intfirst_invalid_escape_char;
1185+
return_PyBytes_DecodeEscape2(
1186+
s,len,errors,
1187+
&first_invalid_escape_char,
1188+
first_invalid_escape);
1189+
}
1190+
11741191
PyObject*PyBytes_DecodeEscape(constchar*s,
11751192
Py_ssize_tlen,
11761193
constchar*errors,
11771194
Py_ssize_tPy_UNUSED(unicode),
11781195
constchar*Py_UNUSED(recode_encoding))
11791196
{
1180-
constchar*first_invalid_escape;
1181-
PyObject*result=_PyBytes_DecodeEscape(s,len,errors,
1182-
&first_invalid_escape);
1197+
intfirst_invalid_escape_char;
1198+
constchar*first_invalid_escape_ptr;
1199+
PyObject*result=_PyBytes_DecodeEscape2(s,len,errors,
1200+
&first_invalid_escape_char,
1201+
&first_invalid_escape_ptr);
11831202
if (result==NULL)
11841203
returnNULL;
1185-
if (first_invalid_escape!=NULL) {
1186-
unsignedcharc=*first_invalid_escape;
1187-
if ('4' <=c&&c <='7') {
1204+
if (first_invalid_escape_char!=-1) {
1205+
if (first_invalid_escape_char>0xff) {
1206+
charbuf[12]="";
1207+
snprintf(buf,sizeofbuf,"%o",first_invalid_escape_char);
11881208
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
1189-
"invalid octal escape sequence '\\%.3s'",
1190-
first_invalid_escape)<0)
1209+
"invalid octal escape sequence '\\%s'",
1210+
buf)<0)
11911211
{
11921212
Py_DECREF(result);
11931213
returnNULL;
@@ -1196,7 +1216,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
11961216
else {
11971217
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
11981218
"invalid escape sequence '\\%c'",
1199-
c)<0)
1219+
first_invalid_escape_char)<0)
12001220
{
12011221
Py_DECREF(result);
12021222
returnNULL;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp