Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a75953b

Browse files
[3.12] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal().

_PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal().

(cherry picked from commit 9f69a58)
(cherry picked from commit 6279eb8)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 310cd89 commit a75953b

File tree

8 files changed

+194
-57
lines changed

8 files changed

+194
-57
lines changed

‎Include/cpython/bytesobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
2525
intuse_bytearray);
2626

2727
/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
28+
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape2(constchar*,Py_ssize_t,
29+
constchar*,
30+
int*,constchar**);
31+
// Export for binary compatibility.
2832
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape(constchar*,Py_ssize_t,
2933
constchar*,constchar**);
3034

‎Include/cpython/unicodeobject.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
684684
);
685685
/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
686686
chars. */
687+
PyAPI_FUNC(PyObject*)_PyUnicode_DecodeUnicodeEscapeInternal2(
688+
constchar*string,/* Unicode-Escape encoded string */
689+
Py_ssize_tlength,/* size of string */
690+
constchar*errors,/* error handling */
691+
Py_ssize_t*consumed,/* bytes consumed */
692+
int*first_invalid_escape_char,/* on return, if not -1, contain the first
693+
invalid escaped char (<= 0xff) or invalid
694+
octal escape (> 0xff) in string. */
695+
constchar**first_invalid_escape_ptr);/* on return, if not NULL, may
696+
point to the first invalid escaped
697+
char in string.
698+
May be NULL if errors is not NULL. */
699+
// Export for binary compatibility.
687700
PyAPI_FUNC(PyObject*)_PyUnicode_DecodeUnicodeEscapeInternal(
688701
constchar*string,/* Unicode-Escape encoded string */
689702
Py_ssize_tlength,/* size of string */

‎Lib/test/test_codeccallbacks.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importcodecs
22
importhtml.entities
33
importitertools
4+
importre
45
importsys
56
importunicodedata
67
importunittest
@@ -1124,7 +1125,7 @@ def test_bug828737(self):
11241125
text='abc<def>ghi'*n
11251126
text.translate(charmap)
11261127

1127-
deftest_mutatingdecodehandler(self):
1128+
deftest_mutating_decode_handler(self):
11281129
baddata= [
11291130
("ascii",b"\xff"),
11301131
("utf-7",b"++"),
@@ -1159,6 +1160,42 @@ def mutating(exc):
11591160
for (encoding,data)inbaddata:
11601161
self.assertEqual(data.decode(encoding,"test.mutating"),"\u4242")
11611162

1163+
deftest_mutating_decode_handler_unicode_escape(self):
1164+
decode=codecs.unicode_escape_decode
1165+
defmutating(exc):
1166+
ifisinstance(exc,UnicodeDecodeError):
1167+
r=data.get(exc.object[:exc.end])
1168+
ifrisnotNone:
1169+
exc.object=r[0]+exc.object[exc.end:]
1170+
return ('\u0404',r[1])
1171+
raiseAssertionError("don't know how to handle %r"%exc)
1172+
1173+
codecs.register_error('test.mutating2',mutating)
1174+
data= {
1175+
br'\x0': (b'\\',0),
1176+
br'\x3': (b'xxx\\',3),
1177+
br'\x5': (b'x\\',1),
1178+
}
1179+
defcheck(input,expected,msg):
1180+
withself.assertWarns(DeprecationWarning)ascm:
1181+
self.assertEqual(decode(input,'test.mutating2'), (expected,len(input)))
1182+
self.assertIn(msg,str(cm.warning))
1183+
1184+
check(br'\x0n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1185+
check(br'\x0n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1186+
check(br'\x0z','\u0404\\z',r"invalid escape sequence '\z'")
1187+
1188+
check(br'\x3n\zr','\u0404\n\\zr',r"invalid escape sequence '\z'")
1189+
check(br'\x3zr','\u0404\\zr',r"invalid escape sequence '\z'")
1190+
check(br'\x3z5','\u0404\\z5',r"invalid escape sequence '\z'")
1191+
check(memoryview(br'\x3z5x')[:-1],'\u0404\\z5',r"invalid escape sequence '\z'")
1192+
check(memoryview(br'\x3z5xy')[:-2],'\u0404\\z5',r"invalid escape sequence '\z'")
1193+
1194+
check(br'\x5n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1195+
check(br'\x5n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1196+
check(br'\x5z','\u0404\\z',r"invalid escape sequence '\z'")
1197+
check(memoryview(br'\x5zy')[:-1],'\u0404\\z',r"invalid escape sequence '\z'")
1198+
11621199
# issue32583
11631200
deftest_crashing_decode_handler(self):
11641201
# better generating one more character to fill the extra space slot

‎Lib/test/test_codecs.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,23 +1196,39 @@ def test_escape(self):
11961196
check(br"[\1010]",b"[A0]")
11971197
check(br"[\x41]",b"[A]")
11981198
check(br"[\x410]",b"[A0]")
1199+
1200+
deftest_warnings(self):
1201+
decode=codecs.escape_decode
1202+
check=coding_checker(self,decode)
11991203
foriinrange(97,123):
12001204
b=bytes([i])
12011205
ifbnotinb'abfnrtvx':
1202-
withself.assertWarns(DeprecationWarning):
1206+
withself.assertWarnsRegex(DeprecationWarning,
1207+
r"invalid escape sequence '\\%c'"%i):
12031208
check(b"\\"+b,b"\\"+b)
1204-
withself.assertWarns(DeprecationWarning):
1209+
withself.assertWarnsRegex(DeprecationWarning,
1210+
r"invalid escape sequence '\\%c'"% (i-32)):
12051211
check(b"\\"+b.upper(),b"\\"+b.upper())
1206-
withself.assertWarns(DeprecationWarning):
1212+
withself.assertWarnsRegex(DeprecationWarning,
1213+
r"invalid escape sequence '\\8'"):
12071214
check(br"\8",b"\\8")
12081215
withself.assertWarns(DeprecationWarning):
12091216
check(br"\9",b"\\9")
1210-
withself.assertWarns(DeprecationWarning):
1217+
withself.assertWarnsRegex(DeprecationWarning,
1218+
r"invalid escape sequence '\\\xfa'")ascm:
12111219
check(b"\\\xfa",b"\\\xfa")
12121220
foriinrange(0o400,0o1000):
1213-
withself.assertWarns(DeprecationWarning):
1221+
withself.assertWarnsRegex(DeprecationWarning,
1222+
r"invalid octal escape sequence '\\%o'"%i):
12141223
check(rb'\%o'%i,bytes([i&0o377]))
12151224

1225+
withself.assertWarnsRegex(DeprecationWarning,
1226+
r"invalid escape sequence '\\z'"):
1227+
self.assertEqual(decode(br'\x\z','ignore'), (b'\\z',4))
1228+
withself.assertWarnsRegex(DeprecationWarning,
1229+
r"invalid octal escape sequence '\\501'"):
1230+
self.assertEqual(decode(br'\x\501','ignore'), (b'A',6))
1231+
12161232
deftest_errors(self):
12171233
decode=codecs.escape_decode
12181234
self.assertRaises(ValueError,decode,br"\x")
@@ -2479,24 +2495,40 @@ def test_escape_decode(self):
24792495
check(br"[\x410]","[A0]")
24802496
check(br"\u20ac","\u20ac")
24812497
check(br"\U0001d120","\U0001d120")
2498+
2499+
deftest_decode_warnings(self):
2500+
decode=codecs.unicode_escape_decode
2501+
check=coding_checker(self,decode)
24822502
foriinrange(97,123):
24832503
b=bytes([i])
24842504
ifbnotinb'abfnrtuvx':
2485-
withself.assertWarns(DeprecationWarning):
2505+
withself.assertWarnsRegex(DeprecationWarning,
2506+
r"invalid escape sequence '\\%c'"%i):
24862507
check(b"\\"+b,"\\"+chr(i))
24872508
ifb.upper()notinb'UN':
2488-
withself.assertWarns(DeprecationWarning):
2509+
withself.assertWarnsRegex(DeprecationWarning,
2510+
r"invalid escape sequence '\\%c'"% (i-32)):
24892511
check(b"\\"+b.upper(),"\\"+chr(i-32))
2490-
withself.assertWarns(DeprecationWarning):
2512+
withself.assertWarnsRegex(DeprecationWarning,
2513+
r"invalid escape sequence '\\8'"):
24912514
check(br"\8","\\8")
24922515
withself.assertWarns(DeprecationWarning):
24932516
check(br"\9","\\9")
2494-
withself.assertWarns(DeprecationWarning):
2517+
withself.assertWarnsRegex(DeprecationWarning,
2518+
r"invalid escape sequence '\\\xfa'")ascm:
24952519
check(b"\\\xfa","\\\xfa")
24962520
foriinrange(0o400,0o1000):
2497-
withself.assertWarns(DeprecationWarning):
2521+
withself.assertWarnsRegex(DeprecationWarning,
2522+
r"invalid octal escape sequence '\\%o'"%i):
24982523
check(rb'\%o'%i,chr(i))
24992524

2525+
withself.assertWarnsRegex(DeprecationWarning,
2526+
r"invalid escape sequence '\\z'"):
2527+
self.assertEqual(decode(br'\x\z','ignore'), ('\\z',4))
2528+
withself.assertWarnsRegex(DeprecationWarning,
2529+
r"invalid octal escape sequence '\\501'"):
2530+
self.assertEqual(decode(br'\x\501','ignore'), ('\u0141',6))
2531+
25002532
deftest_decode_errors(self):
25012533
decode=codecs.unicode_escape_decode
25022534
forc,din (b'x',2), (b'u',4), (b'U',4):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
2+
handler.

‎Objects/bytesobject.c

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,10 +1048,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
10481048
}
10491049

10501050
/* Unescape a backslash-escaped string. */
1051-
PyObject*_PyBytes_DecodeEscape(constchar*s,
1051+
PyObject*_PyBytes_DecodeEscape2(constchar*s,
10521052
Py_ssize_tlen,
10531053
constchar*errors,
1054-
constchar**first_invalid_escape)
1054+
int*first_invalid_escape_char,
1055+
constchar**first_invalid_escape_ptr)
10551056
{
10561057
intc;
10571058
char*p;
@@ -1065,7 +1066,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
10651066
returnNULL;
10661067
writer.overallocate=1;
10671068

1068-
*first_invalid_escape=NULL;
1069+
*first_invalid_escape_char=-1;
1070+
*first_invalid_escape_ptr=NULL;
10691071

10701072
end=s+len;
10711073
while (s<end) {
@@ -1103,9 +1105,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11031105
c= (c<<3)+*s++-'0';
11041106
}
11051107
if (c>0377) {
1106-
if (*first_invalid_escape==NULL) {
1107-
*first_invalid_escape=s-3;/* Back up 3 chars, since we've
1108-
already incremented s. */
1108+
if (*first_invalid_escape_char==-1) {
1109+
*first_invalid_escape_char=c;
1110+
/* Back up 3 chars, since we've already incremented s. */
1111+
*first_invalid_escape_ptr=s-3;
11091112
}
11101113
}
11111114
*p++=c;
@@ -1146,9 +1149,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11461149
break;
11471150

11481151
default:
1149-
if (*first_invalid_escape==NULL) {
1150-
*first_invalid_escape=s-1;/* Back up one char, since we've
1151-
already incremented s. */
1152+
if (*first_invalid_escape_char==-1) {
1153+
*first_invalid_escape_char= (unsignedchar)s[-1];
1154+
/* Back up one char, since we've already incremented s. */
1155+
*first_invalid_escape_ptr=s-1;
11521156
}
11531157
*p++='\\';
11541158
s--;
@@ -1162,23 +1166,37 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11621166
returnNULL;
11631167
}
11641168

1169+
// Export for binary compatibility.
1170+
PyObject*_PyBytes_DecodeEscape(constchar*s,
1171+
Py_ssize_tlen,
1172+
constchar*errors,
1173+
constchar**first_invalid_escape)
1174+
{
1175+
intfirst_invalid_escape_char;
1176+
return_PyBytes_DecodeEscape2(
1177+
s,len,errors,
1178+
&first_invalid_escape_char,
1179+
first_invalid_escape);
1180+
}
1181+
11651182
PyObject*PyBytes_DecodeEscape(constchar*s,
11661183
Py_ssize_tlen,
11671184
constchar*errors,
11681185
Py_ssize_tPy_UNUSED(unicode),
11691186
constchar*Py_UNUSED(recode_encoding))
11701187
{
1171-
constchar*first_invalid_escape;
1172-
PyObject*result=_PyBytes_DecodeEscape(s,len,errors,
1173-
&first_invalid_escape);
1188+
intfirst_invalid_escape_char;
1189+
constchar*first_invalid_escape_ptr;
1190+
PyObject*result=_PyBytes_DecodeEscape2(s,len,errors,
1191+
&first_invalid_escape_char,
1192+
&first_invalid_escape_ptr);
11741193
if (result==NULL)
11751194
returnNULL;
1176-
if (first_invalid_escape!=NULL) {
1177-
unsignedcharc=*first_invalid_escape;
1178-
if ('4' <=c&&c <='7') {
1195+
if (first_invalid_escape_char!=-1) {
1196+
if (first_invalid_escape_char>0xff) {
11791197
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
1180-
"invalid octal escape sequence '\\%.3s'",
1181-
first_invalid_escape)<0)
1198+
"invalid octal escape sequence '\\%o'",
1199+
first_invalid_escape_char)<0)
11821200
{
11831201
Py_DECREF(result);
11841202
returnNULL;
@@ -1187,7 +1205,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
11871205
else {
11881206
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
11891207
"invalid escape sequence '\\%c'",
1190-
c)<0)
1208+
first_invalid_escape_char)<0)
11911209
{
11921210
Py_DECREF(result);
11931211
returnNULL;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp