Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 6279eb8

Browse files
[3.13] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944)
If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 0c0fedf commit 6279eb8

File tree

8 files changed

+194
-57
lines changed

8 files changed

+194
-57
lines changed

‎Include/internal/pycore_bytesobject.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ extern PyObject* _PyBytes_FromHex(
2020

2121
// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
2222
// Export for test_peg_generator.
23+
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape2(constchar*,Py_ssize_t,
24+
constchar*,
25+
int*,constchar**);
26+
// Export for binary compatibility.
2327
PyAPI_FUNC(PyObject*)_PyBytes_DecodeEscape(constchar*,Py_ssize_t,
2428
constchar*,constchar**);
2529

‎Include/internal/pycore_unicodeobject.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,19 @@ extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
142142
// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
143143
// chars.
144144
// Export for test_peg_generator.
145+
PyAPI_FUNC(PyObject*)_PyUnicode_DecodeUnicodeEscapeInternal2(
146+
constchar*string,/* Unicode-Escape encoded string */
147+
Py_ssize_tlength,/* size of string */
148+
constchar*errors,/* error handling */
149+
Py_ssize_t*consumed,/* bytes consumed */
150+
int*first_invalid_escape_char,/* on return, if not -1, contain the first
151+
invalid escaped char (<= 0xff) or invalid
152+
octal escape (> 0xff) in string. */
153+
constchar**first_invalid_escape_ptr);/* on return, if not NULL, may
154+
point to the first invalid escaped
155+
char in string.
156+
May be NULL if errors is not NULL. */
157+
// Export for binary compatibility.
145158
PyAPI_FUNC(PyObject*)_PyUnicode_DecodeUnicodeEscapeInternal(
146159
constchar*string,/* Unicode-Escape encoded string */
147160
Py_ssize_tlength,/* size of string */

‎Lib/test/test_codeccallbacks.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
importcodecs
22
importhtml.entities
33
importitertools
4+
importre
45
importsys
56
importunicodedata
67
importunittest
@@ -1124,7 +1125,7 @@ def test_bug828737(self):
11241125
text='abc<def>ghi'*n
11251126
text.translate(charmap)
11261127

1127-
deftest_mutatingdecodehandler(self):
1128+
deftest_mutating_decode_handler(self):
11281129
baddata= [
11291130
("ascii",b"\xff"),
11301131
("utf-7",b"++"),
@@ -1159,6 +1160,42 @@ def mutating(exc):
11591160
for (encoding,data)inbaddata:
11601161
self.assertEqual(data.decode(encoding,"test.mutating"),"\u4242")
11611162

1163+
deftest_mutating_decode_handler_unicode_escape(self):
1164+
decode=codecs.unicode_escape_decode
1165+
defmutating(exc):
1166+
ifisinstance(exc,UnicodeDecodeError):
1167+
r=data.get(exc.object[:exc.end])
1168+
ifrisnotNone:
1169+
exc.object=r[0]+exc.object[exc.end:]
1170+
return ('\u0404',r[1])
1171+
raiseAssertionError("don't know how to handle %r"%exc)
1172+
1173+
codecs.register_error('test.mutating2',mutating)
1174+
data= {
1175+
br'\x0': (b'\\',0),
1176+
br'\x3': (b'xxx\\',3),
1177+
br'\x5': (b'x\\',1),
1178+
}
1179+
defcheck(input,expected,msg):
1180+
withself.assertWarns(DeprecationWarning)ascm:
1181+
self.assertEqual(decode(input,'test.mutating2'), (expected,len(input)))
1182+
self.assertIn(msg,str(cm.warning))
1183+
1184+
check(br'\x0n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1185+
check(br'\x0n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1186+
check(br'\x0z','\u0404\\z',r"invalid escape sequence '\z'")
1187+
1188+
check(br'\x3n\zr','\u0404\n\\zr',r"invalid escape sequence '\z'")
1189+
check(br'\x3zr','\u0404\\zr',r"invalid escape sequence '\z'")
1190+
check(br'\x3z5','\u0404\\z5',r"invalid escape sequence '\z'")
1191+
check(memoryview(br'\x3z5x')[:-1],'\u0404\\z5',r"invalid escape sequence '\z'")
1192+
check(memoryview(br'\x3z5xy')[:-2],'\u0404\\z5',r"invalid escape sequence '\z'")
1193+
1194+
check(br'\x5n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
1195+
check(br'\x5n\501','\u0404\n\u0141',r"invalid octal escape sequence '\501'")
1196+
check(br'\x5z','\u0404\\z',r"invalid escape sequence '\z'")
1197+
check(memoryview(br'\x5zy')[:-1],'\u0404\\z',r"invalid escape sequence '\z'")
1198+
11621199
# issue32583
11631200
deftest_crashing_decode_handler(self):
11641201
# better generating one more character to fill the extra space slot

‎Lib/test/test_codecs.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,23 +1196,39 @@ def test_escape(self):
11961196
check(br"[\1010]",b"[A0]")
11971197
check(br"[\x41]",b"[A]")
11981198
check(br"[\x410]",b"[A0]")
1199+
1200+
deftest_warnings(self):
1201+
decode=codecs.escape_decode
1202+
check=coding_checker(self,decode)
11991203
foriinrange(97,123):
12001204
b=bytes([i])
12011205
ifbnotinb'abfnrtvx':
1202-
withself.assertWarns(DeprecationWarning):
1206+
withself.assertWarnsRegex(DeprecationWarning,
1207+
r"invalid escape sequence '\\%c'"%i):
12031208
check(b"\\"+b,b"\\"+b)
1204-
withself.assertWarns(DeprecationWarning):
1209+
withself.assertWarnsRegex(DeprecationWarning,
1210+
r"invalid escape sequence '\\%c'"% (i-32)):
12051211
check(b"\\"+b.upper(),b"\\"+b.upper())
1206-
withself.assertWarns(DeprecationWarning):
1212+
withself.assertWarnsRegex(DeprecationWarning,
1213+
r"invalid escape sequence '\\8'"):
12071214
check(br"\8",b"\\8")
12081215
withself.assertWarns(DeprecationWarning):
12091216
check(br"\9",b"\\9")
1210-
withself.assertWarns(DeprecationWarning):
1217+
withself.assertWarnsRegex(DeprecationWarning,
1218+
r"invalid escape sequence '\\\xfa'")ascm:
12111219
check(b"\\\xfa",b"\\\xfa")
12121220
foriinrange(0o400,0o1000):
1213-
withself.assertWarns(DeprecationWarning):
1221+
withself.assertWarnsRegex(DeprecationWarning,
1222+
r"invalid octal escape sequence '\\%o'"%i):
12141223
check(rb'\%o'%i,bytes([i&0o377]))
12151224

1225+
withself.assertWarnsRegex(DeprecationWarning,
1226+
r"invalid escape sequence '\\z'"):
1227+
self.assertEqual(decode(br'\x\z','ignore'), (b'\\z',4))
1228+
withself.assertWarnsRegex(DeprecationWarning,
1229+
r"invalid octal escape sequence '\\501'"):
1230+
self.assertEqual(decode(br'\x\501','ignore'), (b'A',6))
1231+
12161232
deftest_errors(self):
12171233
decode=codecs.escape_decode
12181234
self.assertRaises(ValueError,decode,br"\x")
@@ -2661,24 +2677,40 @@ def test_escape_decode(self):
26612677
check(br"[\x410]","[A0]")
26622678
check(br"\u20ac","\u20ac")
26632679
check(br"\U0001d120","\U0001d120")
2680+
2681+
deftest_decode_warnings(self):
2682+
decode=codecs.unicode_escape_decode
2683+
check=coding_checker(self,decode)
26642684
foriinrange(97,123):
26652685
b=bytes([i])
26662686
ifbnotinb'abfnrtuvx':
2667-
withself.assertWarns(DeprecationWarning):
2687+
withself.assertWarnsRegex(DeprecationWarning,
2688+
r"invalid escape sequence '\\%c'"%i):
26682689
check(b"\\"+b,"\\"+chr(i))
26692690
ifb.upper()notinb'UN':
2670-
withself.assertWarns(DeprecationWarning):
2691+
withself.assertWarnsRegex(DeprecationWarning,
2692+
r"invalid escape sequence '\\%c'"% (i-32)):
26712693
check(b"\\"+b.upper(),"\\"+chr(i-32))
2672-
withself.assertWarns(DeprecationWarning):
2694+
withself.assertWarnsRegex(DeprecationWarning,
2695+
r"invalid escape sequence '\\8'"):
26732696
check(br"\8","\\8")
26742697
withself.assertWarns(DeprecationWarning):
26752698
check(br"\9","\\9")
2676-
withself.assertWarns(DeprecationWarning):
2699+
withself.assertWarnsRegex(DeprecationWarning,
2700+
r"invalid escape sequence '\\\xfa'")ascm:
26772701
check(b"\\\xfa","\\\xfa")
26782702
foriinrange(0o400,0o1000):
2679-
withself.assertWarns(DeprecationWarning):
2703+
withself.assertWarnsRegex(DeprecationWarning,
2704+
r"invalid octal escape sequence '\\%o'"%i):
26802705
check(rb'\%o'%i,chr(i))
26812706

2707+
withself.assertWarnsRegex(DeprecationWarning,
2708+
r"invalid escape sequence '\\z'"):
2709+
self.assertEqual(decode(br'\x\z','ignore'), ('\\z',4))
2710+
withself.assertWarnsRegex(DeprecationWarning,
2711+
r"invalid octal escape sequence '\\501'"):
2712+
self.assertEqual(decode(br'\x\501','ignore'), ('\u0141',6))
2713+
26822714
deftest_decode_errors(self):
26832715
decode=codecs.unicode_escape_decode
26842716
forc,din (b'x',2), (b'u',4), (b'U',4):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
2+
handler.

‎Objects/bytesobject.c

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,10 +1065,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
10651065
}
10661066

10671067
/* Unescape a backslash-escaped string. */
1068-
PyObject*_PyBytes_DecodeEscape(constchar*s,
1068+
PyObject*_PyBytes_DecodeEscape2(constchar*s,
10691069
Py_ssize_tlen,
10701070
constchar*errors,
1071-
constchar**first_invalid_escape)
1071+
int*first_invalid_escape_char,
1072+
constchar**first_invalid_escape_ptr)
10721073
{
10731074
intc;
10741075
char*p;
@@ -1082,7 +1083,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
10821083
returnNULL;
10831084
writer.overallocate=1;
10841085

1085-
*first_invalid_escape=NULL;
1086+
*first_invalid_escape_char=-1;
1087+
*first_invalid_escape_ptr=NULL;
10861088

10871089
end=s+len;
10881090
while (s<end) {
@@ -1120,9 +1122,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11201122
c= (c<<3)+*s++-'0';
11211123
}
11221124
if (c>0377) {
1123-
if (*first_invalid_escape==NULL) {
1124-
*first_invalid_escape=s-3;/* Back up 3 chars, since we've
1125-
already incremented s. */
1125+
if (*first_invalid_escape_char==-1) {
1126+
*first_invalid_escape_char=c;
1127+
/* Back up 3 chars, since we've already incremented s. */
1128+
*first_invalid_escape_ptr=s-3;
11261129
}
11271130
}
11281131
*p++=c;
@@ -1163,9 +1166,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11631166
break;
11641167

11651168
default:
1166-
if (*first_invalid_escape==NULL) {
1167-
*first_invalid_escape=s-1;/* Back up one char, since we've
1168-
already incremented s. */
1169+
if (*first_invalid_escape_char==-1) {
1170+
*first_invalid_escape_char= (unsignedchar)s[-1];
1171+
/* Back up one char, since we've already incremented s. */
1172+
*first_invalid_escape_ptr=s-1;
11691173
}
11701174
*p++='\\';
11711175
s--;
@@ -1179,23 +1183,37 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
11791183
returnNULL;
11801184
}
11811185

1186+
// Export for binary compatibility.
1187+
PyObject*_PyBytes_DecodeEscape(constchar*s,
1188+
Py_ssize_tlen,
1189+
constchar*errors,
1190+
constchar**first_invalid_escape)
1191+
{
1192+
intfirst_invalid_escape_char;
1193+
return_PyBytes_DecodeEscape2(
1194+
s,len,errors,
1195+
&first_invalid_escape_char,
1196+
first_invalid_escape);
1197+
}
1198+
11821199
PyObject*PyBytes_DecodeEscape(constchar*s,
11831200
Py_ssize_tlen,
11841201
constchar*errors,
11851202
Py_ssize_tPy_UNUSED(unicode),
11861203
constchar*Py_UNUSED(recode_encoding))
11871204
{
1188-
constchar*first_invalid_escape;
1189-
PyObject*result=_PyBytes_DecodeEscape(s,len,errors,
1190-
&first_invalid_escape);
1205+
intfirst_invalid_escape_char;
1206+
constchar*first_invalid_escape_ptr;
1207+
PyObject*result=_PyBytes_DecodeEscape2(s,len,errors,
1208+
&first_invalid_escape_char,
1209+
&first_invalid_escape_ptr);
11911210
if (result==NULL)
11921211
returnNULL;
1193-
if (first_invalid_escape!=NULL) {
1194-
unsignedcharc=*first_invalid_escape;
1195-
if ('4' <=c&&c <='7') {
1212+
if (first_invalid_escape_char!=-1) {
1213+
if (first_invalid_escape_char>0xff) {
11961214
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
1197-
"invalid octal escape sequence '\\%.3s'",
1198-
first_invalid_escape)<0)
1215+
"invalid octal escape sequence '\\%o'",
1216+
first_invalid_escape_char)<0)
11991217
{
12001218
Py_DECREF(result);
12011219
returnNULL;
@@ -1204,7 +1222,7 @@ PyObject *PyBytes_DecodeEscape(const char *s,
12041222
else {
12051223
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
12061224
"invalid escape sequence '\\%c'",
1207-
c)<0)
1225+
first_invalid_escape_char)<0)
12081226
{
12091227
Py_DECREF(result);
12101228
returnNULL;

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp