Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

[3.10] gh-133767: Fix use-after-free in the unicode-escape decoder with an error handler (GH-129648) (GH-133944) #134345

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Open
serhiy-storchaka wants to merge 1 commit into python:3.10
base:3.10
Choose a base branch
Loading
from serhiy-storchaka:backport-0c33e5b-3.10
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[3.10]gh-133767: Fix use-after-free in the unicode-escape decoder wi…
…th an error handler (GH-129648) (GH-133944) If the error handler is used, a new bytes object is created to set as the object attribute of UnicodeDecodeError, and that bytes object then replaces the original data. A pointer to the decoded data will become invalid after destroying that temporary bytes object. So we need another way to return the first invalid escape from _PyUnicode_DecodeUnicodeEscapeInternal(). _PyBytes_DecodeEscape() does not have such an issue, because it does not use the error handlers registry, but it should be changed for compatibility with _PyUnicode_DecodeUnicodeEscapeInternal(). (cherry picked from commit 9f69a58) (cherry picked from commit 6279eb8) (cherry picked from commit a75953b) (cherry picked from commit 0c33e5b) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
  • Loading branch information
@serhiy-storchaka
serhiy-storchaka committed May 20, 2025
commit 8b528cacbbde60504f6ac62784d04889d285f18b
4 changes: 4 additions & 0 deletions Include/cpython/bytesobject.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -25,6 +25,10 @@ PyAPI_FUNC(PyObject*) _PyBytes_FromHex(
int use_bytearray);

/* Helper for PyBytes_DecodeEscape that detects invalid escape chars. */
PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
const char *,
int *, const char **);
// Export for binary compatibility.
PyAPI_FUNC(PyObject *) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
const char *, const char **);

Expand Down
13 changes: 13 additions & 0 deletions Include/cpython/unicodeobject.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -844,6 +844,19 @@ PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(

/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
chars.*/
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
constchar *string,/* Unicode-Escape encoded string*/
Py_ssize_t length,/* size of string*/
constchar *errors,/* error handling*/
Py_ssize_t *consumed,/* bytes consumed*/
int *first_invalid_escape_char,/* on return, if not -1, contain the first
invalid escaped char (<= 0xff) or invalid
octal escape (> 0xff) in string.*/
constchar **first_invalid_escape_ptr);/* on return, if not NULL, may
point to the first invalid escaped
char in string.
May be NULL if errors is not NULL.*/
// Export for binary compatibility.
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
constchar *string,/* Unicode-Escape encoded string*/
Py_ssize_t length,/* size of string*/
Expand Down
37 changes: 36 additions & 1 deletion Lib/test/test_codeccallbacks.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
importcodecs
importhtml.entities
importitertools
importre
importsys
importunicodedata
importunittest
Expand DownExpand Up@@ -1124,7 +1125,7 @@ def test_bug828737(self):
text='abc<def>ghi'*n
text.translate(charmap)

deftest_mutatingdecodehandler(self):
deftest_mutating_decode_handler(self):
baddata= [
("ascii",b"\xff"),
("utf-7",b"++"),
Expand DownExpand Up@@ -1159,6 +1160,40 @@ def mutating(exc):
for (encoding,data)inbaddata:
self.assertEqual(data.decode(encoding,"test.mutating"),"\u4242")

deftest_mutating_decode_handler_unicode_escape(self):
decode=codecs.unicode_escape_decode
defmutating(exc):
ifisinstance(exc,UnicodeDecodeError):
r=data.get(exc.object[:exc.end])
ifrisnotNone:
exc.object=r[0]+exc.object[exc.end:]
return ('\u0404',r[1])
raiseAssertionError("don't know how to handle %r"%exc)

codecs.register_error('test.mutating2',mutating)
data= {
br'\x0': (b'\\',0),
br'\x3': (b'xxx\\',3),
br'\x5': (b'x\\',1),
}
defcheck(input,expected,msg):
withself.assertWarns(DeprecationWarning)ascm:
self.assertEqual(decode(input,'test.mutating2'), (expected,len(input)))
self.assertIn(msg,str(cm.warning))

check(br'\x0n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
check(br'\x0z','\u0404\\z',r"invalid escape sequence '\z'")

check(br'\x3n\zr','\u0404\n\\zr',r"invalid escape sequence '\z'")
check(br'\x3zr','\u0404\\zr',r"invalid escape sequence '\z'")
check(br'\x3z5','\u0404\\z5',r"invalid escape sequence '\z'")
check(memoryview(br'\x3z5x')[:-1],'\u0404\\z5',r"invalid escape sequence '\z'")
check(memoryview(br'\x3z5xy')[:-2],'\u0404\\z5',r"invalid escape sequence '\z'")

check(br'\x5n\z','\u0404\n\\z',r"invalid escape sequence '\z'")
check(br'\x5z','\u0404\\z',r"invalid escape sequence '\z'")
check(memoryview(br'\x5zy')[:-1],'\u0404\\z',r"invalid escape sequence '\z'")

# issue32583
deftest_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot
Expand Down
39 changes: 31 additions & 8 deletions Lib/test/test_codecs.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1181,20 +1181,32 @@ def test_escape(self):
check(br"[\501]",b"[A]")
check(br"[\x41]",b"[A]")
check(br"[\x410]",b"[A0]")

deftest_warnings(self):
decode=codecs.escape_decode
check=coding_checker(self,decode)
foriinrange(97,123):
b=bytes([i])
ifbnotinb'abfnrtvx':
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\%c'"%i):
check(b"\\"+b,b"\\"+b)
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\%c'"% (i-32)):
check(b"\\"+b.upper(),b"\\"+b.upper())
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\8'"):
check(br"\8",b"\\8")
withself.assertWarns(DeprecationWarning):
check(br"\9",b"\\9")
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\\xfa'")ascm:
check(b"\\\xfa",b"\\\xfa")

withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\z'"):
self.assertEqual(decode(br'\x\z','ignore'), (b'\\z',4))

deftest_errors(self):
decode=codecs.escape_decode
self.assertRaises(ValueError,decode,br"\x")
Expand DownExpand Up@@ -2408,20 +2420,31 @@ def test_escape_decode(self):
check(br"[\x410]","[A0]")
check(br"\u20ac","\u20ac")
check(br"\U0001d120","\U0001d120")

deftest_decode_warnings(self):
decode=codecs.unicode_escape_decode
check=coding_checker(self,decode)
foriinrange(97,123):
b=bytes([i])
ifbnotinb'abfnrtuvx':
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\%c'"%i):
check(b"\\"+b,"\\"+chr(i))
ifb.upper()notinb'UN':
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\%c'"% (i-32)):
check(b"\\"+b.upper(),"\\"+chr(i-32))
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\8'"):
check(br"\8","\\8")
withself.assertWarns(DeprecationWarning):
check(br"\9","\\9")
withself.assertWarns(DeprecationWarning):
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\\xfa'")ascm:
check(b"\\\xfa","\\\xfa")
withself.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence '\\z'"):
self.assertEqual(decode(br'\x\z','ignore'), ('\\z',4))

deftest_decode_errors(self):
decode=codecs.unicode_escape_decode
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
handler.
40 changes: 29 additions & 11 deletions Objects/bytesobject.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1089,10 +1089,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
}

/* Unescape a backslash-escaped string. */
PyObject*_PyBytes_DecodeEscape(constchar*s,
PyObject*_PyBytes_DecodeEscape2(constchar*s,
Py_ssize_tlen,
constchar*errors,
constchar**first_invalid_escape)
int*first_invalid_escape_char,
constchar**first_invalid_escape_ptr)
{
intc;
char*p;
Expand All@@ -1106,7 +1107,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
returnNULL;
writer.overallocate=1;

*first_invalid_escape=NULL;
*first_invalid_escape_char=-1;
*first_invalid_escape_ptr=NULL;

end=s+len;
while (s<end) {
Expand DownExpand Up@@ -1181,9 +1183,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
break;

default:
if (*first_invalid_escape==NULL) {
*first_invalid_escape=s-1;/* Back up one char, since we've
already incremented s. */
if (*first_invalid_escape_char==-1) {
*first_invalid_escape_char= (unsignedchar)s[-1];
/* Back up one char, since we've already incremented s. */
*first_invalid_escape_ptr=s-1;
}
*p++='\\';
s--;
Expand All@@ -1197,21 +1200,36 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
returnNULL;
}

// Export for binary compatibility.
PyObject*_PyBytes_DecodeEscape(constchar*s,
Py_ssize_tlen,
constchar*errors,
constchar**first_invalid_escape)
{
intfirst_invalid_escape_char;
return_PyBytes_DecodeEscape2(
s,len,errors,
&first_invalid_escape_char,
first_invalid_escape);
}

PyObject*PyBytes_DecodeEscape(constchar*s,
Py_ssize_tlen,
constchar*errors,
Py_ssize_tPy_UNUSED(unicode),
constchar*Py_UNUSED(recode_encoding))
{
constchar*first_invalid_escape;
PyObject*result=_PyBytes_DecodeEscape(s,len,errors,
&first_invalid_escape);
intfirst_invalid_escape_char;
constchar*first_invalid_escape_ptr;
PyObject*result=_PyBytes_DecodeEscape2(s,len,errors,
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (result==NULL)
returnNULL;
if (first_invalid_escape!=NULL) {
if (first_invalid_escape_char!=-1) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
"invalid escape sequence '\\%c'",
(unsignedchar)*first_invalid_escape)<0) {
first_invalid_escape_char)<0) {
Py_DECREF(result);
returnNULL;
}
Expand Down
45 changes: 34 additions & 11 deletions Objects/unicodeobject.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -6432,20 +6432,23 @@ PyUnicode_AsUTF16String(PyObject *unicode)
static_PyUnicode_Name_CAPI*ucnhash_capi=NULL;

PyObject*
_PyUnicode_DecodeUnicodeEscapeInternal(constchar*s,
_PyUnicode_DecodeUnicodeEscapeInternal2(constchar*s,
Py_ssize_tsize,
constchar*errors,
Py_ssize_t*consumed,
constchar**first_invalid_escape)
int*first_invalid_escape_char,
constchar**first_invalid_escape_ptr)
{
constchar*starts=s;
constchar*initial_starts=starts;
_PyUnicodeWriterwriter;
constchar*end;
PyObject*errorHandler=NULL;
PyObject*exc=NULL;

// so we can remember if we've seen an invalid escape char or not
*first_invalid_escape=NULL;
*first_invalid_escape_char=-1;
*first_invalid_escape_ptr=NULL;

if (size==0) {
if (consumed) {
Expand DownExpand Up@@ -6628,9 +6631,12 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
gotoerror;

default:
if (*first_invalid_escape==NULL) {
*first_invalid_escape=s-1;/* Back up one char, since we've
already incremented s. */
if (*first_invalid_escape_char==-1) {
*first_invalid_escape_char=c;
if (starts==initial_starts) {
/* Back up one char, since we've already incremented s. */
*first_invalid_escape_ptr=s-1;
}
}
WRITE_ASCII_CHAR('\\');
WRITE_CHAR(c);
Expand DownExpand Up@@ -6669,22 +6675,39 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
returnNULL;
}

// Export for binary compatibility.
PyObject*
_PyUnicode_DecodeUnicodeEscapeInternal(constchar*s,
Py_ssize_tsize,
constchar*errors,
Py_ssize_t*consumed,
constchar**first_invalid_escape)
{
intfirst_invalid_escape_char;
return_PyUnicode_DecodeUnicodeEscapeInternal2(
s,size,errors,consumed,
&first_invalid_escape_char,
first_invalid_escape);
}

PyObject*
_PyUnicode_DecodeUnicodeEscapeStateful(constchar*s,
Py_ssize_tsize,
constchar*errors,
Py_ssize_t*consumed)
{
constchar*first_invalid_escape;
PyObject*result=_PyUnicode_DecodeUnicodeEscapeInternal(s,size,errors,
intfirst_invalid_escape_char;
constchar*first_invalid_escape_ptr;
PyObject*result=_PyUnicode_DecodeUnicodeEscapeInternal2(s,size,errors,
consumed,
&first_invalid_escape);
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (result==NULL)
returnNULL;
if (first_invalid_escape!=NULL) {
if (first_invalid_escape_char!=-1) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning,1,
"invalid escape sequence '\\%c'",
(unsignedchar)*first_invalid_escape)<0) {
first_invalid_escape_char)<0) {
Py_DECREF(result);
returnNULL;
}
Expand Down
26 changes: 16 additions & 10 deletions Parser/string_parser.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -114,12 +114,15 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
len=p-buf;
s=buf;

constchar*first_invalid_escape;
v=_PyUnicode_DecodeUnicodeEscapeInternal(s,len,NULL,NULL,&first_invalid_escape);

if (v!=NULL&&first_invalid_escape!=NULL) {
if (warn_invalid_escape_sequence(parser,*first_invalid_escape,t)<0) {
/* We have not decref u before because first_invalid_escape points
intfirst_invalid_escape_char;
constchar*first_invalid_escape_ptr;
v=_PyUnicode_DecodeUnicodeEscapeInternal2(s, (Py_ssize_t)len,NULL,NULL,
&first_invalid_escape_char,
&first_invalid_escape_ptr);

if (v!=NULL&&first_invalid_escape_ptr!=NULL) {
if (warn_invalid_escape_sequence(parser,*first_invalid_escape_ptr,t)<0) {
/* We have not decref u before because first_invalid_escape_ptr points
inside u. */
Py_XDECREF(u);
Py_DECREF(v);
Expand All@@ -133,14 +136,17 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t)
staticPyObject*
decode_bytes_with_escapes(Parser*p,constchar*s,Py_ssize_tlen,Token*t)
{
constchar*first_invalid_escape;
PyObject*result=_PyBytes_DecodeEscape(s,len,NULL,&first_invalid_escape);
intfirst_invalid_escape_char;
constchar*first_invalid_escape_ptr;
PyObject*result=_PyBytes_DecodeEscape2(s,len,NULL,
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (result==NULL) {
returnNULL;
}

if (first_invalid_escape!=NULL) {
if (warn_invalid_escape_sequence(p,*first_invalid_escape,t)<0) {
if (first_invalid_escape_ptr!=NULL) {
if (warn_invalid_escape_sequence(p,*first_invalid_escape_ptr,t)<0) {
Py_DECREF(result);
returnNULL;
}
Expand Down
Loading

[8]ページ先頭

©2009-2025 Movatter.jp