Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

[3.12] Fix use-after-free in the unicode-escape decoder with error handler (GH-133767) #134255

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
mcepl wants to merge 1 commit into python:3.12
base: 3.12
Choose a base branch
Loading
from openSUSE-Python:CVE-2025-4516-DecodeError-handler
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Include/internal/pycore_bytesobject.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -8,6 +8,11 @@ extern "C" {
# error "this header requires Py_BUILD_CORE define"
#endif

// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
// Export for test_peg_generator.
PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
const char *,
int *, const char **);

/* Substring Search.

Expand Down
16 changes: 16 additions & 0 deletions Include/internal/pycore_unicodeobject.h
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -79,6 +79,22 @@ extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
// Like PyUnicode_AsUTF8(), but check for embedded null characters.
extern const char* _PyUnicode_AsUTF8NoNUL(PyObject *);

// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
// chars.
// Export for test_peg_generator.
PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
const char *string, /* Unicode-Escape encoded string */
Py_ssize_t length, /* size of string */
const char *errors, /* error handling */
Py_ssize_t *consumed, /* bytes consumed */
int *first_invalid_escape_char, /* on return, if not -1, contain the first
invalid escaped char (<= 0xff) or invalid
octal escape (> 0xff) in string. */
const char **first_invalid_escape_ptr); /* on return, if not NULL, may
point to the first invalid escaped
char in string.
May be NULL if errors is not NULL. */


#ifdef __cplusplus
}
Expand Down
39 changes: 38 additions & 1 deletion Lib/test/test_codeccallbacks.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
import codecs
import html.entities
import itertools
import re
import sys
import unicodedata
import unittest
Expand DownExpand Up@@ -1124,7 +1125,7 @@ def test_bug828737(self):
text = 'abc<def>ghi'*n
text.translate(charmap)

def test_mutatingdecodehandler(self):
def test_mutating_decode_handler(self):
baddata = [
("ascii", b"\xff"),
("utf-7", b"++"),
Expand DownExpand Up@@ -1159,6 +1160,42 @@ def mutating(exc):
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")

def test_mutating_decode_handler_unicode_escape(self):
decode = codecs.unicode_escape_decode
def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
r = data.get(exc.object[:exc.end])
if r is not None:
exc.object = r[0] + exc.object[exc.end:]
return ('\u0404', r[1])
raise AssertionError("don't know how to handle %r" % exc)

codecs.register_error('test.mutating2', mutating)
data = {
br'\x0': (b'\\', 0),
br'\x3': (b'xxx\\', 3),
br'\x5': (b'x\\', 1),
}
def check(input, expected, msg):
with self.assertWarns(DeprecationWarning) as cm:
self.assertEqual(decode(input, 'test.mutating2'), (expected, len(input)))
self.assertIn(msg, str(cm.warning))

check(br'\x0n\z', '\u0404\n\\z', r"invalid escape sequence")
check(br'\x0n\501', '\u0404\n\u0141', r'invalid octal escape sequence')
check(br'\x0z', '\u0404\\z', r'invalid escape sequence')

check(br'\x3n\zr', '\u0404\n\\zr', r'invalid escape sequence')
check(br'\x3zr', '\u0404\\zr', r'invalid escape sequence')
check(br'\x3z5', '\u0404\\z5', r'invalid escape sequence')
check(memoryview(br'\x3z5x')[:-1], '\u0404\\z5', r'invalid escape sequence')
check(memoryview(br'\x3z5xy')[:-2], '\u0404\\z5', r'invalid escape sequence')

check(br'\x5n\z', '\u0404\n\\z', r'invalid escape sequence')
check(br'\x5n\501', '\u0404\n\u0141', r'invalid octal escape sequence')
check(br'\x5z', '\u0404\\z', r'invalid escape sequence')
check(memoryview(br'\x5zy')[:-1], '\u0404\\z', r'invalid escape sequence')

# issue32583
def test_crashing_decode_handler(self):
# better generating one more character to fill the extra space slot
Expand Down
52 changes: 42 additions & 10 deletions Lib/test/test_codecs.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1196,23 +1196,39 @@ def test_escape(self):
check(br"[\1010]", b"[A0]")
check(br"[\x41]", b"[A]")
check(br"[\x410]", b"[A0]")

def test_warnings(self):
decode = codecs.escape_decode
check = coding_checker(self, decode)
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtvx':
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r"'\\%c' is an invalid escape sequence" % i):
check(b"\\" + b, b"\\" + b)
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r"invalid escape sequence"):
check(b"\\" + b.upper(), b"\\" + b.upper())
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r"'\\8' is an invalid escape sequence"):
check(br"\8", b"\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", b"\\9")
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence') as cm:
check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid octal escape sequence'):
check(rb'\%o' % i, bytes([i & 0o377]))

with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence'):
self.assertEqual(decode(br'\x\z', 'ignore'), (b'\\z', 4))
with self.assertWarnsRegex(DeprecationWarning,
r'invalid octal escape sequence'):
self.assertEqual(decode(br'\x\501', 'ignore'), (b'A', 6))

def test_errors(self):
decode = codecs.escape_decode
self.assertRaises(ValueError, decode, br"\x")
Expand DownExpand Up@@ -2479,24 +2495,40 @@ def test_escape_decode(self):
check(br"[\x410]", "[A0]")
check(br"\u20ac", "\u20ac")
check(br"\U0001d120", "\U0001d120")

def test_decode_warnings(self):
decode = codecs.unicode_escape_decode
check = coding_checker(self, decode)
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtuvx':
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence'):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
'invalid escape sequence'):
check(b"\\" + b.upper(), "\\" + chr(i-32))
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence'):
check(br"\8", "\\8")
with self.assertWarns(DeprecationWarning):
check(br"\9", "\\9")
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence') as cm:
check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
with self.assertWarnsRegex(DeprecationWarning,
r'invalid octal escape sequence'):
check(rb'\%o' % i, chr(i))

with self.assertWarnsRegex(DeprecationWarning,
r'invalid escape sequence'):
self.assertEqual(decode(br'\x\z', 'ignore'), ('\\z', 4))
with self.assertWarnsRegex(DeprecationWarning,
r'invalid octal escape sequence'):
self.assertEqual(decode(br'\x\501', 'ignore'), ('\u0141', 6))

def test_decode_errors(self):
decode = codecs.unicode_escape_decode
for c, d in (b'x', 2), (b'u', 4), (b'U', 4):
Expand Down
4 changes: 2 additions & 2 deletions Lib/test/test_codeop.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -281,8 +281,8 @@ def test_filename(self):
def test_warning(self):
# Test that the warning is only returned once.
with warnings_helper.check_warnings(
('"is" with \'str\' literal', SyntaxWarning),
("invalid escape sequence", SyntaxWarning),
(r'"is" with.*literal', SyntaxWarning),
(r'invalid escape sequence', SyntaxWarning),
) as w:
compile_command(r"'\e' is 0")
self.assertEqual(len(w.warnings), 2)
Expand Down
8 changes: 4 additions & 4 deletions Lib/test/test_string_literals.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -116,7 +116,7 @@ def test_eval_str_invalid_escape(self):
warnings.simplefilter('always', category=SyntaxWarning)
eval("'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence.")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)

Expand DownExpand Up@@ -153,7 +153,7 @@ def test_eval_str_invalid_octal_escape(self):
eval("'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
r"invalid octal escape sequence '\407'")
r"'\407' is an invalid octal escape sequence.")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)

Expand DownExpand Up@@ -228,7 +228,7 @@ def test_eval_bytes_invalid_escape(self):
warnings.simplefilter('always', category=SyntaxWarning)
eval("b'''\n\\z'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
self.assertEqual(str(w[0].message), r"'\z' is an invalid escape sequence.")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)

Expand All@@ -252,7 +252,7 @@ def test_eval_bytes_invalid_octal_escape(self):
eval("b'''\n\\407'''")
self.assertEqual(len(w), 1)
self.assertEqual(str(w[0].message),
r"invalid octal escape sequence '\407'")
r"'\407' is an invalid octal escape sequence.")
self.assertEqual(w[0].filename, '<string>')
self.assertEqual(w[0].lineno, 2)

Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_unparse.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -653,7 +653,7 @@ def test_multiquote_joined_string(self):

def test_backslash_in_format_spec(self):
import re
msg = re.escape("invalid escape sequence '\\ '")
msg = re.escape("invalid escape sequence")
with self.assertWarnsRegex(SyntaxWarning, msg):
self.check_ast_roundtrip("""f"{x:\\ }" """)
self.check_ast_roundtrip("""f"{x:\\n}" """)
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
Fix use-after-free in the "unicode-escape" decoder with a non-"strict" error
handler.
43 changes: 24 additions & 19 deletions Objects/bytesobject.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1048,10 +1048,11 @@ _PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
}

/* Unescape a backslash-escaped string. */
PyObject *_PyBytes_DecodeEscape(const char *s,
PyObject *_PyBytes_DecodeEscape2(const char *s,
Py_ssize_t len,
const char *errors,
const char **first_invalid_escape)
int *first_invalid_escape_char,
const char **first_invalid_escape_ptr)
{
int c;
char *p;
Expand All@@ -1065,7 +1066,8 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
return NULL;
writer.overallocate = 1;

*first_invalid_escape = NULL;
*first_invalid_escape_char = -1;
*first_invalid_escape_ptr = NULL;

end = s + len;
while (s < end) {
Expand DownExpand Up@@ -1103,9 +1105,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
c = (c<<3) + *s++ - '0';
}
if (c > 0377) {
if (*first_invalid_escape == NULL) {
*first_invalid_escape = s-3; /* Back up 3 chars, since we've
already incremented s. */
if (*first_invalid_escape_char == -1) {
*first_invalid_escape_char = c;
/* Back up 3 chars, since we've already incremented s. */
*first_invalid_escape_ptr = s - 3;
}
}
*p++ = c;
Expand DownExpand Up@@ -1146,9 +1149,10 @@ PyObject *_PyBytes_DecodeEscape(const char *s,
break;

default:
if (*first_invalid_escape == NULL) {
*first_invalid_escape = s-1; /* Back up one char, since we've
already incremented s. */
if (*first_invalid_escape_char == -1) {
*first_invalid_escape_char = (unsigned char)s[-1];
/* Back up one char, since we've already incremented s. */
*first_invalid_escape_ptr = s - 1;
}
*p++ = '\\';
s--;
Expand All@@ -1168,26 +1172,27 @@ PyObject *PyBytes_DecodeEscape(const char *s,
Py_ssize_t Py_UNUSED(unicode),
const char *Py_UNUSED(recode_encoding))
{
const char* first_invalid_escape;
PyObject *result = _PyBytes_DecodeEscape(s, len, errors,
&first_invalid_escape);
int first_invalid_escape_char;
const char *first_invalid_escape_ptr;
PyObject *result = _PyBytes_DecodeEscape2(s, len, errors,
&first_invalid_escape_char,
&first_invalid_escape_ptr);
if (result == NULL)
return NULL;
if (first_invalid_escape != NULL) {
unsigned char c = *first_invalid_escape;
if ('4' <= c && c <= '7') {
if (first_invalid_escape_char != -1) {
if (first_invalid_escape_char > 0xff) {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid octal escape sequence '\\%.3s'",
first_invalid_escape) < 0)
"'\\%o' is an invalid octal escape sequence.",
first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
}
}
else {
if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"invalid escape sequence '\\%c'",
c) < 0)
"'\\%c' is an invalid escape sequence.",
first_invalid_escape_char) < 0)
{
Py_DECREF(result);
return NULL;
Expand Down
Loading
Loading

[8]ページ先頭

©2009-2025 Movatter.jp