Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

[3.10] gh-98740: Fix validation of conditional expressions in RE (GH-98764) #99046

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[3.10] gh-98740: Fix validation of conditional expressions in RE (GH-98764)

In very rare circumstances the JUMP opcode could be confused with the argument of the opcode in the "then" part which doesn't end with the JUMP opcode. This led to incorrect detection of the final JUMP opcode and incorrect calculation of the size of the subexpression.

NOTE: Changed return value of functions _validate_inner() and _validate_charset() in Modules/_sre/sre.c. Now they return 0 on success, -1 on failure, and 1 if the last op is JUMP (which usually is a failure). Previously they returned 1 on success and 0 on failure.

(cherry picked from commit e9ac890)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
  • Loading branch information
@serhiy-storchaka
serhiy-storchaka committed Nov 3, 2022
commit c2d02e9828b4db1138fd6302a2d61d789780fc4e
3 changes: 3 additions & 0 deletions Doc/library/re.rst
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -421,6 +421,9 @@ The special characters are:
some fixed length. Patterns which start with negative lookbehind assertions may
match at the beginning of the string being searched.

.. _re-conditional-expression:
.. index:: single: (?(; in regular expressions

``(?(id/name)yes-pattern|no-pattern)``
Will try to match with ``yes-pattern`` if the group with given *id* or
*name* exists, and with ``no-pattern`` if it doesn't. ``no-pattern`` is
Expand Down
5 changes: 5 additions & 0 deletions Lib/test/test_re.py
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -578,6 +578,11 @@ def test_re_groupref_exists_errors(self):
self.checkPatternError(r'()(?(2)a)',
"invalid group reference 2", 5)

def test_re_groupref_exists_validation_bug(self):
for i in range(256):
with self.subTest(code=i):
re.compile(r'()(?(1)\x%02x?)' % i)

def test_re_groupref_overflow(self):
from sre_constants import MAXGROUPS
self.checkTemplateError('()', r'\g<%s>' % MAXGROUPS, 'xx',
Expand Down
View file
Open in desktop
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
Fix internal error in the :mod:`re` module which in very rare circumstances
prevented compilation of a regular expression containing a :ref:`conditional
expression <re-conditional-expression>` without the "else" branch.
54 changes: 28 additions & 26 deletions Modules/_sre.c
View file
Open in desktop
Original file line numberDiff line numberDiff line change
Expand Up@@ -1519,7 +1519,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
#endif

/* Report failure */
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return0; } while (0)
#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return-1; } while (0)

/* Extract opcode, argument, or skip count from code array */
#define GET_OP \
Expand All@@ -1543,7 +1543,7 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
skip = *code; \
VTRACE(("%lu (skip to %p)\n", \
(unsigned long)skip, code+skip)); \
if (skip-adj > (uintptr_t)(end - code)) \
if (skip-adj > (uintptr_t)(end - code))\
FAIL; \
code++; \
} while (0)
Expand DownExpand Up@@ -1632,9 +1632,10 @@ _validate_charset(SRE_CODE *code, SRE_CODE *end)
}
}

return1;
return0;
}

/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */
static int
_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
{
Expand DownExpand Up@@ -1712,7 +1713,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
case SRE_OP_IN_LOC_IGNORE:
GET_SKIP;
/* Stop 1 before the end; we check the FAILURE below */
if (!_validate_charset(code, code+skip-2))
if (_validate_charset(code, code+skip-2))
FAIL;
if (code[skip-2] != SRE_OP_FAILURE)
FAIL;
Expand DownExpand Up@@ -1766,7 +1767,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
}
/* Validate the charset */
if (flags & SRE_INFO_CHARSET) {
if (!_validate_charset(code, newcode-1))
if (_validate_charset(code, newcode-1))
FAIL;
if (newcode[-1] != SRE_OP_FAILURE)
FAIL;
Expand All@@ -1787,7 +1788,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (skip == 0)
break;
/* Stop 2 before the end; we check the JUMP below */
if (!_validate_inner(code, code+skip-3, groups))
if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
/* Check that it ends with a JUMP, and that each JUMP
Expand All@@ -1801,6 +1802,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
else if (code+skip-1 != target)
FAIL;
}
if (code != target)
FAIL;
}
break;

Expand All@@ -1815,7 +1818,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-4, groups))
if (_validate_inner(code, code+skip-4, groups))
FAIL;
code += skip-4;
GET_OP;
Expand All@@ -1834,7 +1837,7 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
FAIL;
if (max > SRE_MAXREPEAT)
FAIL;
if (!_validate_inner(code, code+skip-3, groups))
if (_validate_inner(code, code+skip-3, groups))
FAIL;
code += skip-3;
GET_OP;
Expand DownExpand Up@@ -1886,24 +1889,17 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
to allow arbitrary jumps anywhere in the code; so we just look
for a JUMP opcode preceding our skip target.
*/
if (skip >= 3 && skip-3 < (uintptr_t)(end - code) &&
code[skip-3] == SRE_OP_JUMP)
{
VTRACE(("both then and else parts present\n"));
if (!_validate_inner(code+1, code+skip-3, groups))
FAIL;
VTRACE(("then part:\n"));
int rc = _validate_inner(code+1, code+skip-1, groups);
if (rc == 1) {
VTRACE(("else part:\n"));
code += skip-2; /* Position after JUMP, at <skipno> */
GET_SKIP;
if (!_validate_inner(code, code+skip-1, groups))
FAIL;
code += skip-1;
}
else {
VTRACE(("only a then part present\n"));
if (!_validate_inner(code+1, code+skip-1, groups))
FAIL;
code += skip-1;
rc = _validate_inner(code, code+skip-1, groups);
}
if (rc)
FAIL;
code += skip-1;
break;

case SRE_OP_ASSERT:
Expand All@@ -1914,22 +1910,28 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
if (arg & 0x80000000)
FAIL; /* Width too large */
/* Stop 1 before the end; we check the SUCCESS below */
if (!_validate_inner(code+1, code+skip-2, groups))
if (_validate_inner(code+1, code+skip-2, groups))
FAIL;
code += skip-2;
GET_OP;
if (op != SRE_OP_SUCCESS)
FAIL;
break;

case SRE_OP_JUMP:
if (code + 1 != end)
FAIL;
VTRACE(("JUMP: %d\n", __LINE__));
return 1;

default:
FAIL;

}
}

VTRACE(("okay\n"));
return1;
return0;
}

static int
Expand All@@ -1944,7 +1946,7 @@ _validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
static int
_validate(PatternObject *self)
{
if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
if (_validate_outer(self->code, self->code+self->codesize, self->groups))
{
PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
return 0;
Expand Down

[8]ページ先頭

©2009-2025 Movatter.jp