Uh oh!
There was an error while loading.Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork32k
gh-133546: Make re.Match
a well-rounded Sequence
type #133549
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub?Sign in to your account
base:main
Are you sure you want to change the base?
Changes fromall commits
74480a7
a3de846
603b1d1
70b73e4
f51ef45
f218828
5272141
d0aa6fa
5f67be0
17feaa6
fe709f8
4095b52
File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading.Please reload this page.
Jump to
Uh oh!
There was an error while loading.Please reload this page.
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1378,6 +1378,27 @@ when there is no match, you can test whether there was a match with a simple | ||
if match: | ||
process(match) | ||
Match objects are proper :class:`~collections.abc.Sequence` types. You can access | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. This is not true with this PR, Sequence has a number of other requirements (e.g. an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Nice catch, thanks! I added | ||
match groups via subscripting ``match[...]`` and use familiar | ||
:class:`~collections.abc.Sequence` idioms to iterate over and extract match groups:: | ||
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") | ||
>>> m[1] | ||
'Isaac' | ||
>>> list(m) | ||
['Isaac Newton', 'Isaac', 'Newton'] | ||
>>> _, first_name, last_name = m | ||
>>> last_name | ||
'Newton' | ||
You can also destructure match objects with Python's ``match`` statement:: | ||
>>> match re.match(r"(\d+)-(\d+)-(\d+)", "2000-10-16"): | ||
... case [_, year, month, day]: | ||
... year | ||
... | ||
'2000' | ||
.. class:: Match | ||
Match object returned by successful ``match``\ es and ``search``\ es. | ||
@@ -1473,6 +1494,37 @@ when there is no match, you can test whether there was a match with a simple | ||
.. versionadded:: 3.6 | ||
.. versionchanged:: next | ||
Negative indexing is now supported. This allows accessing match groups | ||
from the end, starting from the last group defined in the pattern:: | ||
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") | ||
>>> m[-1] # The first parenthesized subgroup starting from the end. | ||
'Newton' | ||
>>> m[-2] # The second parenthesized subgroup starting from the end. | ||
'Isaac' | ||
>>> m[-3] # The entire match (group 0), reached by counting back past every subgroup. | ||
'Isaac Newton' | ||
You can also use slicing to extract multiple groups as a tuple:: | ||
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") | ||
>>> m[1:] | ||
('Isaac', 'Newton') | ||
.. method:: Match.__len__() | ||
Return the number of groups accessible through the subscript syntax provided by | ||
:meth:`~Match.__getitem__`. This includes group ``0`` representing the entire match:: | ||
>>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") | ||
>>> len(m) | ||
3 | ||
.. versionadded:: next | ||
.. method:: Match.groups(default=None) | ||
@@ -1538,6 +1590,19 @@ when there is no match, you can test whether there was a match with a simple | ||
that if *group* did not contribute to the match, this is ``(-1, -1)``. | ||
*group* defaults to zero, the entire match. | ||
.. method:: Match.index(value, start=0, stop=sys.maxsize, /) | ||
picnixz marked this conversation as resolved. Show resolvedHide resolvedUh oh!There was an error while loading.Please reload this page. | ||
Return the index of the first occurrence of *value* among the matched groups. | ||
Raise :exc:`ValueError` if *value* is not present. | ||
.. versionadded:: next | ||
.. method:: Match.count(value, /) | ||
Return the number of occurrences of the value among the matched groups. | ||
.. versionadded:: next | ||
.. attribute:: Match.pos | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -8,6 +8,7 @@ | ||
import sys | ||
import unittest | ||
import warnings | ||
from collections.abc import Sequence | ||
from re import Scanner | ||
from weakref import proxy | ||
@@ -570,10 +571,14 @@ def test_match_getitem(self): | ||
self.assertEqual(m[1], 'a') | ||
self.assertEqual(m[2], None) | ||
self.assertEqual(m[3], None) | ||
self.assertEqual(m[-1], None) | ||
self.assertEqual(m[-2], None) | ||
self.assertEqual(m[-3], 'a') | ||
self.assertEqual(m[-4], 'a') | ||
with self.assertRaisesRegex(IndexError, 'no such group'): | ||
m['X'] | ||
with self.assertRaisesRegex(IndexError, 'no such group'): | ||
m[-5] | ||
with self.assertRaisesRegex(IndexError, 'no such group'): | ||
m[4] | ||
with self.assertRaisesRegex(IndexError, 'no such group'): | ||
@@ -594,13 +599,156 @@ def test_match_getitem(self): | ||
self.assertEqual(m[1], 'a') | ||
self.assertEqual(m[2], None) | ||
self.assertEqual(m[3], 'c') | ||
self.assertEqual(m[-1], 'c') | ||
self.assertEqual(m[-2], None) | ||
self.assertEqual(m[-3], 'a') | ||
self.assertEqual(m[-4], 'ac') | ||
# Cannot assign. | ||
with self.assertRaises(TypeError): | ||
m[0] = 1 | ||
def test_match_getitem_slice(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
self.assertEqual(m[:0], ()) | ||
self.assertEqual(m[:1], ("abc",)) | ||
self.assertEqual(m[:2], ("abc", "a")) | ||
self.assertEqual(m[:3], ("abc", "a", "b")) | ||
self.assertEqual(m[:4], ("abc", "a", "b", "c")) | ||
self.assertEqual(m[0:], ("abc", "a", "b", "c")) | ||
self.assertEqual(m[1:], ("a", "b", "c")) | ||
self.assertEqual(m[2:], ("b", "c")) | ||
self.assertEqual(m[3:], ("c",)) | ||
self.assertEqual(m[4:], ()) | ||
self.assertEqual(m[:-4], ()) | ||
self.assertEqual(m[:-3], ("abc",)) | ||
self.assertEqual(m[:-2], ("abc", "a")) | ||
self.assertEqual(m[:-1], ("abc", "a", "b")) | ||
self.assertEqual(m[-4:], ("abc", "a", "b", "c")) | ||
self.assertEqual(m[-3:], ("a", "b", "c")) | ||
self.assertEqual(m[-2:], ("b", "c")) | ||
self.assertEqual(m[-1:], ("c",)) | ||
self.assertEqual(m[1:-1], ("a", "b")) | ||
self.assertEqual(m[::-1], ("c", "b", "a", "abc")) | ||
Comment on lines +631 to +632 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Let's check more slices. | ||
self.assertEqual(m[::4], ("abc",)) | ||
self.assertEqual(m[2:2], ()) | ||
self.assertEqual(m[3:1], ()) | ||
self.assertEqual(m[1:3], ("a", "b")) | ||
self.assertEqual(m[-1::-2], ("c", "a")) | ||
def test_match_sequence(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
self.assertIsInstance(m, Sequence) | ||
self.assertEqual(len(m), 4) | ||
self.assertEqual(tuple(m), ("abc", "a", "b", "c")) | ||
self.assertEqual(list(m), ["abc", "a", "b", "c"]) | ||
abc, a, b, c = m | ||
self.assertEqual(abc, "abc") | ||
self.assertEqual(a, "a") | ||
self.assertEqual(b, "b") | ||
self.assertEqual(c, "c") | ||
self.assertIn("abc", m) | ||
self.assertIn("a", m) | ||
self.assertIn("b", m) | ||
self.assertIn("c", m) | ||
self.assertNotIn("123", m) | ||
self.assertEqual(list(reversed(m)), ["c", "b", "a", "abc"]) | ||
for s, k, v in re.finditer(r"(\w+):(\w+)", "abc:123"): | ||
self.assertEqual(s, "abc:123") | ||
self.assertEqual(k, "abc") | ||
self.assertEqual(v, "123") | ||
def test_match_iter(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
it = iter(m) | ||
self.assertEqual(next(it), "abc") | ||
self.assertEqual(next(it), "a") | ||
self.assertEqual(next(it), "b") | ||
self.assertEqual(next(it), "c") | ||
with self.assertRaises(StopIteration): | ||
next(it) | ||
def test_match_index(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
self.assertEqual(m.index("abc"), 0) | ||
self.assertEqual(m.index("a"), 1) | ||
self.assertEqual(m.index("b"), 2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Let's test with start and stop values as well. As well as non-string values. | ||
self.assertEqual(m.index("c"), 3) | ||
self.assertRaises(ValueError, m.index, "123") | ||
# With start index. | ||
self.assertRaises(ValueError, m.index, "abc", 1) | ||
self.assertEqual(m.index("a", 1), 1) | ||
self.assertEqual(m.index("b", 1), 2) | ||
self.assertEqual(m.index("c", 1), 3) | ||
self.assertRaises(ValueError, m.index, "123", 1) | ||
self.assertRaises(ValueError, m.index, "abc", 2) | ||
self.assertRaises(ValueError, m.index, "a", 2) | ||
self.assertEqual(m.index("b", 2), 2) | ||
self.assertEqual(m.index("c", 2), 3) | ||
self.assertRaises(ValueError, m.index, "123", 2) | ||
self.assertRaises(ValueError, m.index, "abc", 3) | ||
self.assertRaises(ValueError, m.index, "a", 3) | ||
self.assertRaises(ValueError, m.index, "b", 3) | ||
self.assertEqual(m.index("c", 3), 3) | ||
self.assertRaises(ValueError, m.index, "123", 3) | ||
self.assertRaises(ValueError, m.index, "abc", 4) | ||
self.assertRaises(ValueError, m.index, "a", 4) | ||
self.assertRaises(ValueError, m.index, "b", 4) | ||
self.assertRaises(ValueError, m.index, "c", 4) | ||
self.assertRaises(ValueError, m.index, "123", 4) | ||
# With start index and stop index. | ||
self.assertRaises(ValueError, m.index, "b", 0, 2) | ||
self.assertEqual(m.index("b", 1, 3), 2) | ||
self.assertEqual(m.index("b", 2, 4), 2) | ||
self.assertRaises(ValueError, m.index, "b", 3, 4) | ||
self.assertRaises(ValueError, m.index, "b", -1, 0) | ||
# Non-string objects. | ||
self.assertRaises(ValueError, m.index, 123) | ||
self.assertRaises(ValueError, m.index, [1, 2, 3]) | ||
self.assertRaises(ValueError, m.index, object()) | ||
def test_match_count(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
self.assertEqual(m.count("abc"), 1) | ||
self.assertEqual(m.count("a"), 1) | ||
self.assertEqual(m.count("b"), 1) | ||
self.assertEqual(m.count("c"), 1) | ||
self.assertEqual(m.count("123"), 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others.Learn more. Let's check with non-string objects. | ||
# Non-string objects. | ||
self.assertEqual(m.count(123), 0) | ||
self.assertEqual(m.count([1, 2, 3]), 0) | ||
self.assertEqual(m.count(object()), 0) | ||
def test_match_match_case(self): | ||
m = re.match(r"(a)(b)(c)", "abc") | ||
match m: | ||
case [abc, "a", "b", "c"]: | ||
self.assertEqual(abc, "abc") | ||
case _: | ||
self.fail() | ||
match re.match(r"(\d+)-(\d+)-(\d+)", "2025-05-07"): | ||
case [date, year, month, day]: | ||
self.assertEqual(date, "2025-05-07") | ||
self.assertEqual(year, "2025") | ||
self.assertEqual(month, "05") | ||
self.assertEqual(day, "07") | ||
case _: | ||
self.fail() | ||
def test_re_fullmatch(self): | ||
# Issue 16203: Proposal: add re.fullmatch() method. | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Make :class:`re.Match` a well-rounded :class:`~collections.abc.Sequence` | ||
type. |
Some generated files are not rendered by default. Learn more abouthow customized files appear on GitHub.
Uh oh!
There was an error while loading.Please reload this page.
Uh oh!
There was an error while loading.Please reload this page.
Uh oh!
There was an error while loading.Please reload this page.