Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a380dc6

Browse files
[3.13] gh-124130: Increase test coverage for \b and \B in regular expressions (GH-124330) (GH-124413)
(cherry picked from commit b82f076) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 566983d, commit a380dc6

File tree

1 file changed

+113
-7
lines changed

1 file changed

+113
-7
lines changed

‎Lib/test/test_re.py‎

Lines changed: 113 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
884884
self.checkPatternError(br'\N{LESS-THAN SIGN}',r'bad escape \N',0)
885885
self.checkPatternError(br'[\N{LESS-THAN SIGN}]',r'bad escape \N',1)
886886

887-
deftest_string_boundaries(self):
887+
deftest_word_boundaries(self):
888888
# See http://bugs.python.org/issue10713
889-
self.assertEqual(re.search(r"\b(abc)\b","abc").group(1),
890-
"abc")
889+
self.assertEqual(re.search(r"\b(abc)\b","abc").group(1),"abc")
890+
self.assertEqual(re.search(r"\b(abc)\b","abc",re.ASCII).group(1),"abc")
891+
self.assertEqual(re.search(br"\b(abc)\b",b"abc").group(1),b"abc")
892+
self.assertEqual(re.search(br"\b(abc)\b",b"abc",re.LOCALE).group(1),b"abc")
893+
self.assertEqual(re.search(r"\b(ьюя)\b","ьюя").group(1),"ьюя")
894+
self.assertIsNone(re.search(r"\b(ьюя)\b","ьюя",re.ASCII))
895+
# There's a word boundary between a word and a non-word.
896+
self.assertTrue(re.match(r".\b","a="))
897+
self.assertTrue(re.match(r".\b","a=",re.ASCII))
898+
self.assertTrue(re.match(br".\b",b"a="))
899+
self.assertTrue(re.match(br".\b",b"a=",re.LOCALE))
900+
self.assertTrue(re.match(r".\b","я="))
901+
self.assertIsNone(re.match(r".\b","я=",re.ASCII))
902+
# There's a word boundary between a non-word and a word.
903+
self.assertTrue(re.match(r".\b","=a"))
904+
self.assertTrue(re.match(r".\b","=a",re.ASCII))
905+
self.assertTrue(re.match(br".\b",b"=a"))
906+
self.assertTrue(re.match(br".\b",b"=a",re.LOCALE))
907+
self.assertTrue(re.match(r".\b","=я"))
908+
self.assertIsNone(re.match(r".\b","=я",re.ASCII))
909+
# There is no word boundary inside a word.
910+
self.assertIsNone(re.match(r".\b","ab"))
911+
self.assertIsNone(re.match(r".\b","ab",re.ASCII))
912+
self.assertIsNone(re.match(br".\b",b"ab"))
913+
self.assertIsNone(re.match(br".\b",b"ab",re.LOCALE))
914+
self.assertIsNone(re.match(r".\b","юя"))
915+
self.assertIsNone(re.match(r".\b","юя",re.ASCII))
916+
# There is no word boundary between non-word characters.
917+
self.assertIsNone(re.match(r".\b","=-"))
918+
self.assertIsNone(re.match(r".\b","=-",re.ASCII))
919+
self.assertIsNone(re.match(br".\b",b"=-"))
920+
self.assertIsNone(re.match(br".\b",b"=-",re.LOCALE))
921+
# There is no non-boundary match between a word and a non-word.
922+
self.assertIsNone(re.match(r".\B","a="))
923+
self.assertIsNone(re.match(r".\B","a=",re.ASCII))
924+
self.assertIsNone(re.match(br".\B",b"a="))
925+
self.assertIsNone(re.match(br".\B",b"a=",re.LOCALE))
926+
self.assertIsNone(re.match(r".\B","я="))
927+
self.assertTrue(re.match(r".\B","я=",re.ASCII))
928+
# There is no non-boundary match between a non-word and a word.
929+
self.assertIsNone(re.match(r".\B","=a"))
930+
self.assertIsNone(re.match(r".\B","=a",re.ASCII))
931+
self.assertIsNone(re.match(br".\B",b"=a"))
932+
self.assertIsNone(re.match(br".\B",b"=a",re.LOCALE))
933+
self.assertIsNone(re.match(r".\B","=я"))
934+
self.assertTrue(re.match(r".\B","=я",re.ASCII))
935+
# There's a non-boundary match inside a word.
936+
self.assertTrue(re.match(r".\B","ab"))
937+
self.assertTrue(re.match(r".\B","ab",re.ASCII))
938+
self.assertTrue(re.match(br".\B",b"ab"))
939+
self.assertTrue(re.match(br".\B",b"ab",re.LOCALE))
940+
self.assertTrue(re.match(r".\B","юя"))
941+
self.assertTrue(re.match(r".\B","юя",re.ASCII))
942+
# There's a non-boundary match between non-word characters.
943+
self.assertTrue(re.match(r".\B","=-"))
944+
self.assertTrue(re.match(r".\B","=-",re.ASCII))
945+
self.assertTrue(re.match(br".\B",b"=-"))
946+
self.assertTrue(re.match(br".\B",b"=-",re.LOCALE))
891947
# There's a word boundary at the start of a string.
892948
self.assertTrue(re.match(r"\b","abc"))
949+
self.assertTrue(re.match(r"\b","abc",re.ASCII))
950+
self.assertTrue(re.match(br"\b",b"abc"))
951+
self.assertTrue(re.match(br"\b",b"abc",re.LOCALE))
952+
self.assertTrue(re.match(r"\b","ьюя"))
953+
self.assertIsNone(re.match(r"\b","ьюя",re.ASCII))
954+
# There's a word boundary at the end of a string.
955+
self.assertTrue(re.fullmatch(r".+\b","abc"))
956+
self.assertTrue(re.fullmatch(r".+\b","abc",re.ASCII))
957+
self.assertTrue(re.fullmatch(br".+\b",b"abc"))
958+
self.assertTrue(re.fullmatch(br".+\b",b"abc",re.LOCALE))
959+
self.assertTrue(re.fullmatch(r".+\b","ьюя"))
960+
self.assertIsNone(re.search(r"\b","ьюя",re.ASCII))
893961
# A non-empty string includes a non-boundary zero-length match.
894-
self.assertTrue(re.search(r"\B","abc"))
962+
self.assertEqual(re.search(r"\B","abc").span(), (1,1))
963+
self.assertEqual(re.search(r"\B","abc",re.ASCII).span(), (1,1))
964+
self.assertEqual(re.search(br"\B",b"abc").span(), (1,1))
965+
self.assertEqual(re.search(br"\B",b"abc",re.LOCALE).span(), (1,1))
966+
self.assertEqual(re.search(r"\B","ьюя").span(), (1,1))
967+
self.assertEqual(re.search(r"\B","ьюя",re.ASCII).span(), (0,0))
895968
# There is no non-boundary match at the start of a string.
896-
self.assertFalse(re.match(r"\B","abc"))
969+
self.assertIsNone(re.match(r"\B","abc"))
970+
self.assertIsNone(re.match(r"\B","abc",re.ASCII))
971+
self.assertIsNone(re.match(br"\B",b"abc"))
972+
self.assertIsNone(re.match(br"\B",b"abc",re.LOCALE))
973+
self.assertIsNone(re.match(r"\B","ьюя"))
974+
self.assertTrue(re.match(r"\B","ьюя",re.ASCII))
975+
# There is no non-boundary match at the end of a string.
976+
self.assertIsNone(re.fullmatch(r".+\B","abc"))
977+
self.assertIsNone(re.fullmatch(r".+\B","abc",re.ASCII))
978+
self.assertIsNone(re.fullmatch(br".+\B",b"abc"))
979+
self.assertIsNone(re.fullmatch(br".+\B",b"abc",re.LOCALE))
980+
self.assertIsNone(re.fullmatch(r".+\B","ьюя"))
981+
self.assertTrue(re.fullmatch(r".+\B","ьюя",re.ASCII))
897982
# However, an empty string contains no word boundaries, and also no
898983
# non-boundaries.
899-
self.assertIsNone(re.search(r"\B",""))
984+
self.assertIsNone(re.search(r"\b",""))
985+
self.assertIsNone(re.search(r"\b","",re.ASCII))
986+
self.assertIsNone(re.search(br"\b",b""))
987+
self.assertIsNone(re.search(br"\b",b"",re.LOCALE))
900988
# This one is questionable and different from the perlre behaviour,
901989
# but describes current behavior.
902-
self.assertIsNone(re.search(r"\b",""))
990+
self.assertIsNone(re.search(r"\B",""))
991+
self.assertIsNone(re.search(r"\B","",re.ASCII))
992+
self.assertIsNone(re.search(br"\B",b""))
993+
self.assertIsNone(re.search(br"\B",b"",re.LOCALE))
903994
# A single word-character string has two boundaries, but no
904995
# non-boundary gaps.
905996
self.assertEqual(len(re.findall(r"\b","a")),2)
997+
self.assertEqual(len(re.findall(r"\b","a",re.ASCII)),2)
998+
self.assertEqual(len(re.findall(br"\b",b"a")),2)
999+
self.assertEqual(len(re.findall(br"\b",b"a",re.LOCALE)),2)
9061000
self.assertEqual(len(re.findall(r"\B","a")),0)
1001+
self.assertEqual(len(re.findall(r"\B","a",re.ASCII)),0)
1002+
self.assertEqual(len(re.findall(br"\B",b"a")),0)
1003+
self.assertEqual(len(re.findall(br"\B",b"a",re.LOCALE)),0)
9071004
# If there are no words, there are no boundaries
9081005
self.assertEqual(len(re.findall(r"\b"," ")),0)
1006+
self.assertEqual(len(re.findall(r"\b"," ",re.ASCII)),0)
1007+
self.assertEqual(len(re.findall(br"\b",b" ")),0)
1008+
self.assertEqual(len(re.findall(br"\b",b" ",re.LOCALE)),0)
9091009
self.assertEqual(len(re.findall(r"\b"," ")),0)
1010+
self.assertEqual(len(re.findall(r"\b"," ",re.ASCII)),0)
1011+
self.assertEqual(len(re.findall(br"\b",b" ")),0)
1012+
self.assertEqual(len(re.findall(br"\b",b" ",re.LOCALE)),0)
9101013
# Can match around the whitespace.
9111014
self.assertEqual(len(re.findall(r"\B"," ")),2)
1015+
self.assertEqual(len(re.findall(r"\B"," ",re.ASCII)),2)
1016+
self.assertEqual(len(re.findall(br"\B",b" ")),2)
1017+
self.assertEqual(len(re.findall(br"\B",b" ",re.LOCALE)),2)
9121018

9131019
deftest_bigcharset(self):
9141020
self.assertEqual(re.match("([\u2222\u2223])",

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp