@@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
884884self .checkPatternError (br'\N{LESS-THAN SIGN}' ,r'bad escape \N' ,0 )
885885self .checkPatternError (br'[\N{LESS-THAN SIGN}]' ,r'bad escape \N' ,1 )
886886
887- def test_string_boundaries (self ):
887+ def test_word_boundaries (self ):
888888# See http://bugs.python.org/issue10713
889- self .assertEqual (re .search (r"\b(abc)\b" ,"abc" ).group (1 ),
890- "abc" )
889+ self .assertEqual (re .search (r"\b(abc)\b" ,"abc" ).group (1 ),"abc" )
890+ self .assertEqual (re .search (r"\b(abc)\b" ,"abc" ,re .ASCII ).group (1 ),"abc" )
891+ self .assertEqual (re .search (br"\b(abc)\b" ,b"abc" ).group (1 ),b"abc" )
892+ self .assertEqual (re .search (br"\b(abc)\b" ,b"abc" ,re .LOCALE ).group (1 ),b"abc" )
893+ self .assertEqual (re .search (r"\b(ьюя)\b" ,"ьюя" ).group (1 ),"ьюя" )
894+ self .assertIsNone (re .search (r"\b(ьюя)\b" ,"ьюя" ,re .ASCII ))
895+ # There's a word boundary between a word and a non-word.
896+ self .assertTrue (re .match (r".\b" ,"a=" ))
897+ self .assertTrue (re .match (r".\b" ,"a=" ,re .ASCII ))
898+ self .assertTrue (re .match (br".\b" ,b"a=" ))
899+ self .assertTrue (re .match (br".\b" ,b"a=" ,re .LOCALE ))
900+ self .assertTrue (re .match (r".\b" ,"я=" ))
901+ self .assertIsNone (re .match (r".\b" ,"я=" ,re .ASCII ))
902+ # There's a word boundary between a non-word and a word.
903+ self .assertTrue (re .match (r".\b" ,"=a" ))
904+ self .assertTrue (re .match (r".\b" ,"=a" ,re .ASCII ))
905+ self .assertTrue (re .match (br".\b" ,b"=a" ))
906+ self .assertTrue (re .match (br".\b" ,b"=a" ,re .LOCALE ))
907+ self .assertTrue (re .match (r".\b" ,"=я" ))
908+ self .assertIsNone (re .match (r".\b" ,"=я" ,re .ASCII ))
909+ # There is no word boundary inside a word.
910+ self .assertIsNone (re .match (r".\b" ,"ab" ))
911+ self .assertIsNone (re .match (r".\b" ,"ab" ,re .ASCII ))
912+ self .assertIsNone (re .match (br".\b" ,b"ab" ))
913+ self .assertIsNone (re .match (br".\b" ,b"ab" ,re .LOCALE ))
914+ self .assertIsNone (re .match (r".\b" ,"юя" ))
915+ self .assertIsNone (re .match (r".\b" ,"юя" ,re .ASCII ))
916+ # There is no word boundary between non-word characters.
917+ self .assertIsNone (re .match (r".\b" ,"=-" ))
918+ self .assertIsNone (re .match (r".\b" ,"=-" ,re .ASCII ))
919+ self .assertIsNone (re .match (br".\b" ,b"=-" ))
920+ self .assertIsNone (re .match (br".\b" ,b"=-" ,re .LOCALE ))
921+ # There is no non-boundary match between a word and a non-word.
922+ self .assertIsNone (re .match (r".\B" ,"a=" ))
923+ self .assertIsNone (re .match (r".\B" ,"a=" ,re .ASCII ))
924+ self .assertIsNone (re .match (br".\B" ,b"a=" ))
925+ self .assertIsNone (re .match (br".\B" ,b"a=" ,re .LOCALE ))
926+ self .assertIsNone (re .match (r".\B" ,"я=" ))
927+ self .assertTrue (re .match (r".\B" ,"я=" ,re .ASCII ))
928+ # There is no non-boundary match between a non-word and a word.
929+ self .assertIsNone (re .match (r".\B" ,"=a" ))
930+ self .assertIsNone (re .match (r".\B" ,"=a" ,re .ASCII ))
931+ self .assertIsNone (re .match (br".\B" ,b"=a" ))
932+ self .assertIsNone (re .match (br".\B" ,b"=a" ,re .LOCALE ))
933+ self .assertIsNone (re .match (r".\B" ,"=я" ))
934+ self .assertTrue (re .match (r".\B" ,"=я" ,re .ASCII ))
935+ # There's a non-boundary match inside a word.
936+ self .assertTrue (re .match (r".\B" ,"ab" ))
937+ self .assertTrue (re .match (r".\B" ,"ab" ,re .ASCII ))
938+ self .assertTrue (re .match (br".\B" ,b"ab" ))
939+ self .assertTrue (re .match (br".\B" ,b"ab" ,re .LOCALE ))
940+ self .assertTrue (re .match (r".\B" ,"юя" ))
941+ self .assertTrue (re .match (r".\B" ,"юя" ,re .ASCII ))
942+ # There's a non-boundary match between non-word characters.
943+ self .assertTrue (re .match (r".\B" ,"=-" ))
944+ self .assertTrue (re .match (r".\B" ,"=-" ,re .ASCII ))
945+ self .assertTrue (re .match (br".\B" ,b"=-" ))
946+ self .assertTrue (re .match (br".\B" ,b"=-" ,re .LOCALE ))
891947# There's a word boundary at the start of a string.
892948self .assertTrue (re .match (r"\b" ,"abc" ))
949+ self .assertTrue (re .match (r"\b" ,"abc" ,re .ASCII ))
950+ self .assertTrue (re .match (br"\b" ,b"abc" ))
951+ self .assertTrue (re .match (br"\b" ,b"abc" ,re .LOCALE ))
952+ self .assertTrue (re .match (r"\b" ,"ьюя" ))
953+ self .assertIsNone (re .match (r"\b" ,"ьюя" ,re .ASCII ))
954+ # There's a word boundary at the end of a string.
955+ self .assertTrue (re .fullmatch (r".+\b" ,"abc" ))
956+ self .assertTrue (re .fullmatch (r".+\b" ,"abc" ,re .ASCII ))
957+ self .assertTrue (re .fullmatch (br".+\b" ,b"abc" ))
958+ self .assertTrue (re .fullmatch (br".+\b" ,b"abc" ,re .LOCALE ))
959+ self .assertTrue (re .fullmatch (r".+\b" ,"ьюя" ))
960+ self .assertIsNone (re .search (r"\b" ,"ьюя" ,re .ASCII ))
893961# A non-empty string includes a non-boundary zero-length match.
894- self .assertTrue (re .search (r"\B" ,"abc" ))
962+ self .assertEqual (re .search (r"\B" ,"abc" ).span (), (1 ,1 ))
963+ self .assertEqual (re .search (r"\B" ,"abc" ,re .ASCII ).span (), (1 ,1 ))
964+ self .assertEqual (re .search (br"\B" ,b"abc" ).span (), (1 ,1 ))
965+ self .assertEqual (re .search (br"\B" ,b"abc" ,re .LOCALE ).span (), (1 ,1 ))
966+ self .assertEqual (re .search (r"\B" ,"ьюя" ).span (), (1 ,1 ))
967+ self .assertEqual (re .search (r"\B" ,"ьюя" ,re .ASCII ).span (), (0 ,0 ))
895968# There is no non-boundary match at the start of a string.
896- self .assertFalse (re .match (r"\B" ,"abc" ))
969+ self .assertIsNone (re .match (r"\B" ,"abc" ))
970+ self .assertIsNone (re .match (r"\B" ,"abc" ,re .ASCII ))
971+ self .assertIsNone (re .match (br"\B" ,b"abc" ))
972+ self .assertIsNone (re .match (br"\B" ,b"abc" ,re .LOCALE ))
973+ self .assertIsNone (re .match (r"\B" ,"ьюя" ))
974+ self .assertTrue (re .match (r"\B" ,"ьюя" ,re .ASCII ))
975+ # There is no non-boundary match at the end of a string.
976+ self .assertIsNone (re .fullmatch (r".+\B" ,"abc" ))
977+ self .assertIsNone (re .fullmatch (r".+\B" ,"abc" ,re .ASCII ))
978+ self .assertIsNone (re .fullmatch (br".+\B" ,b"abc" ))
979+ self .assertIsNone (re .fullmatch (br".+\B" ,b"abc" ,re .LOCALE ))
980+ self .assertIsNone (re .fullmatch (r".+\B" ,"ьюя" ))
981+ self .assertTrue (re .fullmatch (r".+\B" ,"ьюя" ,re .ASCII ))
897982# However, an empty string contains no word boundaries, and also no
898983# non-boundaries.
899- self .assertIsNone (re .search (r"\B" ,"" ))
984+ self .assertIsNone (re .search (r"\b" ,"" ))
985+ self .assertIsNone (re .search (r"\b" ,"" ,re .ASCII ))
986+ self .assertIsNone (re .search (br"\b" ,b"" ))
987+ self .assertIsNone (re .search (br"\b" ,b"" ,re .LOCALE ))
900988# This one is questionable and different from the perlre behaviour,
901989# but describes current behavior.
902- self .assertIsNone (re .search (r"\b" ,"" ))
990+ self .assertIsNone (re .search (r"\B" ,"" ))
991+ self .assertIsNone (re .search (r"\B" ,"" ,re .ASCII ))
992+ self .assertIsNone (re .search (br"\B" ,b"" ))
993+ self .assertIsNone (re .search (br"\B" ,b"" ,re .LOCALE ))
903994# A single word-character string has two boundaries, but no
904995# non-boundary gaps.
905996self .assertEqual (len (re .findall (r"\b" ,"a" )),2 )
997+ self .assertEqual (len (re .findall (r"\b" ,"a" ,re .ASCII )),2 )
998+ self .assertEqual (len (re .findall (br"\b" ,b"a" )),2 )
999+ self .assertEqual (len (re .findall (br"\b" ,b"a" ,re .LOCALE )),2 )
9061000self .assertEqual (len (re .findall (r"\B" ,"a" )),0 )
1001+ self .assertEqual (len (re .findall (r"\B" ,"a" ,re .ASCII )),0 )
1002+ self .assertEqual (len (re .findall (br"\B" ,b"a" )),0 )
1003+ self .assertEqual (len (re .findall (br"\B" ,b"a" ,re .LOCALE )),0 )
9071004# If there are no words, there are no boundaries
9081005self .assertEqual (len (re .findall (r"\b" ," " )),0 )
1006+ self .assertEqual (len (re .findall (r"\b" ," " ,re .ASCII )),0 )
1007+ self .assertEqual (len (re .findall (br"\b" ,b" " )),0 )
1008+ self .assertEqual (len (re .findall (br"\b" ,b" " ,re .LOCALE )),0 )
9091009self .assertEqual (len (re .findall (r"\b" ," " )),0 )
1010+ self .assertEqual (len (re .findall (r"\b" ," " ,re .ASCII )),0 )
1011+ self .assertEqual (len (re .findall (br"\b" ,b" " )),0 )
1012+ self .assertEqual (len (re .findall (br"\b" ,b" " ,re .LOCALE )),0 )
9101013# Can match around the whitespace.
9111014self .assertEqual (len (re .findall (r"\B" ," " )),2 )
1015+ self .assertEqual (len (re .findall (r"\B" ," " ,re .ASCII )),2 )
1016+ self .assertEqual (len (re .findall (br"\B" ,b" " )),2 )
1017+ self .assertEqual (len (re .findall (br"\B" ,b" " ,re .LOCALE )),2 )
9121018
9131019def test_bigcharset (self ):
9141020self .assertEqual (re .match ("([\u2222 \u2223 ])" ,