@@ -1,6 +1,6 @@
 from test import support
 from test.support import os_helper
-from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
+from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
@@ -51,6 +51,25 @@ def check_tokenize(self, s, expected):
                              ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                              expected.rstrip().splitlines())
 
+    def test_invalid_readline(self):
+        def gen():
+            yield "sdfosdg"
+            yield "sdfosdg"
+        with self.assertRaises(TypeError):
+            list(tokenize(gen().__next__))
+
+        def gen():
+            yield b"sdfosdg"
+            yield b"sdfosdg"
+        with self.assertRaises(TypeError):
+            list(generate_tokens(gen().__next__))
+
+        def gen():
+            yield "sdfosdg"
+            1/0
+        with self.assertRaises(ZeroDivisionError):
+            list(generate_tokens(gen().__next__))
+
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
         # when the input lacks a trailing new line.
@@ -1161,7 +1180,8 @@ class TestTokenizerAdheresToPep0263(TestCase):
 
     def _testFile(self, filename):
         path = os.path.join(os.path.dirname(__file__), filename)
-        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
+        with open(path, 'rb') as f:
+            TestRoundtrip.check_roundtrip(self, f)
 
     def test_utf8_coding_cookie_and_no_utf8_bom(self):
         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
@@ -1206,7 +1226,8 @@ def readline():
             yield b''
 
         # skip the initial encoding token and the end tokens
-        tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
+        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
+                                                        extra_tokens=True))[:-2]
         expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1475,13 +1496,13 @@ def test_tokenize(self):
         def mock_detect_encoding(readline):
             return encoding, [b'first', b'second']
 
-        def mock__tokenize(readline, encoding):
+        def mock__tokenize(readline, encoding, **kwargs):
             nonlocal encoding_used
             encoding_used = encoding
             out = []
             while True:
                 try:
-                    next_line = next(readline)
+                    next_line = readline()
                 except StopIteration:
                     return out
                 if next_line:
@@ -1498,16 +1519,16 @@ def mock_readline():
             return str(counter).encode()
 
         orig_detect_encoding = tokenize_module.detect_encoding
-        orig__tokenize = tokenize_module._tokenize
+        orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
         tokenize_module.detect_encoding = mock_detect_encoding
-        tokenize_module._tokenize = mock__tokenize
+        tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
             results = tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
             tokenize_module.detect_encoding = orig_detect_encoding
-            tokenize_module._tokenize = orig__tokenize
+            tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token
 
         self.assertEqual(encoding_used, encoding)
 
@@ -1834,12 +1855,33 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())
 
+    def test_encoding(self):
+        def readline(encoding):
+            yield "1+1".encode(encoding)
+
+        expected = [
+            TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
+            TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
+            TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
+            TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+            TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+        for encoding in ["utf-8", "latin-1", "utf-16"]:
+            with self.subTest(encoding=encoding):
+                tokens = list(_generate_tokens_from_c_tokenizer(
+                    readline(encoding).__next__,
+                    extra_tokens=True,
+                    encoding=encoding,
+                ))
+                self.assertEqual(tokens, expected)
+
     def test_int(self):
 
         self.check_tokenize('0xff <= 255', """\
@@ -2675,43 +2717,44 @@ def test_unicode(self):
 
     def test_invalid_syntax(self):
         def get_tokens(string):
-            return list(_generate_tokens_from_c_tokenizer(string))
-
-        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
-        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
-        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
-
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "\xa0")
-        self.assertRaises(SyntaxError, get_tokens, "€")
-
-        self.assertRaises(SyntaxError, get_tokens, "0b12")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
-        self.assertRaises(SyntaxError, get_tokens, "0b2")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_")
-        self.assertRaises(SyntaxError, get_tokens, "0b")
-        self.assertRaises(SyntaxError, get_tokens, "0o18")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
-        self.assertRaises(SyntaxError, get_tokens, "0o8")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_")
-        self.assertRaises(SyntaxError, get_tokens, "0o")
-        self.assertRaises(SyntaxError, get_tokens, "0x1_")
-        self.assertRaises(SyntaxError, get_tokens, "0x")
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "012")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
-        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
-
-        self.assertRaises(SyntaxError, get_tokens, "(" * 1000 + "a" + ")" * 1000)
-        self.assertRaises(SyntaxError, get_tokens, "]")
+            the_string = StringIO(string)
+            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+
+        for case in [
+            "(1+2]",
+            "(1+2}",
+            "{1+2]",
+            "1_",
+            "1.2_",
+            "1e2_",
+            "1e+",
+
+            "\xa0",
+            "€",
+            "0b12",
+            "0b1_2",
+            "0b2",
+            "0b1_",
+            "0b",
+            "0o18",
+            "0o1_8",
+            "0o8",
+            "0o1_",
+            "0o",
+            "0x1_",
+            "0x",
+            "1_",
+            "012",
+            "1.2_",
+            "1e2_",
+            "1e+",
+            "'sdfsdf",
+            "'''sdfsdf''",
+            "(" * 1000 + "a" + ")" * 1000,
+            "]",
+        ]:
+            with self.subTest(case=case):
+                self.assertRaises(SyntaxError, get_tokens, case)
 
     def test_max_indent(self):
         MAXINDENT = 100
@@ -2722,20 +2765,24 @@ def generate_source(indents):
             return source
 
         valid = generate_source(MAXINDENT - 1)
-        tokens = list(_generate_tokens_from_c_tokenizer(valid))
+        the_input = StringIO(valid)
+        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
         self.assertEqual(tokens[-2].type, DEDENT)
         self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")
 
         invalid = generate_source(MAXINDENT)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(invalid)))
+        the_input = StringIO(invalid)
+        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )
 
     def test_continuation_lines_indentation(self):
         def get_tokens(string):
-            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+            the_string = StringIO(string)
+            return [(kind, string) for (kind, string, *_)
+                    in _generate_tokens_from_c_tokenizer(the_string.readline)]
 
         code = dedent("""
             def fib(n):