Commit c687946

[3.12] gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (GH-105070) (#105119)

gh-105069: Add a readline-like callable to the tokenizer to consume input iteratively (GH-105070)
(cherry picked from commit 9216e69)
Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>

1 parent 2f8c22f  commit c687946

File tree: 7 files changed, +274 -96 lines changed
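
For orientation, a minimal sketch of the two public entry points this commit rewires. Their signatures are unchanged: both already take a readline-style callable; after this change the callable is handed to the C tokenizer and consumed line by line instead of being joined into a single string up front.

    import io
    import tokenize

    source = "x = 1\nprint(x)\n"

    # generate_tokens() expects a readline callable that returns str objects...
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        print(tok)

    # ...while tokenize() expects one that returns bytes and detects the encoding itself.
    for tok in tokenize.tokenize(io.BytesIO(source.encode("utf-8")).readline):
        print(tok)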

Lib/inspect.py

Lines changed: 1 addition & 1 deletion

@@ -2203,7 +2203,7 @@ def _signature_strip_non_python_syntax(signature):
             add(string)
             if (string == ','):
                 add(' ')
-    clean_signature = ''.join(text).strip()
+    clean_signature = ''.join(text).strip().replace("\n", "")
     return clean_signature, self_parameter

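As an aside, a tiny sketch (with made-up token pieces, not code from inspect.py) of what the added .replace("\n", "") guards against: .strip() only trims the ends of the joined signature text, so a newline embedded in the middle would otherwise survive.

    # hypothetical pieces of a signature string that picked up an inner newline
    text = ['(', 'a', ',', '\n', ' ', 'b', ')']
    clean_signature = ''.join(text).strip().replace("\n", "")
    print(clean_signature)  # -> "(a, b)"; with .strip() alone the inner "\n" would remain
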
Lib/test/test_tokenize.py

Lines changed: 96 additions & 49 deletions
@@ -1,6 +1,6 @@
 from test import support
 from test.support import os_helper
-from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
+from tokenize import (tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE, _generate_tokens_from_c_tokenizer, DEDENT, TokenInfo)
@@ -51,6 +51,25 @@ def check_tokenize(self, s, expected):
                          ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                          expected.rstrip().splitlines())

+    def test_invalid_readline(self):
+        def gen():
+            yield "sdfosdg"
+            yield "sdfosdg"
+        with self.assertRaises(TypeError):
+            list(tokenize(gen().__next__))
+
+        def gen():
+            yield b"sdfosdg"
+            yield b"sdfosdg"
+        with self.assertRaises(TypeError):
+            list(generate_tokens(gen().__next__))
+
+        def gen():
+            yield "sdfosdg"
+            1/0
+        with self.assertRaises(ZeroDivisionError):
+            list(generate_tokens(gen().__next__))
+
     def test_implicit_newline(self):
         # Make sure that the tokenizer puts in an implicit NEWLINE
         # when the input lacks a trailing new line.
@@ -1161,7 +1180,8 @@ class TestTokenizerAdheresToPep0263(TestCase):

     def _testFile(self, filename):
         path = os.path.join(os.path.dirname(__file__), filename)
-        TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
+        with open(path, 'rb') as f:
+            TestRoundtrip.check_roundtrip(self, f)

     def test_utf8_coding_cookie_and_no_utf8_bom(self):
         f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
@@ -1206,7 +1226,8 @@ def readline():
             yield b''

         # skip the initial encoding token and the end tokens
-        tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
+        tokens = list(_generate_tokens_from_c_tokenizer(readline().__next__, encoding='utf-8',
+                                                        extra_tokens=True))[:-2]
         expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
@@ -1475,13 +1496,13 @@ def test_tokenize(self):
         def mock_detect_encoding(readline):
             return encoding, [b'first', b'second']

-        def mock__tokenize(readline, encoding):
+        def mock__tokenize(readline, encoding, **kwargs):
             nonlocal encoding_used
             encoding_used = encoding
             out = []
             while True:
                 try:
-                    next_line = next(readline)
+                    next_line = readline()
                 except StopIteration:
                     return out
                 if next_line:
@@ -1498,16 +1519,16 @@ def mock_readline():
             return str(counter).encode()

         orig_detect_encoding = tokenize_module.detect_encoding
-        orig__tokenize = tokenize_module._tokenize
+        orig_c_token = tokenize_module._generate_tokens_from_c_tokenizer
         tokenize_module.detect_encoding = mock_detect_encoding
-        tokenize_module._tokenize = mock__tokenize
+        tokenize_module._generate_tokens_from_c_tokenizer = mock__tokenize
         try:
             results = tokenize(mock_readline)
             self.assertEqual(list(results)[1:],
                              [b'first', b'second', b'1', b'2', b'3', b'4'])
         finally:
             tokenize_module.detect_encoding = orig_detect_encoding
-            tokenize_module._tokenize = orig__tokenize
+            tokenize_module._generate_tokens_from_c_tokenizer = orig_c_token

         self.assertEqual(encoding_used, encoding)

@@ -1834,12 +1855,33 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())

+    def test_encoding(self):
+        def readline(encoding):
+            yield "1+1".encode(encoding)
+
+        expected = [
+            TokenInfo(type=NUMBER, string='1', start=(1, 0), end=(1, 1), line='1+1\n'),
+            TokenInfo(type=OP, string='+', start=(1, 1), end=(1, 2), line='1+1\n'),
+            TokenInfo(type=NUMBER, string='1', start=(1, 2), end=(1, 3), line='1+1\n'),
+            TokenInfo(type=NEWLINE, string='\n', start=(1, 3), end=(1, 4), line='1+1\n'),
+            TokenInfo(type=ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+        for encoding in ["utf-8", "latin-1", "utf-16"]:
+            with self.subTest(encoding=encoding):
+                tokens = list(_generate_tokens_from_c_tokenizer(
+                    readline(encoding).__next__,
+                    extra_tokens=True,
+                    encoding=encoding,
+                ))
+                self.assertEqual(tokens, expected)
+
     def test_int(self):

         self.check_tokenize('0xff <= 255', """\
@@ -2675,43 +2717,44 @@ def test_unicode(self):

     def test_invalid_syntax(self):
         def get_tokens(string):
-            return list(_generate_tokens_from_c_tokenizer(string))
-
-        self.assertRaises(SyntaxError, get_tokens, "(1+2]")
-        self.assertRaises(SyntaxError, get_tokens, "(1+2}")
-        self.assertRaises(SyntaxError, get_tokens, "{1+2]")
-
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "\xa0")
-        self.assertRaises(SyntaxError, get_tokens, "€")
-
-        self.assertRaises(SyntaxError, get_tokens, "0b12")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_2")
-        self.assertRaises(SyntaxError, get_tokens, "0b2")
-        self.assertRaises(SyntaxError, get_tokens, "0b1_")
-        self.assertRaises(SyntaxError, get_tokens, "0b")
-        self.assertRaises(SyntaxError, get_tokens, "0o18")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_8")
-        self.assertRaises(SyntaxError, get_tokens, "0o8")
-        self.assertRaises(SyntaxError, get_tokens, "0o1_")
-        self.assertRaises(SyntaxError, get_tokens, "0o")
-        self.assertRaises(SyntaxError, get_tokens, "0x1_")
-        self.assertRaises(SyntaxError, get_tokens, "0x")
-        self.assertRaises(SyntaxError, get_tokens, "1_")
-        self.assertRaises(SyntaxError, get_tokens, "012")
-        self.assertRaises(SyntaxError, get_tokens, "1.2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e2_")
-        self.assertRaises(SyntaxError, get_tokens, "1e+")
-
-        self.assertRaises(SyntaxError, get_tokens, "'sdfsdf")
-        self.assertRaises(SyntaxError, get_tokens, "'''sdfsdf''")
-
-        self.assertRaises(SyntaxError, get_tokens, "("*1000+"a"+")"*1000)
-        self.assertRaises(SyntaxError, get_tokens, "]")
+            the_string = StringIO(string)
+            return list(_generate_tokens_from_c_tokenizer(the_string.readline))
+
+        for case in [
+            "(1+2]",
+            "(1+2}",
+            "{1+2]",
+            "1_",
+            "1.2_",
+            "1e2_",
+            "1e+",
+
+            "\xa0",
+            "€",
+            "0b12",
+            "0b1_2",
+            "0b2",
+            "0b1_",
+            "0b",
+            "0o18",
+            "0o1_8",
+            "0o8",
+            "0o1_",
+            "0o",
+            "0x1_",
+            "0x",
+            "1_",
+            "012",
+            "1.2_",
+            "1e2_",
+            "1e+",
+            "'sdfsdf",
+            "'''sdfsdf''",
+            "("*1000+"a"+")"*1000,
+            "]",
+        ]:
+            with self.subTest(case=case):
+                self.assertRaises(SyntaxError, get_tokens, case)

     def test_max_indent(self):
         MAXINDENT = 100
@@ -2722,20 +2765,24 @@ def generate_source(indents):
             return source

         valid = generate_source(MAXINDENT - 1)
-        tokens = list(_generate_tokens_from_c_tokenizer(valid))
+        the_input = StringIO(valid)
+        tokens = list(_generate_tokens_from_c_tokenizer(the_input.readline))
         self.assertEqual(tokens[-2].type, DEDENT)
         self.assertEqual(tokens[-1].type, ENDMARKER)
         compile(valid, "<string>", "exec")

         invalid = generate_source(MAXINDENT)
-        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(invalid)))
+        the_input = StringIO(invalid)
+        self.assertRaises(SyntaxError, lambda: list(_generate_tokens_from_c_tokenizer(the_input.readline)))
         self.assertRaises(
             IndentationError, compile, invalid, "<string>", "exec"
         )

     def test_continuation_lines_indentation(self):
         def get_tokens(string):
-            return [(kind, string) for (kind, string, *_) in _generate_tokens_from_c_tokenizer(string)]
+            the_string = StringIO(string)
+            return [(kind, string) for (kind, string, *_)
+                    in _generate_tokens_from_c_tokenizer(the_string.readline)]

         code = dedent("""
            def fib(n):
Lib/tokenize.py

Lines changed: 11 additions & 21 deletions
@@ -34,6 +34,7 @@
 import sys
 from token import *
 from token import EXACT_TOKEN_TYPES
+import _tokenize

 cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
@@ -443,29 +444,15 @@ def tokenize(readline):
         # BOM will already have been stripped.
         encoding = "utf-8"
     yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
-    yield from _tokenize(rl_gen, encoding)
-
-def _tokenize(rl_gen, encoding):
-    source = b"".join(rl_gen).decode(encoding)
-    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
-        yield token
+    yield from _generate_tokens_from_c_tokenizer(rl_gen.__next__, encoding, extra_tokens=True)

 def generate_tokens(readline):
     """Tokenize a source reading Python code as unicode strings.

     This has the same API as tokenize(), except that it expects the *readline*
     callable to return str objects instead of bytes.
     """
-    def _gen():
-        while True:
-            try:
-                line = readline()
-            except StopIteration:
-                return
-            if not line:
-                return
-            yield line.encode()
-    return _tokenize(_gen(), 'utf-8')
+    return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)

 def main():
     import argparse
@@ -502,9 +489,9 @@ def error(message, filename=None, location=None):
             tokens = list(tokenize(f.readline))
         else:
             filename = "<stdin>"
-            tokens = _tokenize(
+            tokens = _generate_tokens_from_c_tokenizer(
                 (x.encode('utf-8') for x in iter(sys.stdin.readline, "")
-                ), "utf-8")
+                ), "utf-8", extra_tokens=True)


         # Output the tokenization
@@ -531,10 +518,13 @@ def error(message, filename=None, location=None):
             perror("unexpected error: %s" % err)
             raise

-def _generate_tokens_from_c_tokenizer(source, extra_tokens=False):
+def _generate_tokens_from_c_tokenizer(source, encoding=None, extra_tokens=False):
     """Tokenize a source reading Python code as unicode strings using the internal C tokenizer"""
-    import _tokenize as c_tokenizer
-    for info in c_tokenizer.TokenizerIter(source, extra_tokens=extra_tokens):
+    if encoding is None:
+        it = _tokenize.TokenizerIter(source, extra_tokens=extra_tokens)
+    else:
+        it = _tokenize.TokenizerIter(source, encoding=encoding, extra_tokens=extra_tokens)
+    for info in it:
         yield TokenInfo._make(info)

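Tying the tokenize.py changes together, a minimal sketch of how the reworked private helper is driven, mirroring the tests above. _generate_tokens_from_c_tokenizer is an internal CPython helper, so the keywords used here (encoding, extra_tokens) are only those established by this diff and its tests.

    import io
    from tokenize import _generate_tokens_from_c_tokenizer

    src = "1 + 1\n"

    # str mode: the readline callable returns str and no encoding is passed
    for tok in _generate_tokens_from_c_tokenizer(io.StringIO(src).readline):
        print(tok)

    # bytes mode: the callable returns bytes and the C tokenizer decodes each
    # line itself; extra_tokens=True matches how the wrappers above call it
    def readline_bytes():
        yield src.encode("utf-16")

    for tok in _generate_tokens_from_c_tokenizer(readline_bytes().__next__,
                                                 encoding="utf-16",
                                                 extra_tokens=True):
        print(tok)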