
Commit b047fa5

pythongh-105549: Tokenize separately NUMBER and NAME tokens and allow 0-prefixed literals (python#105555)

1 parent 00b599a · commit b047fa5
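
With this change, the tokenize module no longer raises on a number immediately followed by a name, or on a 0-prefixed literal; it emits the pieces as separate NUMBER and NAME tokens instead. A minimal sketch of that behaviour, assuming an interpreter that already includes this commit (the expected output mirrors the new tests below):

import io
import token
from tokenize import generate_tokens

# "2sin(x)" is still a SyntaxError for the compiler, but the tokenize
# module now splits it into NUMBER '2' followed by NAME 'sin' instead
# of raising.
toks = list(generate_tokens(io.StringIO("2sin(x)").readline))
print([(token.tok_name[t.type], t.string) for t in toks])
# Expected, per the tests added below:
# [('NUMBER', '2'), ('NAME', 'sin'), ('OP', '('), ('NAME', 'x'),
#  ('OP', ')'), ('NEWLINE', ''), ('ENDMARKER', '')]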

File tree

3 files changed: +45 -3 lines changed

Lib/test/test_tokenize.py

Lines changed: 33 additions & 0 deletions
@@ -284,7 +284,12 @@ def number_token(s):
                 # this won't work with compound complex inputs
                 continue
             self.assertEqual(number_token(lit), lit)
+        # Valid cases with extra underscores in the tokenize module
+        # See gh-105549 for context
+        extra_valid_cases = {"0_7", "09_99"}
         for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in extra_valid_cases:
+                continue
             try:
                 number_token(lit)
             except TokenError:
@@ -1873,6 +1878,34 @@ def test_indentation_semantics_retained(self):
         self.check_roundtrip(code)
 
 
+class InvalidPythonTests(TestCase):
+    def test_number_followed_by_name(self):
+        # See issue #gh-105549
+        source = "2sin(x)"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+            TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+            TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_number_starting_with_zero(self):
+        source = "01234"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
 class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Tokenize separately `NUMBER` and `NAME` tokens that are not ambiguous. Patch
+by Pablo Galindo

Parser/tokenizer.c

Lines changed: 10 additions & 3 deletions
@@ -1600,8 +1600,12 @@ lookahead(struct tok_state *tok, const char *test)
 }
 
 static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
-{
+verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
+    if (tok->tok_extra_tokens) {
+        // When we are parsing extra tokens, we don't want to emit warnings
+        // about invalid literals, because we want to be a bit more liberal.
+        return 1;
+    }
     /* Emit a deprecation warning only if the numeric literal is immediately
      * followed by one of keywords which can occur after a numeric literal
      * in valid code: "and", "else", "for", "if", "in", "is" and "or".
@@ -1659,6 +1663,9 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
 static int
 verify_identifier(struct tok_state *tok)
 {
+    if (tok->tok_extra_tokens) {
+        return 1;
+    }
     PyObject *s;
     if (tok->decoding_erred)
         return 0;
@@ -2318,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         else if (c == 'j' || c == 'J') {
             goto imaginary;
         }
-        else if (nonzero) {
+        else if (nonzero && !tok->tok_extra_tokens) {
             /* Old-style octal: now disallowed. */
             tok_backup(tok, c);
             return MAKE_TOKEN(syntaxerror_known_range(
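
The tok->tok_extra_tokens guard keeps this relaxation confined to the tokenize-module path; the compiler's own tokenizer still applies the old checks. A small illustration of that split, again assuming an interpreter with this commit:

import io
from tokenize import generate_tokens

# The tokenize module (tok_extra_tokens path) now accepts a 0-prefixed
# literal and returns it as a single NUMBER token ...
print([t.string for t in generate_tokens(io.StringIO("01234").readline)])

# ... while compiling the same source still fails, because the
# old-style-octal check above is only skipped for extra tokens.
try:
    compile("01234", "<example>", "eval")
except SyntaxError as exc:
    print("still rejected by the compiler:", exc.msg)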
