
Commit b047fa5

pythongh-105549: Tokenize separately NUMBER and NAME tokens and allow 0-prefixed literals (python#105555)

1 parent 00b599a · commit b047fa5
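
With this change, the tokenize module no longer raises on a number immediately followed by a name, or on a 0-prefixed literal; it emits the pieces as separate NUMBER and NAME tokens instead. A minimal sketch of that behaviour, assuming an interpreter that already includes this commit (the expected output mirrors the new tests below):

import io
import token
from tokenize import generate_tokens

# "2sin(x)" is still a SyntaxError for the compiler, but the tokenize
# module now splits it into NUMBER '2' followed by NAME 'sin' instead
# of raising.
toks = list(generate_tokens(io.StringIO("2sin(x)").readline))
print([(token.tok_name[t.type], t.string) for t in toks])
# Expected, per the tests added below:
# [('NUMBER', '2'), ('NAME', 'sin'), ('OP', '('), ('NAME', 'x'),
#  ('OP', ')'), ('NEWLINE', ''), ('ENDMARKER', '')]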

File tree

3 files changed: +45 -3 lines changed

Lib/test/test_tokenize.py

Lines changed: 33 additions & 0 deletions
@@ -284,7 +284,12 @@ def number_token(s):
                 # this won't work with compound complex inputs
                 continue
             self.assertEqual(number_token(lit), lit)
+        # Valid cases with extra underscores in the tokenize module
+        # See gh-105549 for context
+        extra_valid_cases = {"0_7", "09_99"}
         for lit in INVALID_UNDERSCORE_LITERALS:
+            if lit in extra_valid_cases:
+                continue
             try:
                 number_token(lit)
             except TokenError:
@@ -1873,6 +1878,34 @@ def test_indentation_semantics_retained(self):
         self.check_roundtrip(code)
 
 
+class InvalidPythonTests(TestCase):
+    def test_number_followed_by_name(self):
+        # See issue #gh-105549
+        source = "2sin(x)"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='2', start=(1, 0), end=(1, 1), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='sin', start=(1, 1), end=(1, 4), line='2sin(x)'),
+            TokenInfo(type=token.OP, string='(', start=(1, 4), end=(1, 5), line='2sin(x)'),
+            TokenInfo(type=token.NAME, string='x', start=(1, 5), end=(1, 6), line='2sin(x)'),
+            TokenInfo(type=token.OP, string=')', start=(1, 6), end=(1, 7), line='2sin(x)'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 7), end=(1, 8), line='2sin(x)'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
+    def test_number_starting_with_zero(self):
+        source = "01234"
+        expected_tokens = [
+            TokenInfo(type=token.NUMBER, string='01234', start=(1, 0), end=(1, 5), line='01234'),
+            TokenInfo(type=token.NEWLINE, string='', start=(1, 5), end=(1, 6), line='01234'),
+            TokenInfo(type=token.ENDMARKER, string='', start=(2, 0), end=(2, 0), line='')
+        ]
+
+        tokens = list(generate_tokens(StringIO(source).readline))
+        self.assertEqual(tokens, expected_tokens)
+
 class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+Tokenize separately `NUMBER` and `NAME` tokens that are not ambiguous. Patch
+by Pablo Galindo

Parser/tokenizer.c

Lines changed: 10 additions & 3 deletions
@@ -1600,8 +1600,12 @@ lookahead(struct tok_state *tok, const char *test)
 }
 
 static int
-verify_end_of_number(struct tok_state *tok, int c, const char *kind)
-{
+verify_end_of_number(struct tok_state *tok, int c, const char *kind) {
+    if (tok->tok_extra_tokens) {
+        // When we are parsing extra tokens, we don't want to emit warnings
+        // about invalid literals, because we want to be a bit more liberal.
+        return 1;
+    }
     /* Emit a deprecation warning only if the numeric literal is immediately
      * followed by one of keywords which can occur after a numeric literal
      * in valid code: "and", "else", "for", "if", "in", "is" and "or".
@@ -1659,6 +1663,9 @@ verify_end_of_number(struct tok_state *tok, int c, const char *kind)
 static int
 verify_identifier(struct tok_state *tok)
 {
+    if (tok->tok_extra_tokens) {
+        return 1;
+    }
     PyObject *s;
     if (tok->decoding_erred)
         return 0;
@@ -2318,7 +2325,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
         else if (c == 'j' || c == 'J') {
             goto imaginary;
         }
-        else if (nonzero) {
+        else if (nonzero && !tok->tok_extra_tokens) {
             /* Old-style octal: now disallowed. */
             tok_backup(tok, c);
             return MAKE_TOKEN(syntaxerror_known_range(
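
The tok->tok_extra_tokens guard keeps this relaxation confined to the tokenize-module path; the compiler's own tokenizer still applies the old checks. A small illustration of that split, again assuming an interpreter with this commit:

import io
from tokenize import generate_tokens

# The tokenize module (tok_extra_tokens path) now accepts a 0-prefixed
# literal and returns it as a single NUMBER token ...
print([t.string for t in generate_tokens(io.StringIO("01234").readline)])

# ... while compiling the same source still fails, because the
# old-style-octal check above is only skipped for extra tokens.
try:
    compile("01234", "<example>", "eval")
except SyntaxError as exc:
    print("still rejected by the compiler:", exc.msg)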
