Notifications: You must be signed in to change notification settings
Fork 32.1k
Star 67.3k

Commit 08c78e0

authored

gh-134675: Add t-string prefixes to tokenizer module, lexical analysis doc, and add a test to make sure we catch this error in the future. (#134734)

* Add t-string prefixes to _all_string_prefixes, and add a test to make sure we catch this error in the future.
* Update lexical analysis docs for t-string prefixes.

1 parent c60f39a, commit 08c78e0 (Copy full SHA for 08c78e0)

File tree

3 files changed

+59

-2

lines changed

Doc/reference
- lexical_analysis.rst
Lib
- test
  - test_tokenize.py
- tokenize.py

3 files changed

+59

-2

lines changed

`‎Doc/reference/lexical_analysis.rst`

Lines changed: 2 additions & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -489,8 +489,9 @@ String literals are described by the following lexical definitions:`
`489`	`489`
`490`	`490`	`.. productionlist:: python-grammar`
`491`	`491`	stringliteral: [`stringprefix`](`shortstring` \| `longstring`)
`492`		`- stringprefix: "r" \| "u" \| "R" \| "U" \| "f" \| "F"`
	`492`	`+ stringprefix: "r" \| "u" \| "R" \| "U" \| "f" \| "F" \| "t" \| "T"`
`493`	`493`	`: \| "fr" \| "Fr" \| "fR" \| "FR" \| "rf" \| "rF" \| "Rf" \| "RF"`
	`494`	`+ : \| "tr" \| "Tr" \| "tR" \| "TR" \| "rt" \| "rT" \| "Rt" \| "RT"`
`494`	`495`	shortstring: "'" `shortstringitem`* "'" \| '"' `shortstringitem`* '"'
`495`	`496`	longstring: "'''" `longstringitem`* "'''" \| '"""' `longstringitem`* '"""'
`496`	`497`	shortstringitem: `shortstringchar` \| `stringescapeseq`

`‎Lib/test/test_tokenize.py`

Lines changed: 56 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,8 @@`
`1`	`1`	`import contextlib`
	`2`	`+import itertools`
`2`	`3`	`import os`
`3`	`4`	`import re`
	`5`	`+import string`
`4`	`6`	`import tempfile`
`5`	`7`	`import token`
`6`	`8`	`import tokenize`
`@@ -3238,5 +3240,59 @@ def test_exact_flag(self):`
`3238`	`3240`	`self.check_output(source,expect,flag)`
`3239`	`3241`
`3240`	`3242`
	`3243`	`+class StringPrefixTest(unittest.TestCase):`
	`3244`	`+def test_prefixes(self):`
	`3245`	`+# Get the list of defined string prefixes. I don't see an`
	`3246`	`+# obvious documented way of doing this, but probably the best`
	`3247`	`+# thing is to split apart tokenize.StringPrefix.`
	`3248`	`+`
	`3249`	`+# Make sure StringPrefix begins and ends in parens.`
	`3250`	`+self.assertEqual(tokenize.StringPrefix[0],'(')`
	`3251`	`+self.assertEqual(tokenize.StringPrefix[-1],')')`
	`3252`	`+`
	`3253`	`+# Then split apart everything else by '\|'.`
	`3254`	`+defined_prefixes=set(tokenize.StringPrefix[1:-1].split('\|'))`
	`3255`	`+`
	`3256`	`+# Now compute the actual string prefixes, by exec-ing all`
	`3257`	`+# valid prefix combinations, followed by an empty string.`
	`3258`	`+`
	`3259`	`+# Try all prefix lengths until we find a length that has zero`
	`3260`	`+# valid prefixes. This will miss the case where for example`
	`3261`	`+# there are no valid 3 character prefixes, but there are valid`
	`3262`	`+# 4 character prefixes. That seems extremely unlikely.`
	`3263`	`+`
	`3264`	`+# Note that the empty prefix is being included, because length`
	`3265`	`+# starts at 0. That's expected, since StringPrefix includes`
	`3266`	`+# the empty prefix.`
	`3267`	`+`
	`3268`	`+valid_prefixes=set()`
	`3269`	`+for length in itertools.count():`
	`3270`	`+num_at_this_length=0`
	`3271`	`+for prefix in (`
	`3272`	`+"".join(l) for l in list(itertools.combinations(string.ascii_lowercase,length))`
	`3273`	`+ ):`
	`3274`	`+for t in itertools.permutations(prefix):`
	`3275`	`+for u in itertools.product(*[(c,c.upper()) for c in t]):`
	`3276`	`+p=''.join(u)`
	`3277`	`+if p=="not":`
	`3278`	`+# 'not' can never be a string prefix,`
	`3279`	`+# because it's a valid expression: not ""`
	`3280`	`+continue`
	`3281`	`+try:`
	`3282`	`+eval(f'{p}""')`
	`3283`	`+`
	`3284`	`+# No syntax error, so p is a valid string`
	`3285`	`+# prefix.`
	`3286`	`+`
	`3287`	`+valid_prefixes.add(p)`
	`3288`	`+num_at_this_length+=1`
	`3289`	`+except SyntaxError:`
	`3290`	`+pass`
	`3291`	`+if num_at_this_length==0:`
	`3292`	`+break`
	`3293`	`+`
	`3294`	`+self.assertEqual(defined_prefixes,valid_prefixes)`
	`3295`	`+`
	`3296`	`+`
`3241`	`3297`	`if __name__=="__main__":`
`3242`	`3298`	`unittest.main()`

`‎Lib/tokenize.py`

Lines changed: 1 addition & 1 deletion

Original file line number	Diff line number	Diff line change
`@@ -86,7 +86,7 @@ def _all_string_prefixes():`
`86`	`86`	`# The valid string prefixes. Only contain the lower case versions,`
`87`	`87`	`# and don't contain any permutations (include 'fr', but not`
`88`	`88`	`# 'rf'). The various permutations will be generated.`
`89`		`-_valid_string_prefixes= ['b','r','u','f','br','fr']`
	`89`	`+_valid_string_prefixes= ['b','r','u','f','t','br','fr','tr']`
`90`	`90`	`# if we add binary f-strings, add: ['fb', 'fbr']`
`91`	`91`	`result= {''}`
`92`	`92`	`for prefix in _valid_string_prefixes:`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit 08c78e0

File tree

3 files changed

3 files changed

`‎Doc/reference/lexical_analysis.rst`

`‎Lib/test/test_tokenize.py`

`‎Lib/tokenize.py`

0 commit comments