Notifications: You must be signed in to change notification settings
Fork 33.3k
Star 69.7k

Commit ea5bfdd

committed

gh-63161: Add more tests for source encoding

1 parent 408154d, commit ea5bfdd (Copy full SHA for ea5bfdd)

File tree

2 files changed

+178

-20

lines changed

Lib/test
- test_source_encoding.py
- test_tokenize.py

2 files changed

+178

-20

lines changed

`‎Lib/test/test_source_encoding.py‎`

Lines changed: 96 additions & 7 deletions

Original file line number	Diff line number	Diff line change
`@@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):`
`172`	`172`	`os.unlink(TESTFN)`
`173`	`173`
`174`	`174`
	`175`	`+BUFSIZ=2**13`
	`176`	`+`
`175`	`177`	`classAbstractSourceEncodingTest:`
`176`	`178`
`177`	`179`	`deftest_default_coding(self):`
`@@ -184,14 +186,20 @@ def test_first_coding_line(self):`
`184`	`186`	`self.check_script_output(src,br"'\xc3\u20ac'")`
`185`	`187`
`186`	`188`	`deftest_second_coding_line(self):`
`187`		`-src= (b'#\n'`
	`189`	`+src= (b'#!/usr/bin/python\n'`
	`190`	`+b'#coding:iso8859-15\n'`
	`191`	`+b'print(ascii("\xc3\xa4"))\n')`
	`192`	`+self.check_script_output(src,br"'\xc3\u20ac'")`
	`193`	`+`
	`194`	`+deftest_second_coding_line_empty_first_line(self):`
	`195`	`+src= (b'\n'`
`188`	`196`	`b'#coding:iso8859-15\n'`
`189`	`197`	`b'print(ascii("\xc3\xa4"))\n')`
`190`	`198`	`self.check_script_output(src,br"'\xc3\u20ac'")`
`191`	`199`
`192`	`200`	`deftest_third_coding_line(self):`
`193`	`201`	`# Only first two lines are tested for a magic comment.`
`194`		`-src= (b'#\n'`
	`202`	`+src= (b'#!/usr/bin/python\n'`
`195`	`203`	`b'#\n'`
`196`	`204`	`b'#coding:iso8859-15\n'`
`197`	`205`	`b'print(ascii("\xc3\xa4"))\n')`
`@@ -209,13 +217,52 @@ def test_double_coding_same_line(self):`
`209`	`217`	`b'print(ascii("\xc3\xa4"))\n')`
`210`	`218`	`self.check_script_output(src,br"'\xc3\u20ac'")`
`211`	`219`
	`220`	`+deftest_double_coding_utf8(self):`
	`221`	`+src= (b'#coding:utf-8\n'`
	`222`	`+b'#coding:latin1\n'`
	`223`	`+b'print(ascii("\xc3\xa4"))\n')`
	`224`	`+self.check_script_output(src,br"'\xe4'")`
	`225`	`+`
	`226`	`+deftest_long_first_coding_line(self):`
	`227`	`+src= (b'#'+b' '*BUFSIZ+b'coding:iso8859-15\n'`
	`228`	`+b'print(ascii("\xc3\xa4"))\n')`
	`229`	`+self.check_script_output(src,br"'\xc3\u20ac'")`
	`230`	`+`
	`231`	`+deftest_long_second_coding_line(self):`
	`232`	`+src= (b'#!/usr/bin/python\n'`
	`233`	`+b'#'+b' '*BUFSIZ+b'coding:iso8859-15\n'`
	`234`	`+b'print(ascii("\xc3\xa4"))\n')`
	`235`	`+self.check_script_output(src,br"'\xc3\u20ac'")`
	`236`	`+`
	`237`	`+deftest_long_coding_line(self):`
	`238`	`+src= (b'#coding:iso-8859-15'+b' '*BUFSIZ+b'\n'`
	`239`	`+b'print(ascii("\xc3\xa4"))\n')`
	`240`	`+self.check_script_output(src,br"'\xc3\u20ac'")`
	`241`	`+`
	`242`	`+deftest_long_coding_name(self):`
	`243`	`+src= (b'#coding:iso-8859-1-'+b'x'*BUFSIZ+b'\n'`
	`244`	`+b'print(ascii("\xc3\xa4"))\n')`
	`245`	`+self.check_script_output(src,br"'\xc3\xa4'")`
	`246`	`+`
	`247`	`+deftest_long_first_utf8_line(self):`
	`248`	`+src=b'#'+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'`
	`249`	`+self.check_script_output(src,b'')`
	`250`	`+src=b'# '+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'`
	`251`	`+self.check_script_output(src,b'')`
	`252`	`+`
	`253`	`+deftest_long_second_utf8_line(self):`
	`254`	`+src=b'\n#'+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'`
	`255`	`+self.check_script_output(src,b'')`
	`256`	`+src=b'\n# '+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'`
	`257`	`+self.check_script_output(src,b'')`
	`258`	`+`
`212`	`259`	`deftest_first_non_utf8_coding_line(self):`
`213`	`260`	`src= (b'#coding:iso-8859-15\xa4\n'`
`214`	`261`	`b'print(ascii("\xc3\xa4"))\n')`
`215`	`262`	`self.check_script_output(src,br"'\xc3\u20ac'")`
`216`	`263`
`217`	`264`	`deftest_second_non_utf8_coding_line(self):`
`218`		`-src= (b'\n'`
	`265`	`+src= (b'#!/usr/bin/python\n'`
`219`	`266`	`b'#coding:iso-8859-15\xa4\n'`
`220`	`267`	`b'print(ascii("\xc3\xa4"))\n')`
`221`	`268`	`self.check_script_output(src,br"'\xc3\u20ac'")`
`@@ -224,27 +271,56 @@ def test_utf8_bom(self):`
`224`	`271`	`src= (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')`
`225`	`272`	`self.check_script_output(src,br"'\xe4'")`
`226`	`273`
	`274`	`+deftest_utf8_bom_utf8_comments(self):`
	`275`	`+src= (b'\xef\xbb\xbf#\xc3\xa4\n'`
	`276`	`+b'#\xc3\xa4\n'`
	`277`	`+b'print(ascii("\xc3\xa4"))\n')`
	`278`	`+self.check_script_output(src,br"'\xe4'")`
	`279`	`+`
`227`	`280`	`deftest_utf8_bom_and_utf8_coding_line(self):`
`228`	`281`	`src= (b'\xef\xbb\xbf#coding:utf-8\n'`
`229`	`282`	`b'print(ascii("\xc3\xa4"))\n')`
`230`	`283`	`self.check_script_output(src,br"'\xe4'")`
`231`	`284`
	`285`	`+deftest_utf8_non_utf8_comment_line_error(self):`
	`286`	`+src= (b'#coding: utf8\n'`
	`287`	`+b'#\n'`
	`288`	`+b'#\xa4\n'`
	`289`	`+b'raise RuntimeError\n')`
	`290`	`+self.check_script_error(src,`
	`291`	`+br"'utf-8' codec can't decode byte\|"`
	`292`	`+br"encoding problem: utf8")`
	`293`	`+`
`232`	`294`	`deftest_crlf(self):`
`233`	`295`	`src= (b'print(ascii("""\r\n"""))\n')`
`234`		`-out=self.check_script_output(src,br"'\n'")`
	`296`	`+self.check_script_output(src,br"'\n'")`
`235`	`297`
`236`	`298`	`deftest_crcrlf(self):`
`237`	`299`	`src= (b'print(ascii("""\r\r\n"""))\n')`
`238`		`-out=self.check_script_output(src,br"'\n\n'")`
	`300`	`+self.check_script_output(src,br"'\n\n'")`
`239`	`301`
`240`	`302`	`deftest_crcrcrlf(self):`
`241`	`303`	`src= (b'print(ascii("""\r\r\r\n"""))\n')`
`242`		`-out=self.check_script_output(src,br"'\n\n\n'")`
	`304`	`+self.check_script_output(src,br"'\n\n\n'")`
`243`	`305`
`244`	`306`	`deftest_crcrcrlf2(self):`
`245`	`307`	`src= (b'#coding:iso-8859-1\n'`
`246`	`308`	`b'print(ascii("""\r\r\r\n"""))\n')`
`247`		`-out=self.check_script_output(src,br"'\n\n\n'")`
	`309`	`+self.check_script_output(src,br"'\n\n\n'")`
	`310`	`+`
	`311`	`+deftest_nul_in_first_coding_line(self):`
	`312`	`+src= (b'#coding:iso8859-15\x00\n'`
	`313`	`+b'\n'`
	`314`	`+b'\n'`
	`315`	`+b'raise RuntimeError\n')`
	`316`	`+self.check_script_error(src,br"source code (string )?cannot contain null bytes")`
	`317`	`+`
	`318`	`+deftest_nul_in_second_coding_line(self):`
	`319`	`+src= (b'#!/usr/bin/python\n'`
	`320`	`+b'#coding:iso8859-15\x00\n'`
	`321`	`+b'\n'`
	`322`	`+b'raise RuntimeError\n')`
	`323`	`+self.check_script_error(src,br"source code (string )?cannot contain null bytes")`
`248`	`324`
`249`	`325`
`250`	`326`	`classUTF8ValidatorTest(unittest.TestCase):`
`@@ -324,6 +400,11 @@ def check_script_output(self, src, expected):`
`324`	`400`	`out=stdout.getvalue().encode('latin1')`
`325`	`401`	`self.assertEqual(out.rstrip(),expected)`
`326`	`402`
	`403`	`+defcheck_script_error(self,src,expected):`
	`404`	`+withself.assertRaisesRegex(SyntaxError,expected.decode())ascm:`
	`405`	`+exec(src)`
	`406`	`+# self.assertEqual(str(cm.exception).encode(), expected)`
	`407`	`+`
`327`	`408`
`328`	`409`	`classFileSourceEncodingTest(AbstractSourceEncodingTest,unittest.TestCase):`
`329`	`410`
`@@ -335,6 +416,14 @@ def check_script_output(self, src, expected):`
`335`	`416`	`res=script_helper.assert_python_ok(fn)`
`336`	`417`	`self.assertEqual(res.out.rstrip(),expected)`
`337`	`418`
	`419`	`+defcheck_script_error(self,src,expected):`
	`420`	`+withtempfile.TemporaryDirectory()astmpd:`
	`421`	`+fn=os.path.join(tmpd,'test.py')`
	`422`	`+withopen(fn,'wb')asfp:`
	`423`	`+fp.write(src)`
	`424`	`+res=script_helper.assert_python_failure(fn)`
	`425`	`+self.assertRegex(res.err.rstrip().splitlines()[-1],b'SyntaxError: '+expected)`
	`426`	`+`
`338`	`427`
`339`	`428`	`if__name__=="__main__":`
`340`	`429`	`unittest.main()`

`‎Lib/test/test_tokenize.py‎`

Lines changed: 82 additions & 13 deletions

Original file line number	Diff line number	Diff line change
`@@ -1346,24 +1346,63 @@ def readline():`
`1346`	`1346`
`1347`	`1347`	`deftest_no_bom_no_encoding_cookie(self):`
`1348`	`1348`	`lines= (`
`1349`		`-b'# something\n',`
	`1349`	`+b'#!/home/\xc3\xa4/bin/python\n',`
	`1350`	`+b'# something\xe2\x82\xac\n',`
`1350`	`1351`	`b'print(something)\n',`
`1351`	`1352`	`b'do_something(else)\n'`
`1352`	`1353`	`)`
`1353`	`1354`	`encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
`1354`	`1355`	`self.assertEqual(encoding,'utf-8')`
`1355`	`1356`	`self.assertEqual(consumed_lines,list(lines[:2]))`
`1356`	`1357`
	`1358`	`+deftest_no_bom_no_encoding_cookie_first_line_error(self):`
	`1359`	`+lines= (`
	`1360`	`+b'#!/home/\xa4/bin/python\n\n',`
	`1361`	`+b'print(something)\n',`
	`1362`	`+b'do_something(else)\n'`
	`1363`	`+ )`
	`1364`	`+withself.assertRaises(SyntaxError):`
	`1365`	`+tokenize.detect_encoding(self.get_readline(lines))`
	`1366`	`+`
	`1367`	`+deftest_no_bom_no_encoding_cookie_second_line_error(self):`
	`1368`	`+lines= (`
	`1369`	`+b'#!/usr/bin/python\n',`
	`1370`	`+b'# something\xe2\n',`
	`1371`	`+b'print(something)\n',`
	`1372`	`+b'do_something(else)\n'`
	`1373`	`+ )`
	`1374`	`+withself.assertRaises(SyntaxError):`
	`1375`	`+tokenize.detect_encoding(self.get_readline(lines))`
	`1376`	`+`
`1357`	`1377`	`deftest_bom_no_cookie(self):`
`1358`	`1378`	`lines= (`
`1359`		`-b'\xef\xbb\xbf# something\n',`
	`1379`	`+b'\xef\xbb\xbf#!/home/\xc3\xa4/bin/python\n',`
`1360`	`1380`	`b'print(something)\n',`
`1361`	`1381`	`b'do_something(else)\n'`
`1362`	`1382`	`)`
`1363`	`1383`	`encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
`1364`	`1384`	`self.assertEqual(encoding,'utf-8-sig')`
`1365`	`1385`	`self.assertEqual(consumed_lines,`
`1366`		`- [b'# something\n',b'print(something)\n'])`
	`1386`	`+ [b'#!/home/\xc3\xa4/bin/python\n',b'print(something)\n'])`
	`1387`	`+`
	`1388`	`+deftest_bom_no_cookie_first_line_error(self):`
	`1389`	`+lines= (`
	`1390`	`+b'\xef\xbb\xbf#!/home/\xa4/bin/python\n',`
	`1391`	`+b'print(something)\n',`
	`1392`	`+b'do_something(else)\n'`
	`1393`	`+ )`
	`1394`	`+withself.assertRaises(SyntaxError):`
	`1395`	`+tokenize.detect_encoding(self.get_readline(lines))`
	`1396`	`+`
	`1397`	`+deftest_bom_no_cookie_second_line_error(self):`
	`1398`	`+lines= (`
	`1399`	`+b'\xef\xbb\xbf#!/usr/bin/python\n',`
	`1400`	`+b'# something\xe2\n',`
	`1401`	`+b'print(something)\n',`
	`1402`	`+b'do_something(else)\n'`
	`1403`	`+ )`
	`1404`	`+withself.assertRaises(SyntaxError):`
	`1405`	`+tokenize.detect_encoding(self.get_readline(lines))`
`1367`	`1406`
`1368`	`1407`	`deftest_cookie_first_line_no_bom(self):`
`1369`	`1408`	`lines= (`
`@@ -1439,27 +1478,58 @@ def test_cookie_second_line_noncommented_first_line(self):`
`1439`	`1478`	`expected= [b"print('\xc2\xa3')\n"]`
`1440`	`1479`	`self.assertEqual(consumed_lines,expected)`
`1441`	`1480`
`1442`		`-deftest_cookie_second_line_commented_first_line(self):`
	`1481`	`+deftest_cookie_second_line_empty_first_line(self):`
`1443`	`1482`	`lines= (`
`1444`		`-b"#print('\xc2\xa3')\n",`
	`1483`	`+b'\n',`
`1445`	`1484`	`b'# vim: set fileencoding=iso8859-15 :\n',`
`1446`	`1485`	`b"print('\xe2\x82\xac')\n"`
`1447`	`1486`	`)`
`1448`	`1487`	`encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
`1449`	`1488`	`self.assertEqual(encoding,'iso8859-15')`
`1450`		`-expected= [b"#print('\xc2\xa3')\n",b'# vim: set fileencoding=iso8859-15 :\n']`
	`1489`	`+expected= [b'\n',b'# vim: set fileencoding=iso8859-15 :\n']`
`1451`	`1490`	`self.assertEqual(consumed_lines,expected)`
`1452`	`1491`
`1453`		`-deftest_cookie_second_line_empty_first_line(self):`
	`1492`	`+deftest_cookie_third_line(self):`
`1454`	`1493`	`lines= (`
`1455`		`-b'\n',`
`1456`		`-b'# vim: set fileencoding=iso8859-15 :\n',`
`1457`		`-b"print('\xe2\x82\xac')\n"`
	`1494`	`+b'#!/home/\xc3\xa4/bin/python\n',`
	`1495`	`+b'# something\n',`
	`1496`	`+b'# vim: set fileencoding=ascii :\n',`
	`1497`	`+b'print(something)\n',`
	`1498`	`+b'do_something(else)\n'`
	`1499`	`+ )`
	`1500`	`+encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
	`1501`	`+self.assertEqual(encoding,'utf-8')`
	`1502`	`+self.assertEqual(consumed_lines,list(lines[:2]))`
	`1503`	`+`
	`1504`	`+deftest_double_coding_line(self):`
	`1505`	`+# If the first line matches the second line is ignored.`
	`1506`	`+lines= (`
	`1507`	`+b'#coding:iso8859-15\n',`
	`1508`	`+b'#coding:latin1\n',`
	`1509`	`+b'print(something)\n'`
`1458`	`1510`	`)`
`1459`	`1511`	`encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
`1460`	`1512`	`self.assertEqual(encoding,'iso8859-15')`
`1461`		`-expected= [b'\n',b'# vim: set fileencoding=iso8859-15 :\n']`
`1462`		`-self.assertEqual(consumed_lines,expected)`
	`1513`	`+self.assertEqual(consumed_lines,list(lines[:1]))`
	`1514`	`+`
	`1515`	`+deftest_double_coding_same_line(self):`
	`1516`	`+lines= (`
	`1517`	`+b'#coding:iso8859-15 coding:latin1\n',`
	`1518`	`+b'print(something)\n'`
	`1519`	`+ )`
	`1520`	`+encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
	`1521`	`+self.assertEqual(encoding,'iso8859-15')`
	`1522`	`+self.assertEqual(consumed_lines,list(lines[:1]))`
	`1523`	`+`
	`1524`	`+deftest_double_coding_utf8(self):`
	`1525`	`+lines= (`
	`1526`	`+b'#coding:utf-8\n',`
	`1527`	`+b'#coding:latin1\n',`
	`1528`	`+b'print(something)\n'`
	`1529`	`+ )`
	`1530`	`+encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))`
	`1531`	`+self.assertEqual(encoding,'utf-8')`
	`1532`	`+self.assertEqual(consumed_lines,list(lines[:1]))`
`1463`	`1533`
`1464`	`1534`	`deftest_latin1_normalization(self):`
`1465`	`1535`	`# See get_normal_name() in Parser/tokenizer/helpers.c.`
`@@ -1485,7 +1555,6 @@ def test_syntaxerror_latin1(self):`
`1485`	`1555`	`readline=self.get_readline(lines)`
`1486`	`1556`	`self.assertRaises(SyntaxError,tokenize.detect_encoding,readline)`
`1487`	`1557`
`1488`		`-`
`1489`	`1558`	`deftest_utf8_normalization(self):`
`1490`	`1559`	`# See get_normal_name() in Parser/tokenizer/helpers.c.`
`1491`	`1560`	`encodings= ("utf-8","utf-8-mac","utf-8-unix")`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commit ea5bfdd

File tree

2 files changed

2 files changed

`‎Lib/test/test_source_encoding.py‎`

`‎Lib/test/test_tokenize.py‎`

0 commit comments