Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit ea5bfdd

Browse files
gh-63161: Add more tests for source encoding
1 parent 408154d, commit ea5bfdd

File tree

2 files changed

+178
-20
lines changed

2 files changed

+178
-20
lines changed

‎Lib/test/test_source_encoding.py‎

Lines changed: 96 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
172172
os.unlink(TESTFN)
173173

174174

175+
BUFSIZ=2**13
176+
175177
classAbstractSourceEncodingTest:
176178

177179
deftest_default_coding(self):
@@ -184,14 +186,20 @@ def test_first_coding_line(self):
184186
self.check_script_output(src,br"'\xc3\u20ac'")
185187

186188
deftest_second_coding_line(self):
187-
src= (b'#\n'
189+
src= (b'#!/usr/bin/python\n'
190+
b'#coding:iso8859-15\n'
191+
b'print(ascii("\xc3\xa4"))\n')
192+
self.check_script_output(src,br"'\xc3\u20ac'")
193+
194+
deftest_second_coding_line_empty_first_line(self):
195+
src= (b'\n'
188196
b'#coding:iso8859-15\n'
189197
b'print(ascii("\xc3\xa4"))\n')
190198
self.check_script_output(src,br"'\xc3\u20ac'")
191199

192200
deftest_third_coding_line(self):
193201
# Only first two lines are tested for a magic comment.
194-
src= (b'#\n'
202+
src= (b'#!/usr/bin/python\n'
195203
b'#\n'
196204
b'#coding:iso8859-15\n'
197205
b'print(ascii("\xc3\xa4"))\n')
@@ -209,13 +217,52 @@ def test_double_coding_same_line(self):
209217
b'print(ascii("\xc3\xa4"))\n')
210218
self.check_script_output(src,br"'\xc3\u20ac'")
211219

220+
deftest_double_coding_utf8(self):
221+
src= (b'#coding:utf-8\n'
222+
b'#coding:latin1\n'
223+
b'print(ascii("\xc3\xa4"))\n')
224+
self.check_script_output(src,br"'\xe4'")
225+
226+
deftest_long_first_coding_line(self):
227+
src= (b'#'+b' '*BUFSIZ+b'coding:iso8859-15\n'
228+
b'print(ascii("\xc3\xa4"))\n')
229+
self.check_script_output(src,br"'\xc3\u20ac'")
230+
231+
deftest_long_second_coding_line(self):
232+
src= (b'#!/usr/bin/python\n'
233+
b'#'+b' '*BUFSIZ+b'coding:iso8859-15\n'
234+
b'print(ascii("\xc3\xa4"))\n')
235+
self.check_script_output(src,br"'\xc3\u20ac'")
236+
237+
deftest_long_coding_line(self):
238+
src= (b'#coding:iso-8859-15'+b' '*BUFSIZ+b'\n'
239+
b'print(ascii("\xc3\xa4"))\n')
240+
self.check_script_output(src,br"'\xc3\u20ac'")
241+
242+
deftest_long_coding_name(self):
243+
src= (b'#coding:iso-8859-1-'+b'x'*BUFSIZ+b'\n'
244+
b'print(ascii("\xc3\xa4"))\n')
245+
self.check_script_output(src,br"'\xc3\xa4'")
246+
247+
deftest_long_first_utf8_line(self):
248+
src=b'#'+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'
249+
self.check_script_output(src,b'')
250+
src=b'# '+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'
251+
self.check_script_output(src,b'')
252+
253+
deftest_long_second_utf8_line(self):
254+
src=b'\n#'+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'
255+
self.check_script_output(src,b'')
256+
src=b'\n# '+b'\xc3\xa4'*(BUFSIZ//2)+b'\n'
257+
self.check_script_output(src,b'')
258+
212259
deftest_first_non_utf8_coding_line(self):
213260
src= (b'#coding:iso-8859-15\xa4\n'
214261
b'print(ascii("\xc3\xa4"))\n')
215262
self.check_script_output(src,br"'\xc3\u20ac'")
216263

217264
deftest_second_non_utf8_coding_line(self):
218-
src= (b'\n'
265+
src= (b'#!/usr/bin/python\n'
219266
b'#coding:iso-8859-15\xa4\n'
220267
b'print(ascii("\xc3\xa4"))\n')
221268
self.check_script_output(src,br"'\xc3\u20ac'")
@@ -224,27 +271,56 @@ def test_utf8_bom(self):
224271
src= (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
225272
self.check_script_output(src,br"'\xe4'")
226273

274+
deftest_utf8_bom_utf8_comments(self):
275+
src= (b'\xef\xbb\xbf#\xc3\xa4\n'
276+
b'#\xc3\xa4\n'
277+
b'print(ascii("\xc3\xa4"))\n')
278+
self.check_script_output(src,br"'\xe4'")
279+
227280
deftest_utf8_bom_and_utf8_coding_line(self):
228281
src= (b'\xef\xbb\xbf#coding:utf-8\n'
229282
b'print(ascii("\xc3\xa4"))\n')
230283
self.check_script_output(src,br"'\xe4'")
231284

285+
deftest_utf8_non_utf8_comment_line_error(self):
286+
src= (b'#coding: utf8\n'
287+
b'#\n'
288+
b'#\xa4\n'
289+
b'raise RuntimeError\n')
290+
self.check_script_error(src,
291+
br"'utf-8' codec can't decode byte|"
292+
br"encoding problem: utf8")
293+
232294
deftest_crlf(self):
233295
src= (b'print(ascii("""\r\n"""))\n')
234-
out=self.check_script_output(src,br"'\n'")
296+
self.check_script_output(src,br"'\n'")
235297

236298
deftest_crcrlf(self):
237299
src= (b'print(ascii("""\r\r\n"""))\n')
238-
out=self.check_script_output(src,br"'\n\n'")
300+
self.check_script_output(src,br"'\n\n'")
239301

240302
deftest_crcrcrlf(self):
241303
src= (b'print(ascii("""\r\r\r\n"""))\n')
242-
out=self.check_script_output(src,br"'\n\n\n'")
304+
self.check_script_output(src,br"'\n\n\n'")
243305

244306
deftest_crcrcrlf2(self):
245307
src= (b'#coding:iso-8859-1\n'
246308
b'print(ascii("""\r\r\r\n"""))\n')
247-
out=self.check_script_output(src,br"'\n\n\n'")
309+
self.check_script_output(src,br"'\n\n\n'")
310+
311+
deftest_nul_in_first_coding_line(self):
312+
src= (b'#coding:iso8859-15\x00\n'
313+
b'\n'
314+
b'\n'
315+
b'raise RuntimeError\n')
316+
self.check_script_error(src,br"source code (string )?cannot contain null bytes")
317+
318+
deftest_nul_in_second_coding_line(self):
319+
src= (b'#!/usr/bin/python\n'
320+
b'#coding:iso8859-15\x00\n'
321+
b'\n'
322+
b'raise RuntimeError\n')
323+
self.check_script_error(src,br"source code (string )?cannot contain null bytes")
248324

249325

250326
classUTF8ValidatorTest(unittest.TestCase):
@@ -324,6 +400,11 @@ def check_script_output(self, src, expected):
324400
out=stdout.getvalue().encode('latin1')
325401
self.assertEqual(out.rstrip(),expected)
326402

403+
defcheck_script_error(self,src,expected):
404+
withself.assertRaisesRegex(SyntaxError,expected.decode())ascm:
405+
exec(src)
406+
# self.assertEqual(str(cm.exception).encode(), expected)
407+
327408

328409
classFileSourceEncodingTest(AbstractSourceEncodingTest,unittest.TestCase):
329410

@@ -335,6 +416,14 @@ def check_script_output(self, src, expected):
335416
res=script_helper.assert_python_ok(fn)
336417
self.assertEqual(res.out.rstrip(),expected)
337418

419+
defcheck_script_error(self,src,expected):
420+
withtempfile.TemporaryDirectory()astmpd:
421+
fn=os.path.join(tmpd,'test.py')
422+
withopen(fn,'wb')asfp:
423+
fp.write(src)
424+
res=script_helper.assert_python_failure(fn)
425+
self.assertRegex(res.err.rstrip().splitlines()[-1],b'SyntaxError: '+expected)
426+
338427

339428
if__name__=="__main__":
340429
unittest.main()

‎Lib/test/test_tokenize.py‎

Lines changed: 82 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1346,24 +1346,63 @@ def readline():
13461346

13471347
deftest_no_bom_no_encoding_cookie(self):
13481348
lines= (
1349-
b'# something\n',
1349+
b'#!/home/\xc3\xa4/bin/python\n',
1350+
b'# something\xe2\x82\xac\n',
13501351
b'print(something)\n',
13511352
b'do_something(else)\n'
13521353
)
13531354
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
13541355
self.assertEqual(encoding,'utf-8')
13551356
self.assertEqual(consumed_lines,list(lines[:2]))
13561357

1358+
deftest_no_bom_no_encoding_cookie_first_line_error(self):
1359+
lines= (
1360+
b'#!/home/\xa4/bin/python\n\n',
1361+
b'print(something)\n',
1362+
b'do_something(else)\n'
1363+
)
1364+
withself.assertRaises(SyntaxError):
1365+
tokenize.detect_encoding(self.get_readline(lines))
1366+
1367+
deftest_no_bom_no_encoding_cookie_second_line_error(self):
1368+
lines= (
1369+
b'#!/usr/bin/python\n',
1370+
b'# something\xe2\n',
1371+
b'print(something)\n',
1372+
b'do_something(else)\n'
1373+
)
1374+
withself.assertRaises(SyntaxError):
1375+
tokenize.detect_encoding(self.get_readline(lines))
1376+
13571377
deftest_bom_no_cookie(self):
13581378
lines= (
1359-
b'\xef\xbb\xbf# something\n',
1379+
b'\xef\xbb\xbf#!/home/\xc3\xa4/bin/python\n',
13601380
b'print(something)\n',
13611381
b'do_something(else)\n'
13621382
)
13631383
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
13641384
self.assertEqual(encoding,'utf-8-sig')
13651385
self.assertEqual(consumed_lines,
1366-
[b'# something\n',b'print(something)\n'])
1386+
[b'#!/home/\xc3\xa4/bin/python\n',b'print(something)\n'])
1387+
1388+
deftest_bom_no_cookie_first_line_error(self):
1389+
lines= (
1390+
b'\xef\xbb\xbf#!/home/\xa4/bin/python\n',
1391+
b'print(something)\n',
1392+
b'do_something(else)\n'
1393+
)
1394+
withself.assertRaises(SyntaxError):
1395+
tokenize.detect_encoding(self.get_readline(lines))
1396+
1397+
deftest_bom_no_cookie_second_line_error(self):
1398+
lines= (
1399+
b'\xef\xbb\xbf#!/usr/bin/python\n',
1400+
b'# something\xe2\n',
1401+
b'print(something)\n',
1402+
b'do_something(else)\n'
1403+
)
1404+
withself.assertRaises(SyntaxError):
1405+
tokenize.detect_encoding(self.get_readline(lines))
13671406

13681407
deftest_cookie_first_line_no_bom(self):
13691408
lines= (
@@ -1439,27 +1478,58 @@ def test_cookie_second_line_noncommented_first_line(self):
14391478
expected= [b"print('\xc2\xa3')\n"]
14401479
self.assertEqual(consumed_lines,expected)
14411480

1442-
deftest_cookie_second_line_commented_first_line(self):
1481+
deftest_cookie_second_line_empty_first_line(self):
14431482
lines= (
1444-
b"#print('\xc2\xa3')\n",
1483+
b'\n',
14451484
b'# vim: set fileencoding=iso8859-15 :\n',
14461485
b"print('\xe2\x82\xac')\n"
14471486
)
14481487
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
14491488
self.assertEqual(encoding,'iso8859-15')
1450-
expected= [b"#print('\xc2\xa3')\n",b'# vim: set fileencoding=iso8859-15 :\n']
1489+
expected= [b'\n',b'# vim: set fileencoding=iso8859-15 :\n']
14511490
self.assertEqual(consumed_lines,expected)
14521491

1453-
deftest_cookie_second_line_empty_first_line(self):
1492+
deftest_cookie_third_line(self):
14541493
lines= (
1455-
b'\n',
1456-
b'# vim: set fileencoding=iso8859-15 :\n',
1457-
b"print('\xe2\x82\xac')\n"
1494+
b'#!/home/\xc3\xa4/bin/python\n',
1495+
b'# something\n',
1496+
b'# vim: set fileencoding=ascii :\n',
1497+
b'print(something)\n',
1498+
b'do_something(else)\n'
1499+
)
1500+
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
1501+
self.assertEqual(encoding,'utf-8')
1502+
self.assertEqual(consumed_lines,list(lines[:2]))
1503+
1504+
deftest_double_coding_line(self):
1505+
# If the first line matches the second line is ignored.
1506+
lines= (
1507+
b'#coding:iso8859-15\n',
1508+
b'#coding:latin1\n',
1509+
b'print(something)\n'
14581510
)
14591511
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
14601512
self.assertEqual(encoding,'iso8859-15')
1461-
expected= [b'\n',b'# vim: set fileencoding=iso8859-15 :\n']
1462-
self.assertEqual(consumed_lines,expected)
1513+
self.assertEqual(consumed_lines,list(lines[:1]))
1514+
1515+
deftest_double_coding_same_line(self):
1516+
lines= (
1517+
b'#coding:iso8859-15 coding:latin1\n',
1518+
b'print(something)\n'
1519+
)
1520+
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
1521+
self.assertEqual(encoding,'iso8859-15')
1522+
self.assertEqual(consumed_lines,list(lines[:1]))
1523+
1524+
deftest_double_coding_utf8(self):
1525+
lines= (
1526+
b'#coding:utf-8\n',
1527+
b'#coding:latin1\n',
1528+
b'print(something)\n'
1529+
)
1530+
encoding,consumed_lines=tokenize.detect_encoding(self.get_readline(lines))
1531+
self.assertEqual(encoding,'utf-8')
1532+
self.assertEqual(consumed_lines,list(lines[:1]))
14631533

14641534
deftest_latin1_normalization(self):
14651535
# See get_normal_name() in Parser/tokenizer/helpers.c.
@@ -1485,7 +1555,6 @@ def test_syntaxerror_latin1(self):
14851555
readline=self.get_readline(lines)
14861556
self.assertRaises(SyntaxError,tokenize.detect_encoding,readline)
14871557

1488-
14891558
deftest_utf8_normalization(self):
14901559
# See get_normal_name() in Parser/tokenizer/helpers.c.
14911560
encodings= ("utf-8","utf-8-mac","utf-8-unix")

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp