@@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
172172os .unlink (TESTFN )
173173
174174
# Byte length used by the "long line" tests to build source lines of at
# least this size (8192).
# NOTE(review): presumably mirrors the tokenizer's read-buffer size - confirm.
BUFSIZ = 1 << 13
176+
175177class AbstractSourceEncodingTest :
176178
177179def test_default_coding (self ):
@@ -184,14 +186,20 @@ def test_first_coding_line(self):
184186self .check_script_output (src ,br"'\xc3\u20ac'" )
185187
def test_second_coding_line(self):
    # A coding cookie on line 2, directly after a shebang line, must be
    # honoured: bytes C3 A4 read as ISO-8859-15 are U+00C3 and U+20AC.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
193+
def test_second_coding_line_empty_first_line(self):
    # Same as test_second_coding_line, but line 1 is completely empty
    # instead of being a comment.
    src = (b'\n'
           b'#coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
191199
192200def test_third_coding_line (self ):
193201# Only first two lines are tested for a magic comment.
194- src = (b'#\n '
202+ src = (b'#!/usr/bin/python \n '
195203b'#\n '
196204b'#coding:iso8859-15\n '
197205b'print(ascii("\xc3 \xa4 "))\n ' )
@@ -209,13 +217,52 @@ def test_double_coding_same_line(self):
209217b'print(ascii("\xc3 \xa4 "))\n ' )
210218self .check_script_output (src ,br"'\xc3\u20ac'" )
211219
def test_double_coding_utf8(self):
    # Both of the first two lines carry a coding cookie.  The expected
    # output shows the first one (utf-8) is used: bytes C3 A4 decode to
    # the single character U+00E4 rather than two Latin-1 characters.
    src = (b'#coding:utf-8\n'
           b'#coding:latin1\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
225+
def test_long_first_coding_line(self):
    # The cookie sits on line 1 but only after BUFSIZ spaces of padding,
    # so it starts more than BUFSIZ bytes into the line.
    src = (b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
230+
def test_long_second_coding_line(self):
    # As test_long_first_coding_line, but the padded cookie is on line 2,
    # after a shebang line.
    src = (b'#!/usr/bin/python\n'
           b'#' + b' ' * BUFSIZ + b'coding:iso8859-15\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
236+
def test_long_coding_line(self):
    # The cookie comes first and is followed by BUFSIZ trailing spaces
    # before the newline; the declared encoding must still be applied.
    src = (b'#coding:iso-8859-15' + b' ' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
241+
def test_long_coding_name(self):
    # The encoding name itself is over BUFSIZ bytes long ("iso-8859-1-"
    # followed by BUFSIZ "x" characters).  The expected output has each
    # source byte as its own character (U+00C3, U+00A4).
    # NOTE(review): presumably the over-long name is normalised to
    # iso-8859-1 - confirm against the tokenizer.
    src = (b'#coding:iso-8859-1-' + b'x' * BUFSIZ + b'\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\xa4'")
246+
def test_long_first_utf8_line(self):
    # Line 1 is a comment holding BUFSIZ bytes of two-byte UTF-8
    # characters and no coding cookie; the script must run and print
    # nothing.  The second prefix shifts the characters by one byte.
    for prefix in (b'#', b'# '):
        src = prefix + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
        self.check_script_output(src, b'')
252+
def test_long_second_utf8_line(self):
    # As test_long_first_utf8_line, but the long UTF-8 comment is on
    # line 2, after an empty first line.
    for prefix in (b'\n#', b'\n# '):
        src = prefix + b'\xc3\xa4' * (BUFSIZ // 2) + b'\n'
        self.check_script_output(src, b'')
258+
def test_first_non_utf8_coding_line(self):
    # The cookie line itself ends with byte A4, which is not valid UTF-8;
    # the script must still run using the declared ISO-8859-15 encoding.
    src = (b'#coding:iso-8859-15\xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
216263
def test_second_non_utf8_coding_line(self):
    # As test_first_non_utf8_coding_line, but the cookie line with the
    # non-UTF-8 byte is on line 2, after a shebang line.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso-8859-15\xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xc3\u20ac'")
@@ -224,27 +271,56 @@ def test_utf8_bom(self):
224271src = (b'\xef \xbb \xbf print(ascii("\xc3 \xa4 "))\n ' )
225272self .check_script_output (src ,br"'\xe4'" )
226273
def test_utf8_bom_utf8_comments(self):
    # The source starts with a UTF-8 BOM (EF BB BF) and its first two
    # lines are comments holding a UTF-8 character but no coding cookie;
    # bytes C3 A4 in the string must decode to U+00E4.
    src = (b'\xef\xbb\xbf#\xc3\xa4\n'
           b'#\xc3\xa4\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
279+
def test_utf8_bom_and_utf8_coding_line(self):
    # A UTF-8 BOM (EF BB BF) followed by an agreeing utf-8 cookie on the
    # same line is accepted; bytes C3 A4 decode to U+00E4.
    src = (b'\xef\xbb\xbf#coding:utf-8\n'
           b'print(ascii("\xc3\xa4"))\n')
    self.check_script_output(src, br"'\xe4'")
231284
def test_utf8_non_utf8_comment_line_error(self):
    # utf8 is declared on line 1, but the comment on line 3 holds byte A4,
    # which is not valid UTF-8.  The script must fail with a SyntaxError
    # carrying either of the two accepted messages; the final raise would
    # produce a different error if the source were executed.
    src = (b'#coding: utf8\n'
           b'#\n'
           b'#\xa4\n'
           b'raise RuntimeError\n')
    self.check_script_error(src,
                            br"'utf-8' codec can't decode byte|"
                            br"encoding problem: utf8")
293+
def test_crlf(self):
    # A CRLF pair inside a triple-quoted string is read as one newline.
    src = b'print(ascii("""\r\n"""))\n'
    self.check_script_output(src, br"'\n'")
235297
def test_crcrlf(self):
    # A lone CR followed by CRLF is read as two newlines.
    src = b'print(ascii("""\r\r\n"""))\n'
    self.check_script_output(src, br"'\n\n'")
239301
def test_crcrcrlf(self):
    # Two lone CRs followed by CRLF are read as three newlines.
    src = b'print(ascii("""\r\r\r\n"""))\n'
    self.check_script_output(src, br"'\n\n\n'")
243305
def test_crcrcrlf2(self):
    # Same input as test_crcrcrlf, but with an explicit iso-8859-1 coding
    # cookie on the first line; the result must be the same.
    src = (b'#coding:iso-8859-1\n'
           b'print(ascii("""\r\r\r\n"""))\n')
    self.check_script_output(src, br"'\n\n\n'")
310+
def test_nul_in_first_coding_line(self):
    # A NUL byte right after the cookie on line 1 must be reported as a
    # SyntaxError about null bytes.  The final raise would produce a
    # different error if the source were executed.
    src = (b'#coding:iso8859-15\x00\n'
           b'\n'
           b'\n'
           b'raise RuntimeError\n')
    self.check_script_error(
        src, br"source code (string )?cannot contain null bytes")
317+
def test_nul_in_second_coding_line(self):
    # As test_nul_in_first_coding_line, but the cookie line with the NUL
    # byte is on line 2, after a shebang line.
    src = (b'#!/usr/bin/python\n'
           b'#coding:iso8859-15\x00\n'
           b'\n'
           b'raise RuntimeError\n')
    self.check_script_error(
        src, br"source code (string )?cannot contain null bytes")
248324
249325
250326class UTF8ValidatorTest (unittest .TestCase ):
@@ -324,6 +400,11 @@ def check_script_output(self, src, expected):
324400out = stdout .getvalue ().encode ('latin1' )
325401self .assertEqual (out .rstrip (),expected )
326402
def check_script_error(self, src, expected):
    """Assert that exec()-ing *src* raises a matching SyntaxError.

    *src* is the source handed to exec().  *expected* is a bytes regular
    expression; it is decoded to str and searched for in the text of the
    raised SyntaxError by assertRaisesRegex.
    """
    # The context manager's return value is not needed: the regex match
    # on the exception text is the whole check.
    with self.assertRaisesRegex(SyntaxError, expected.decode()):
        exec(src)
407+
327408
328409class FileSourceEncodingTest (AbstractSourceEncodingTest ,unittest .TestCase ):
329410
@@ -335,6 +416,14 @@ def check_script_output(self, src, expected):
335416res = script_helper .assert_python_ok (fn )
336417self .assertEqual (res .out .rstrip (),expected )
337418
def check_script_error(self, src, expected):
    """Run *src* as a script file and check the SyntaxError it reports.

    *src* (bytes) is written to ``test.py`` in a temporary directory and
    run through script_helper.assert_python_failure().  The last line of
    the captured stderr must match ``b'SyntaxError: '`` followed by the
    bytes regular expression *expected*.
    """
    with tempfile.TemporaryDirectory() as workdir:
        script = os.path.join(workdir, 'test.py')
        with open(script, 'wb') as stream:
            stream.write(src)
        result = script_helper.assert_python_failure(script)
    last_line = result.err.rstrip().splitlines()[-1]
    self.assertRegex(last_line, b'SyntaxError: ' + expected)
426+
338427
339428if __name__ == "__main__" :
340429unittest .main ()