@@ -3457,11 +3457,12 @@ class ArchiveMaker:
3457
3457
with t.open() as tar:
3458
3458
... # `tar` is now a TarFile with 'filename' in it!
3459
3459
"""
3460
- def __init__ (self ):
3460
+ def __init__ (self , ** kwargs ):
3461
3461
self .bio = io .BytesIO ()
3462
+ self .tar_kwargs = dict (kwargs )
3462
3463
3463
3464
def __enter__ (self ):
3464
- self .tar_w = tarfile .TarFile (mode = 'w' ,fileobj = self .bio )
3465
+ self .tar_w = tarfile .TarFile (mode = 'w' ,fileobj = self .bio , ** self . tar_kwargs )
3465
3466
return self
3466
3467
3467
3468
def __exit__ (self ,* exc ):
@@ -4040,7 +4041,10 @@ def test_tar_filter(self):
4040
4041
# that in the test archive.)
4041
4042
with tarfile .TarFile .open (tarname )as tar :
4042
4043
for tarinfo in tar .getmembers ():
4043
- filtered = tarfile .tar_filter (tarinfo ,'' )
4044
+ try :
4045
+ filtered = tarfile .tar_filter (tarinfo ,'' )
4046
+ except UnicodeEncodeError :
4047
+ continue
4044
4048
self .assertIs (filtered .name ,tarinfo .name )
4045
4049
self .assertIs (filtered .type ,tarinfo .type )
4046
4050
@@ -4051,11 +4055,48 @@ def test_data_filter(self):
4051
4055
for tarinfo in tar .getmembers ():
4052
4056
try :
4053
4057
filtered = tarfile .data_filter (tarinfo ,'' )
4054
- except tarfile .FilterError :
4058
+ except ( tarfile .FilterError , UnicodeEncodeError ) :
4055
4059
continue
4056
4060
self .assertIs (filtered .name ,tarinfo .name )
4057
4061
self .assertIs (filtered .type ,tarinfo .type )
4058
4062
4063
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_filter_unencodable(self):
    """Check how tar_filter and data_filter react to unusable member names."""
    # Each check runs against tar_filter first and data_filter second.
    filters = (tarfile.tar_filter, tarfile.data_filter)

    # Sanity check using a valid path.
    valid = tarfile.TarInfo(os_helper.TESTFN)
    for filter_func in filters:
        result = filter_func(valid, '')
        self.assertIs(result.name, valid.name)

    # A name with an embedded NUL is expected to raise ValueError.
    with_nul = tarfile.TarInfo('test\x00 ')
    for filter_func in filters:
        with self.assertRaises(ValueError):
            filter_func(with_nul, '')

    # A name with a lone surrogate is expected to raise UnicodeEncodeError.
    with_surrogate = tarfile.TarInfo('\ud800 ')
    for filter_func in filters:
        with self.assertRaises(UnicodeEncodeError):
            filter_func(with_surrogate, '')
4078
+
4079
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    """Ensure a member with an unencodable name is never extracted."""
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.  The literal must hold exactly these three
    # escaped bytes: any extra character would keep the stored name from
    # decoding to the single lone surrogate checked below.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # With errorlevel=1 the test expects the encoding failure to
        # reach the caller and nothing to be written to the directory.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=1) as tar:
            self.assertEqual(tar.getnames(), ['\ud800'])
            with self.assertRaises(UnicodeEncodeError):
                tar.extractall(filter=tarfile.tar_filter)
        self.assertEqual(os.listdir(), [])

        # With errorlevel=0 and debug=1 the test expects no exception,
        # a debug message on stderr, and still nothing extracted.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=0, debug=1) as tar:
            with support.captured_stderr() as stderr:
                tar.extractall(filter=tarfile.tar_filter)
        self.assertEqual(os.listdir(), [])
        self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
4099
+
4059
4100
def test_default_filter_warns (self ):
4060
4101
"""Ensure the default filter warns"""
4061
4102
with ArchiveMaker ()as arc :