Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit9983c7d

Browse files
gh-133890: Handle UnicodeEncodeError in tarfile (GH-134147)
UnicodeEncodeError is now handled the same way as OSError during TarFile member extraction.
1 parent 5cbc8c6 commit 9983c7d

File tree

3 files changed

+49
-6
lines changed

3 files changed

+49
-6
lines changed

‎Lib/tarfile.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2439,7 +2439,7 @@ def _get_extract_tarinfo(self, member, filter_function, path):
24392439
unfiltered=tarinfo
24402440
try:
24412441
tarinfo=filter_function(tarinfo,path)
2442-
except (OSError,FilterError)ase:
2442+
except (OSError,UnicodeEncodeError,FilterError)ase:
24432443
self._handle_fatal_error(e)
24442444
exceptExtractErrorase:
24452445
self._handle_nonfatal_error(e)
@@ -2460,7 +2460,7 @@ def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
24602460
self._extract_member(tarinfo,os.path.join(path,tarinfo.name),
24612461
set_attrs=set_attrs,
24622462
numeric_owner=numeric_owner)
2463-
exceptOSErrorase:
2463+
except(OSError,UnicodeEncodeError)ase:
24642464
self._handle_fatal_error(e)
24652465
exceptExtractErrorase:
24662466
self._handle_nonfatal_error(e)

‎Lib/test/test_tarfile.py

Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3490,11 +3490,12 @@ class ArchiveMaker:
34903490
with t.open() as tar:
34913491
... # `tar` is now a TarFile with 'filename' in it!
34923492
"""
3493-
def__init__(self):
3493+
def__init__(self,**kwargs):
34943494
self.bio=io.BytesIO()
3495+
self.tar_kwargs=dict(kwargs)
34953496

34963497
def__enter__(self):
3497-
self.tar_w=tarfile.TarFile(mode='w',fileobj=self.bio)
3498+
self.tar_w=tarfile.TarFile(mode='w',fileobj=self.bio,**self.tar_kwargs)
34983499
returnself
34993500

35003501
def__exit__(self,*exc):
@@ -4073,7 +4074,10 @@ def test_tar_filter(self):
40734074
# that in the test archive.)
40744075
withtarfile.TarFile.open(tarname)astar:
40754076
fortarinfointar.getmembers():
4076-
filtered=tarfile.tar_filter(tarinfo,'')
4077+
try:
4078+
filtered=tarfile.tar_filter(tarinfo,'')
4079+
exceptUnicodeEncodeError:
4080+
continue
40774081
self.assertIs(filtered.name,tarinfo.name)
40784082
self.assertIs(filtered.type,tarinfo.type)
40794083

@@ -4084,11 +4088,48 @@ def test_data_filter(self):
40844088
fortarinfointar.getmembers():
40854089
try:
40864090
filtered=tarfile.data_filter(tarinfo,'')
4087-
excepttarfile.FilterError:
4091+
except(tarfile.FilterError,UnicodeEncodeError):
40884092
continue
40894093
self.assertIs(filtered.name,tarinfo.name)
40904094
self.assertIs(filtered.type,tarinfo.type)
40914095

4096+
@unittest.skipIf(sys.platform=='win32','requires native bytes paths')
4097+
deftest_filter_unencodable(self):
4098+
# Sanity check using a valid path.
4099+
tarinfo=tarfile.TarInfo(os_helper.TESTFN)
4100+
filtered=tarfile.tar_filter(tarinfo,'')
4101+
self.assertIs(filtered.name,tarinfo.name)
4102+
filtered=tarfile.data_filter(tarinfo,'')
4103+
self.assertIs(filtered.name,tarinfo.name)
4104+
4105+
tarinfo=tarfile.TarInfo('test\x00')
4106+
self.assertRaises(ValueError,tarfile.tar_filter,tarinfo,'')
4107+
self.assertRaises(ValueError,tarfile.data_filter,tarinfo,'')
4108+
tarinfo=tarfile.TarInfo('\ud800')
4109+
self.assertRaises(UnicodeEncodeError,tarfile.tar_filter,tarinfo,'')
4110+
self.assertRaises(UnicodeEncodeError,tarfile.data_filter,tarinfo,'')
4111+
4112+
@unittest.skipIf(sys.platform=='win32','requires native bytes paths')
4113+
deftest_extract_unencodable(self):
4114+
# Create a member with name \xed\xa0\x80 which is UTF-8 encoded
4115+
# lone surrogate \ud800.
4116+
withArchiveMaker(encoding='ascii',errors='surrogateescape')asarc:
4117+
arc.add('\udced\udca0\udc80')
4118+
withos_helper.temp_cwd()astmp:
4119+
tar=arc.open(encoding='utf-8',errors='surrogatepass',
4120+
errorlevel=1)
4121+
self.assertEqual(tar.getnames(), ['\ud800'])
4122+
withself.assertRaises(UnicodeEncodeError):
4123+
tar.extractall()
4124+
self.assertEqual(os.listdir(), [])
4125+
4126+
tar=arc.open(encoding='utf-8',errors='surrogatepass',
4127+
errorlevel=0,debug=1)
4128+
withsupport.captured_stderr()asstderr:
4129+
tar.extractall()
4130+
self.assertEqual(os.listdir(), [])
4131+
self.assertIn('tarfile: UnicodeEncodeError ',stderr.getvalue())
4132+
40924133
deftest_change_default_filter_on_instance(self):
40934134
tar=tarfile.TarFile(tarname,'r')
40944135
defstrict_filter(tarinfo,path):
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The :mod:`tarfile` module now handles :exc:`UnicodeEncodeError` in the same
2+
way as :exc:`OSError` when it cannot extract a member.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp