Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a836182

Browse files
committed
Support zip64 in zipimport
* Reads zip64 files as produced by zipfile
* Includes tests (somewhat slow, however, because of the need to create "large" zips)
* About the same amount of strictness reading invalid zip files as zipfile has
1 parent 47e3562 commit a836182

File tree

4 files changed

+141
-37
lines changed

4 files changed

+141
-37
lines changed

‎Doc/library/zipimport.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
3030
corresponding :file:`.pyc` file, meaning that if a ZIP archive
3131
doesn't contain :file:`.pyc` files, importing may be rather slow.
3232

33+
.. versionchanged:: 3.12
34+
ZIP64 is supported
35+
3336
.. versionchanged:: 3.8
3437
Previously, ZIP archives with an archive comment were not supported.
3538

‎Lib/test/test_zipimport.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,16 @@ def testLargestPossibleComment(self):
776776
files= {TESTMOD+".py": (NOW,test_src)}
777777
self.doTest(".py",files,TESTMOD,comment=b"c"* ((1<<16)-1))
778778

779+
deftestZip64(self):
780+
# This is the simplest way to make zipfile generate the zip64 EOCD block
781+
files= {f"f{n}.py": (NOW,test_src)forninrange(65537)}
782+
self.doTest(".py",files,"f6")
783+
784+
deftestZip64CruftAndComment(self):
785+
# This is the simplest way to make zipfile generate the zip64 EOCD block
786+
files= {f"f{n}.py": (NOW,test_src)forninrange(65537)}
787+
self.doTest(".py",files,"f65536",comment=b"c"* ((1<<16)-1))
788+
779789

780790
@support.requires_zlib()
781791
classCompressedZipImportTestCase(UncompressedZipImportTestCase):

‎Lib/zipimport.py

Lines changed: 127 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
4040
_module_type=type(sys)
4141

4242
END_CENTRAL_DIR_SIZE=22
43-
STRING_END_ARCHIVE=b'PK\x05\x06'
43+
END_CENTRAL_DIR_SIZE_64=56
44+
END_CENTRAL_DIR_LOCATOR_SIZE_64=20
45+
STRING_END_ARCHIVE=b'PK\x05\x06'# standard EOCD signature
46+
STRING_END_LOCATOR_64=b'PK\x06\x07'# Zip64 EOCD Locator signature
47+
STRING_END_ZIP_64=b'PK\x06\x06'# Zip64 EOCD signature
4448
MAX_COMMENT_LEN= (1<<16)-1
49+
MAX_UINT32=0xffffffff
50+
ZIP64_EXTRA_TAG=0x1
4551

4652
classzipimporter(_bootstrap_external._LoaderBasics):
4753
"""zipimporter(archivepath) -> zipimporter object
@@ -406,49 +412,69 @@ def _read_directory(archive):
406412
raiseZipImportError(f"can't open Zip file:{archive!r}",path=archive)
407413

408414
withfp:
415+
# Check if there's a comment.
409416
try:
410-
fp.seek(-END_CENTRAL_DIR_SIZE,2)
411-
header_position=fp.tell()
412-
buffer=fp.read(END_CENTRAL_DIR_SIZE)
417+
fp.seek(0,2)
418+
file_size=fp.tell()
413419
exceptOSError:
414-
raiseZipImportError(f"can't read Zip file:{archive!r}",path=archive)
415-
iflen(buffer)!=END_CENTRAL_DIR_SIZE:
416-
raiseZipImportError(f"can't read Zip file:{archive!r}",path=archive)
417-
ifbuffer[:4]!=STRING_END_ARCHIVE:
418-
# Bad: End of Central Dir signature
419-
# Check if there's a comment.
420-
try:
421-
fp.seek(0,2)
422-
file_size=fp.tell()
423-
exceptOSError:
424-
raiseZipImportError(f"can't read Zip file:{archive!r}",
425-
path=archive)
426-
max_comment_start=max(file_size-MAX_COMMENT_LEN-
427-
END_CENTRAL_DIR_SIZE,0)
428-
try:
429-
fp.seek(max_comment_start)
430-
data=fp.read()
431-
exceptOSError:
432-
raiseZipImportError(f"can't read Zip file:{archive!r}",
433-
path=archive)
434-
pos=data.rfind(STRING_END_ARCHIVE)
435-
ifpos<0:
436-
raiseZipImportError(f'not a Zip file:{archive!r}',
420+
raiseZipImportError(f"can't read Zip file:{archive!r}",
421+
path=archive)
422+
max_comment_start=max(file_size-MAX_COMMENT_LEN-
423+
END_CENTRAL_DIR_SIZE-END_CENTRAL_DIR_SIZE_64-
424+
END_CENTRAL_DIR_LOCATOR_SIZE_64,0)
425+
try:
426+
fp.seek(max_comment_start)
427+
data=fp.read()
428+
exceptOSError:
429+
raiseZipImportError(f"can't read Zip file:{archive!r}",
430+
path=archive)
431+
pos=data.rfind(STRING_END_ARCHIVE)
432+
pos64=data.rfind(STRING_END_ZIP_64)
433+
434+
if (pos64>=0andpos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
435+
# Zip64 at "correct" offset from standard EOCD
436+
buffer=data[pos64:pos64+END_CENTRAL_DIR_SIZE_64]
437+
iflen(buffer)!=END_CENTRAL_DIR_SIZE_64:
438+
raiseZipImportError(f"corrupt Zip64 file:{archive!r}",
437439
path=archive)
440+
header_position=file_size-len(data)+pos64
441+
442+
central_directory_size=int.from_bytes(buffer[40:48],'little')
443+
central_directory_position=int.from_bytes(buffer[48:56],'little')
444+
num_entries=int.from_bytes(buffer[24:32],'little')
445+
elifpos>=0:
438446
buffer=data[pos:pos+END_CENTRAL_DIR_SIZE]
439447
iflen(buffer)!=END_CENTRAL_DIR_SIZE:
440448
raiseZipImportError(f"corrupt Zip file:{archive!r}",
441449
path=archive)
450+
442451
header_position=file_size-len(data)+pos
443452

444-
header_size=_unpack_uint32(buffer[12:16])
445-
header_offset=_unpack_uint32(buffer[16:20])
446-
ifheader_position<header_size:
453+
# Buffer now contains a valid EOCD, and header_position gives the
454+
# starting position of it.
455+
central_directory_size=_unpack_uint32(buffer[12:16])
456+
central_directory_position=_unpack_uint32(buffer[16:20])
457+
num_entries=_unpack_uint16(buffer[8:10])
458+
459+
# N.b. if someday you want to prefer the standard (non-zip64) EOCD,
460+
# you need to adjust position by 76 for arc to be 0.
461+
else:
462+
raiseZipImportError(f'not a Zip file:{archive!r}',
463+
path=archive)
464+
465+
# Buffer now contains a valid EOCD, and header_position gives the
466+
# starting position of it.
467+
# XXX: These are cursory checks but are not as exact or strict as they
468+
# could be. Checking the arc-adjusted value is probably good too.
469+
ifheader_position<central_directory_size:
447470
raiseZipImportError(f'bad central directory size:{archive!r}',path=archive)
448-
ifheader_position<header_offset:
471+
ifheader_position<central_directory_position:
449472
raiseZipImportError(f'bad central directory offset:{archive!r}',path=archive)
450-
header_position-=header_size
451-
arc_offset=header_position-header_offset
473+
header_position-=central_directory_size
474+
# On just-a-zipfile these values are the same and arc_offset is zero; if
475+
# the file has some bytes prepended, `arc_offset` is the number of such
476+
# bytes. This is used for pex as well as self-extracting .exe.
477+
arc_offset=header_position-central_directory_position
452478
ifarc_offset<0:
453479
raiseZipImportError(f'bad central directory size or offset:{archive!r}',path=archive)
454480

@@ -465,6 +491,11 @@ def _read_directory(archive):
465491
raiseEOFError('EOF read where not expected')
466492
# Start of file header
467493
ifbuffer[:4]!=b'PK\x01\x02':
494+
ifcount!=num_entries:
495+
raiseZipImportError(
496+
f"mismatched num_entries:{count} should be{num_entries} in{archive!r}",
497+
path=archive,
498+
)
468499
break# Bad: Central Dir File Header
469500
iflen(buffer)!=46:
470501
raiseEOFError('EOF read where not expected')
@@ -480,9 +511,6 @@ def _read_directory(archive):
480511
comment_size=_unpack_uint16(buffer[32:34])
481512
file_offset=_unpack_uint32(buffer[42:46])
482513
header_size=name_size+extra_size+comment_size
483-
iffile_offset>header_offset:
484-
raiseZipImportError(f'bad local header offset:{archive!r}',path=archive)
485-
file_offset+=arc_offset
486514

487515
try:
488516
name=fp.read(name_size)
@@ -494,7 +522,10 @@ def _read_directory(archive):
494522
# slower than reading the data because fseek flushes stdio's
495523
# internal buffers. See issue #8745.
496524
try:
497-
iflen(fp.read(header_size-name_size))!=header_size-name_size:
525+
extra_data_len=header_size-name_size
526+
extra_data=fp.read(extra_data_len)
527+
528+
iflen(extra_data)!=extra_data_len:
498529
raiseZipImportError(f"can't read Zip file:{archive!r}",path=archive)
499530
exceptOSError:
500531
raiseZipImportError(f"can't read Zip file:{archive!r}",path=archive)
@@ -511,6 +542,65 @@ def _read_directory(archive):
511542

512543
name=name.replace('/',path_sep)
513544
path=_bootstrap_external._path_join(archive,name)
545+
546+
# Ordering matches unpacking below.
547+
if (
548+
file_size==MAX_UINT32or
549+
data_size==MAX_UINT32or
550+
file_offset==MAX_UINT32
551+
):
552+
# need to decode extra_data looking for a zip64 extra (which might not
553+
# be present)
554+
whileextra_data:
555+
iflen(extra_data)<4:
556+
raiseZipImportError(f"can't read header extra:{archive!r}",path=archive)
557+
tag=_unpack_uint16(extra_data[:2])
558+
size=_unpack_uint16(extra_data[2:4])
559+
iflen(extra_data)<4+size:
560+
raiseZipImportError(f"can't read header extra:{archive!r}",path=archive)
561+
iftag==ZIP64_EXTRA_TAG:
562+
if (len(extra_data)-4)%8!=0:
563+
raiseZipImportError(f"can't read header extra:{archive!r}",path=archive)
564+
values= [
565+
int.from_bytes(extra_data[i:i+8],'little')
566+
foriinrange(4,len(extra_data),8)
567+
]
568+
569+
# N.b. Here be dragons: the ordering of these is different than
570+
# the header fields, and it's really easy to get it wrong since
571+
# naturally-occurring zips that use all 3 are >4GB and not
572+
# something that would be checked-in.
573+
# The tests include a binary-edited zip that uses zip64
574+
# (unnecessarily) for all three.
575+
iffile_size==MAX_UINT32:
576+
file_size=values.pop(0)
577+
ifdata_size==MAX_UINT32:
578+
data_size=values.pop(0)
579+
iffile_offset==MAX_UINT32:
580+
file_offset=values.pop(0)
581+
582+
ifvalues:
583+
raiseZipImportError(f"can't read header extra:{archive!r}",path=archive)
584+
585+
break
586+
587+
# For a typical zip, this bytes-slicing only happens 2-3 times, on
588+
# small data like timestamps and filesizes.
589+
extra_data=extra_data[4+size:]
590+
else:
591+
_bootstrap._verbose_message(
592+
"zipimport: suspected zip64 but no zip64 extra for {!r}",
593+
path,
594+
)
595+
# XXX These two statements seem swapped because `header_offset` is a
596+
# position within the actual file, but `file_offset` (when compared) is
597+
# as encoded in the entry, not adjusted for this file.
598+
# N.b. this must be after we've potentially read the zip64 extra which can
599+
# change `file_offset`.
600+
iffile_offset>central_directory_position:
601+
raiseZipImportError(f'bad local header offset:{archive!r}',path=archive)
602+
file_offset+=arc_offset
603+
514604
t= (path,compress,data_size,file_size,file_offset,time,date,crc)
515605
files[name]=t
516606
count+=1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
The ``zipimport`` module can now read ZIP64 files.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp