Mar 28, 2024 · Jun 22, 2022 · Jul 8, 2023 · Jul 8, 2023 · Jul 8, 2023 · Jul 8, 2023
diff --git a/Doc/library/zipimport.rst b/Doc/library/zipimport.rst
 corresponding :file:`.pyc` file, meaning that if a ZIP archive
 doesn't contain :file:`.pyc` files, importing may be rather slow.

 .. versionchanged:: 3.13
   ZIP64 is supported

 .. versionchanged:: 3.8
   Previously, ZIP archives with an archive comment were not supported.

diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst
  :func:`~xml.etree.ElementTree.iterparse` for explicit cleaning up.
  (Contributed by Serhiy Storchaka in :gh:`69893`.)

 zipimport
 ---------

 * Gains support for ZIP64 format files.  Everybody loves huge code, right?
  (Contributed by Tim Hatch in :gh:`94146`.)


 Optimizations
 =============
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
    return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little')


 def _unpack_uint64(data):
    """Convert 8 bytes in little-endian to an integer."""
    assert len(data) == 8
    return int.from_bytes(data, 'little')

 def _unpack_uint32(data):
    """Convert 4 bytes in little-endian to an integer."""
    assert len(data) == 4
diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
                f.write(stuff)
                f.write(data)

    def getZip64Files(self):
        """Return a name -> (mtime, source) mapping holding 65537 modules.

        Having this many members is the simplest way to make zipfile emit
        the zip64 EOCD block.
        """
        members = {}
        for index in range(65537):
            members[f"f{index}.py"] = (NOW, test_src)
        return members

    def doTest(self, expected_ext, files, *modules, **kw):
        self.makeZip(files, **kw)

        files = {TESTMOD + ".py": (NOW, test_src)}
        self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1))

    def testZip64(self):
        """Run doTest on the zip64-sized file set, targeting module "f6"."""
        self.doTest(".py", self.getZip64Files(), "f6")

    def testZip64CruftAndComment(self):
        """Run doTest on the zip64-sized file set with a maximum-length comment."""
        zip64_files = self.getZip64Files()
        # (1 << 16) - 1 bytes of comment, the same length used by
        # testLargestPossibleComment.
        longest_comment = b"c" * ((1 << 16) - 1)
        self.doTest(".py", zip64_files, "f65536", comment=longest_comment)


 @support.requires_zlib()
 class CompressedZipImportTestCase(UncompressedZipImportTestCase):
diff --git a/Lib/zipimport.py b/Lib/zipimport.py
 #from importlib import _bootstrap_external
 #from importlib import _bootstrap  # for _verbose_message
 import _frozen_importlib_external as _bootstrap_external
 from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
 from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64
 import _frozen_importlib as _bootstrap  # for _verbose_message
 import _imp  # for check_hash_based_pycs
 import _io  # for open
 _module_type = type(sys)

 # Size in bytes of the fixed-length standard end-of-central-directory (EOCD)
 # record; the optional trailing archive comment is budgeted separately via
 # MAX_COMMENT_LEN.
 END_CENTRAL_DIR_SIZE = 22
 # NOTE(review): STRING_END_ARCHIVE is assigned again a few lines below with
 # the same value, so one of the two assignments is redundant.
 STRING_END_ARCHIVE = b'PK\x05\x06'
 # Sizes in bytes of the Zip64 EOCD record and of the Zip64 EOCD locator.
 # _read_directory only accepts a Zip64 EOCD whose start lies exactly
 # (record size + locator size) bytes before the standard EOCD.
 END_CENTRAL_DIR_SIZE_64 = 56
 END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
 STRING_END_ARCHIVE = b'PK\x05\x06'  # standard EOCD signature
 STRING_END_LOCATOR_64 = b'PK\x06\x07'  # Zip64 EOCD Locator signature
 STRING_END_ZIP_64 = b'PK\x06\x06'  # Zip64 EOCD signature
 # Longest archive comment allowed for when deciding how far back from the end
 # of the file to search for the EOCD signatures.
 MAX_COMMENT_LEN = (1 << 16) - 1
 # Sentinel value: a 32-bit size/offset field equal to this makes the reader
 # look for the real value in the entry's zip64 extra record.
 MAX_UINT32 = 0xffffffff
 # Tag that identifies the zip64 record among an entry's extra-field records.
 ZIP64_EXTRA_TAG = 0x1

 class zipimporter(_bootstrap_external._LoaderBasics):
    """zipimporter(archivepath) -> zipimporter object
        # to not cause problems when someone runs 'python3 /dev/fd/9 9<some_script'
        start_offset = fp.tell()
        try:
            # Check if there's a comment.
            try:
                fp.seek(-END_CENTRAL_DIR_SIZE, 2)
                header_position = fp.tell()
                buffer = fp.read(END_CENTRAL_DIR_SIZE)
                fp.seek(0, 2)
                file_size = fp.tell()
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if len(buffer) != END_CENTRAL_DIR_SIZE:
                raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
            if buffer[:4] != STRING_END_ARCHIVE:
                # Bad: End of Central Dir signature
                # Check if there's a comment.
                try:
                    fp.seek(0, 2)
                    file_size = fp.tell()
                except OSError:
                    raise ZipImportError(f"can't read Zip file: {archive!r}",
                                         path=archive)
                max_comment_start = max(file_size - MAX_COMMENT_LEN -
                                        END_CENTRAL_DIR_SIZE, 0)
                try:
                    fp.seek(max_comment_start)
                    data = fp.read()
                except OSError:
                    raise ZipImportError(f"can't read Zip file: {archive!r}",
                                         path=archive)
                pos = data.rfind(STRING_END_ARCHIVE)
                if pos < 0:
                    raise ZipImportError(f'not a Zip file: {archive!r}',
                                         path=archive)
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            max_comment_plus_dirs_size = (
                MAX_COMMENT_LEN + END_CENTRAL_DIR_SIZE +
                END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64)
            max_comment_start = max(file_size - max_comment_plus_dirs_size, 0)
            try:
                fp.seek(max_comment_start)
                data = fp.read(max_comment_plus_dirs_size)
            except OSError:
                raise ZipImportError(f"can't read Zip file: {archive!r}",
                                     path=archive)
            pos = data.rfind(STRING_END_ARCHIVE)
            pos64 = data.rfind(STRING_END_ZIP_64)

            if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
                # Zip64 at "correct" offset from standard EOCD
                buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
                if len(buffer) != END_CENTRAL_DIR_SIZE_64:
                    raise ZipImportError(
                        f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte "
                        f"zip64 central directory, but read {len(buffer)} bytes.",
                        path=archive)
                header_position = file_size - len(data) + pos64

                central_directory_size = _unpack_uint64(buffer[40:48])
                central_directory_position = _unpack_uint64(buffer[48:56])
                num_entries = _unpack_uint64(buffer[24:32])
            elif pos >= 0:
                buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
                if len(buffer) != END_CENTRAL_DIR_SIZE:
                    raise ZipImportError(f"corrupt Zip file: {archive!r}",
                                         path=archive)

                header_position = file_size - len(data) + pos

            header_size = _unpack_uint32(buffer[12:16])
            header_offset = _unpack_uint32(buffer[16:20])
            if header_position < header_size:
                # Buffer now contains a valid EOCD, and header_position gives the
                # starting position of it.
                central_directory_size = _unpack_uint32(buffer[12:16])
                central_directory_position = _unpack_uint32(buffer[16:20])
                num_entries = _unpack_uint16(buffer[8:10])

                # N.b. if someday you want to prefer the standard (non-zip64) EOCD,
                # you need to adjust position by 76 for arc to be 0.
            else:
                raise ZipImportError(f'not a Zip file: {archive!r}',
                                     path=archive)

            # Buffer now contains a valid EOCD, and header_position gives the
            # starting position of it.
            # XXX: These are cursory checks but are not as exact or strict as they
            # could be.  Checking the arc-adjusted value is probably good too.
            if header_position < central_directory_size:
                raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
            if header_position <header_offset:
            if header_position <central_directory_position:
                raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
            header_position -= header_size
            arc_offset = header_position - header_offset
            header_position -= central_directory_size
            # On just-a-zipfile these values are the same and arc_offset is zero; if
            # the file has some bytes prepended, `arc_offset` is the number of such
            # bytes.  This is used for pex as well as self-extracting .exe.
            arc_offset = header_position - central_directory_position
            if arc_offset < 0:
                raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

                    raise EOFError('EOF read where not expected')
                # Start of file header
                if buffer[:4] != b'PK\x01\x02':
                    if count != num_entries:
                        raise ZipImportError(
                            f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
                            path=archive,
                        )
                    break                                # Bad: Central Dir File Header
                if len(buffer) != 46:
                    raise EOFError('EOF read where not expected')
                comment_size = _unpack_uint16(buffer[32:34])
                file_offset = _unpack_uint32(buffer[42:46])
                header_size = name_size + extra_size + comment_size
                if file_offset > header_offset:
                    raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
                file_offset += arc_offset

                try:
                    name = fp.read(name_size)
                # slower than reading the data because fseek flushes stdio's
                # internal buffers.    See issue #8745.
                try:
                    if len(fp.read(header_size - name_size)) != header_size - name_size:
                    extra_data_len = header_size - name_size
                    extra_data = memoryview(fp.read(extra_data_len))

                    if len(extra_data) != extra_data_len:
                        raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
                except OSError:
                    raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)

                name = name.replace('/', path_sep)
                path = _bootstrap_external._path_join(archive, name)

                # Ordering matches unpacking below.
                if (
                    file_size == MAX_UINT32 or
                    data_size == MAX_UINT32 or
                    file_offset == MAX_UINT32
                ):
                    # A 32-bit field holding the MAX_UINT32 sentinel has its real
                    # value stored in the zip64 extra record, so walk the
                    # (tag, size, payload) records in extra_data looking for one
                    # (which might not be present).
                    while extra_data:
                        if len(extra_data) < 4:
                            raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                        tag = _unpack_uint16(extra_data[:2])
                        size = _unpack_uint16(extra_data[2:4])
                        if len(extra_data) < 4 + size:
                            raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                        if tag == ZIP64_EXTRA_TAG:
                            # Measure this record's own payload (`size`), not the
                            # remainder of the buffer: other extra records and the
                            # entry comment may follow the zip64 record.
                            if size % 8 != 0:
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                            num_extra_values = size // 8
                            if num_extra_values > 3:
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
                            # Imported here because this module has no
                            # module-level `struct` import; without it this line
                            # fails with NameError.
                            import struct
                            # unpack_from() returns a tuple; the values are
                            # consumed with pop(0) below, so a list is required.
                            values = list(struct.unpack_from(f"<{num_extra_values}Q",
                                                             extra_data, offset=4))

                            # Every sentinel field must have a replacement value;
                            # report a short record as a bad archive rather than
                            # letting pop(0) raise IndexError.
                            num_needed = (
                                (file_size == MAX_UINT32) +
                                (data_size == MAX_UINT32) +
                                (file_offset == MAX_UINT32)
                            )
                            if len(values) < num_needed:
                                raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)

                            # N.b. Here be dragons: the ordering of these is different than
                            # the header fields, and it's really easy to get it wrong since
                            # naturally-occurring zips that use all 3 are >4GB
                            if file_size == MAX_UINT32:
                                file_size = values.pop(0)
                            if data_size == MAX_UINT32:
                                data_size = values.pop(0)
                            if file_offset == MAX_UINT32:
                                file_offset = values.pop(0)

                            break

                        # For a typical zip, this bytes-slicing only happens 2-3 times, on
                        # small data like timestamps and filesizes.
                        extra_data = extra_data[4+size:]
                    else:
                        # Loop ended without finding a zip64 record: keep the
                        # 32-bit values as they are and only log the suspicion.
                        _bootstrap._verbose_message(
                            "zipimport: suspected zip64 but no zip64 extra for {!r}",
                            path,
                        )
                # XXX These two statements seem swapped because `central_directory_position`
                # is a position within the actual file, but `file_offset` (when compared) is
                # as encoded in the entry, not adjusted for this file.
                # N.b. this must be after we've potentially read the zip64 extra which can
                # change `file_offset`.
                if file_offset > central_directory_position:
                    raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
                file_offset += arc_offset

                t = (path, compress, data_size, file_size, file_offset, time, date, crc)
                files[name] = t
                count += 1
diff --git a/Misc/NEWS.d/next/Library/2022-06-22-14-45-32.gh-issue-89739.CqZcRL.rst b/Misc/NEWS.d/next/Library/2022-06-22-14-45-32.gh-issue-89739.CqZcRL.rst
 The :mod:`zipimport` module can now read ZIP64 files.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -30,6 +30,9 @@ Any files may be present in the ZIP archive, but importers are only invoked for
		corresponding :file:`.pyc` file, meaning that if a ZIP archive
		doesn't contain :file:`.pyc` files, importing may be rather slow.

		.. versionchanged:: 3.13
		ZIP64 is supported

		.. versionchanged:: 3.8
		Previously, ZIP archives with an archive comment were not supported.

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -700,6 +700,12 @@ xml.etree.ElementTree
		:func:`~xml.etree.ElementTree.iterparse` for explicit cleaning up.
		(Contributed by Serhiy Storchaka in :gh:`69893`.)

		zipimport
		---------

		* Gains support for ZIP64 format files. Everybody loves huge code right?
		(Contributed by Tim Hatch in :gh:`94146`.)


		Optimizations
		=============
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -81,6 +81,11 @@ def _pack_uint32(x):
		return (int(x) & 0xFFFFFFFF).to_bytes(4, 'little')


		def _unpack_uint64(data):
		"""Convert 8 bytes in little-endian to an integer."""
		assert len(data) == 8
		return int.from_bytes(data, 'little')

		def _unpack_uint32(data):
		"""Convert 4 bytes in little-endian to an integer."""
		assert len(data) == 4
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -128,6 +128,10 @@ def makeZip(self, files, zipName=TEMP_ZIP, **kw):
		f.write(stuff)
		f.write(data)

		def getZip64Files(self):
		# This is the simplest way to make zipfile generate the zip64 EOCD block
		return {f"f{n}.py": (NOW, test_src) for n in range(65537)}

		def doTest(self, expected_ext, files, modules, *kw):
		self.makeZip(files, **kw)

Expand DownExpand Up		@@ -798,6 +802,14 @@ def testLargestPossibleComment(self):
		files = {TESTMOD + ".py": (NOW, test_src)}
		self.doTest(".py", files, TESTMOD, comment=b"c" * ((1 << 16) - 1))

		def testZip64(self):
		files = self.getZip64Files()
		self.doTest(".py", files, "f6")

		def testZip64CruftAndComment(self):
		files = self.getZip64Files()
		self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))


		@support.requires_zlib()
		class CompressedZipImportTestCase(UncompressedZipImportTestCase):
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -15,7 +15,7 @@
		#from importlib import _bootstrap_external
		#from importlib import _bootstrap # for _verbose_message
		import _frozen_importlib_external as _bootstrap_external
		from _frozen_importlib_external import _unpack_uint16, _unpack_uint32
		from _frozen_importlib_external import _unpack_uint16, _unpack_uint32, _unpack_uint64
		import _frozen_importlib as _bootstrap # for _verbose_message
		import _imp # for check_hash_based_pycs
		import _io # for open
Expand All		@@ -40,8 +40,14 @@ class ZipImportError(ImportError):
		_module_type = type(sys)

		END_CENTRAL_DIR_SIZE = 22
		STRING_END_ARCHIVE = b'PK\x05\x06'
		END_CENTRAL_DIR_SIZE_64 = 56
		END_CENTRAL_DIR_LOCATOR_SIZE_64 = 20
		STRING_END_ARCHIVE = b'PK\x05\x06' # standard EOCD signature
		STRING_END_LOCATOR_64 = b'PK\x06\x07' # Zip64 EOCD Locator signature
		STRING_END_ZIP_64 = b'PK\x06\x06' # Zip64 EOCD signature
		MAX_COMMENT_LEN = (1 << 16) - 1
		MAX_UINT32 = 0xffffffff
		ZIP64_EXTRA_TAG = 0x1

		class zipimporter(_bootstrap_external._LoaderBasics):
		"""zipimporter(archivepath) -> zipimporter object
Expand DownExpand Up		@@ -356,49 +362,72 @@ def _read_directory(archive):
		# to not cause problems when some runs 'python3 /dev/fd/9 9<some_script'
		start_offset = fp.tell()
		try:
		# Check if there's a comment.
		try:
		fp.seek(-END_CENTRAL_DIR_SIZE, 2)
		header_position = fp.tell()
		buffer = fp.read(END_CENTRAL_DIR_SIZE)
		fp.seek(0, 2)
		file_size = fp.tell()
		except OSError:
		raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
		if len(buffer) != END_CENTRAL_DIR_SIZE:
		raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
		if buffer[:4] != STRING_END_ARCHIVE:
		# Bad: End of Central Dir signature
		# Check if there's a comment.
		try:
		fp.seek(0, 2)
		file_size = fp.tell()
		except OSError:
		raise ZipImportError(f"can't read Zip file: {archive!r}",
		path=archive)
		max_comment_start = max(file_size - MAX_COMMENT_LEN -
		END_CENTRAL_DIR_SIZE, 0)
		try:
		fp.seek(max_comment_start)
		data = fp.read()
		except OSError:
		raise ZipImportError(f"can't read Zip file: {archive!r}",
		path=archive)
		pos = data.rfind(STRING_END_ARCHIVE)
		if pos < 0:
		raise ZipImportError(f'not a Zip file: {archive!r}',
		path=archive)
		raise ZipImportError(f"can't read Zip file: {archive!r}",
		path=archive)
		max_comment_plus_dirs_size = (
		MAX_COMMENT_LEN + END_CENTRAL_DIR_SIZE +
		END_CENTRAL_DIR_SIZE_64 + END_CENTRAL_DIR_LOCATOR_SIZE_64)
		max_comment_start = max(file_size - max_comment_plus_dirs_size, 0)
		try:
		fp.seek(max_comment_start)
		data = fp.read(max_comment_plus_dirs_size)
		except OSError:
		raise ZipImportError(f"can't read Zip file: {archive!r}",
		path=archive)
		pos = data.rfind(STRING_END_ARCHIVE)
		pos64 = data.rfind(STRING_END_ZIP_64)

		if (pos64 >= 0 and pos64+END_CENTRAL_DIR_SIZE_64+END_CENTRAL_DIR_LOCATOR_SIZE_64==pos):
		# Zip64 at "correct" offset from standard EOCD
		buffer = data[pos64:pos64 + END_CENTRAL_DIR_SIZE_64]
		if len(buffer) != END_CENTRAL_DIR_SIZE_64:
		raise ZipImportError(
		f"corrupt Zip64 file: Expected {END_CENTRAL_DIR_SIZE_64} byte "
		f"zip64 central directory, but read {len(buffer)} bytes.",
		path=archive)
		header_position = file_size - len(data) + pos64

		central_directory_size = _unpack_uint64(buffer[40:48])
		central_directory_position = _unpack_uint64(buffer[48:56])
		num_entries = _unpack_uint64(buffer[24:32])
		elif pos >= 0:
		buffer = data[pos:pos+END_CENTRAL_DIR_SIZE]
		if len(buffer) != END_CENTRAL_DIR_SIZE:
		raise ZipImportError(f"corrupt Zip file: {archive!r}",
		path=archive)

		header_position = file_size - len(data) + pos

		header_size = _unpack_uint32(buffer[12:16])
		header_offset = _unpack_uint32(buffer[16:20])
		if header_position < header_size:
		# Buffer now contains a valid EOCD, and header_position gives the
		# starting position of it.
		central_directory_size = _unpack_uint32(buffer[12:16])
		central_directory_position = _unpack_uint32(buffer[16:20])
		num_entries = _unpack_uint16(buffer[8:10])

		# N.b. if someday you want to prefer the standard (non-zip64) EOCD,
		# you need to adjust position by 76 for arc to be 0.
		else:
		raise ZipImportError(f'not a Zip file: {archive!r}',
		path=archive)

		# Buffer now contains a valid EOCD, and header_position gives the
		# starting position of it.
		# XXX: These are cursory checks but are not as exact or strict as they
		# could be. Checking the arc-adjusted value is probably good too.
		if header_position < central_directory_size:
		raise ZipImportError(f'bad central directory size: {archive!r}', path=archive)
		if header_position <header_offset:
		if header_position <central_directory_position:
		raise ZipImportError(f'bad central directory offset: {archive!r}', path=archive)
		header_position -= header_size
		arc_offset = header_position - header_offset
		header_position -= central_directory_size
		# On just-a-zipfile these values are the same and arc_offset is zero; if
		# the file has some bytes prepended, `arc_offset` is the number of such
		# bytes. This is used for pex as well as self-extracting .exe.
		arc_offset = header_position - central_directory_position
		if arc_offset < 0:
		raise ZipImportError(f'bad central directory size or offset: {archive!r}', path=archive)

Expand All		@@ -415,6 +444,11 @@ def _read_directory(archive):
		raise EOFError('EOF read where not expected')
		# Start of file header
		if buffer[:4] != b'PK\x01\x02':
		if count != num_entries:
		raise ZipImportError(
		f"mismatched num_entries: {count} should be {num_entries} in {archive!r}",
		path=archive,
		)
		break # Bad: Central Dir File Header
		if len(buffer) != 46:
		raise EOFError('EOF read where not expected')
Expand All		@@ -430,9 +464,6 @@ def _read_directory(archive):
		comment_size = _unpack_uint16(buffer[32:34])
		file_offset = _unpack_uint32(buffer[42:46])
		header_size = name_size + extra_size + comment_size
		if file_offset > header_offset:
		raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
		file_offset += arc_offset

		try:
		name = fp.read(name_size)
Expand All		@@ -444,7 +475,10 @@ def _read_directory(archive):
		# slower than reading the data because fseek flushes stdio's
		# internal buffers. See issue #8745.
		try:
		if len(fp.read(header_size - name_size)) != header_size - name_size:
		extra_data_len = header_size - name_size
		extra_data = memoryview(fp.read(extra_data_len))

		if len(extra_data) != extra_data_len:
		raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
		except OSError:
		raise ZipImportError(f"can't read Zip file: {archive!r}", path=archive)
Expand All		@@ -461,6 +495,60 @@ def _read_directory(archive):

		name = name.replace('/', path_sep)
		path = _bootstrap_external._path_join(archive, name)

		# Ordering matches unpacking below.
		if (
		file_size == MAX_UINT32 or
		data_size == MAX_UINT32 or
		file_offset == MAX_UINT32
		):
		# need to decode extra_data looking for a zip64 extra (which might not
		# be present)
		while extra_data:
		if len(extra_data) < 4:
		raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
		tag = _unpack_uint16(extra_data[:2])
		size = _unpack_uint16(extra_data[2:4])
		if len(extra_data) < 4 + size:
		raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
		if tag == ZIP64_EXTRA_TAG:
		if (len(extra_data) - 4) % 8 != 0:
		raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
		num_extra_values = (len(extra_data) - 4) // 8
		if num_extra_values > 3:
		raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
		values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
Copy link Contributor jsiroisApr 19, 2024• edited Loading Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Where does this symbol come from? I see no struct module import in this file. My Pex too-big-zip tests also find the following under 3.13.0a6: `Failed checking if argv[0] is an import path entryTraceback (most recent call last): File "<frozen zipimport>", line 98, in __init__ File "<frozen zipimport>", line 520, in _read_directoryNameError: name 'struct' is not defined. Did you forget to import 'struct'? File "/tmp/pytest-of-jsirois/pytest-10/test_check0/too-big.pyz", line 1 PK-SyntaxError: source code cannot contain null bytes` itamaro reacted with thumbs up emoji Copy link Contributor jsiroisApr 19, 2024 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. A quick experiment seems to indicate just adding the import gets things working. I'll try my hand at sending up a patch. itamaro reacted with thumbs up emoji Copy link Contributor itamaroApr 19, 2024 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. good catch! too bad the tests didn't catch it. let me know if you need help with getting that PR going. Copy link Contributor jsiroisApr 19, 2024 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. I think I'm good, thank you. Filed#118107 to reference in the forthcoming PR. Copy link Contributor jsiroisApr 19, 2024 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. Fix here hopefully:#118108
		extra_data, offset=4)

		# N.b. Here be dragons: the ordering of these is different than
		# the header fields, and it's really easy to get it wrong since
		# naturally-occuring zips that use all 3 are >4GB
		if file_size == MAX_UINT32:
		file_size = values.pop(0)
Copy link Contributor jsiroisApr 20, 2024 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others.Learn more. These were problematic as well:`values` is a tuple; not a list. The new test caught this issue.
		if data_size == MAX_UINT32:
		data_size = values.pop(0)
		if file_offset == MAX_UINT32:
		file_offset = values.pop(0)

		break

		# For a typical zip, this bytes-slicing only happens 2-3 times, on
		# small data like timestamps and filesizes.
		extra_data = extra_data[4+size:]
		else:
		_bootstrap._verbose_message(
		"zipimport: suspected zip64 but no zip64 extra for {!r}",
		path,
		)
		# XXX These two statements seem swapped because `central_directory_position`
		# is a position within the actual file, but `file_offset` (when compared) is
		# as encoded in the entry, not adjusted for this file.
		# N.b. this must be after we've potentially read the zip64 extra which can
		# change `file_offset`.
		if file_offset > central_directory_position:
		raise ZipImportError(f'bad local header offset: {archive!r}', path=archive)
		file_offset += arc_offset

		t = (path, compress, data_size, file_size, file_offset, time, date, crc)
		files[name] = t
		count += 1
Expand Down
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		The :mod:`zipimport` module can now read ZIP64 files.