Uh oh!
There was an error while loading. Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork 34.1k
gh-89550: Buffer GzipFile.write to reduce execution time by ~15% #101251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Uh oh!
There was an error while loading. Please reload this page.
Changes from 1 commit
943ca9c 6f4701b 8653faf 1c36625 bc4d929 ef9dd50 b3aa1f7 ad2dc9b a929ac8 d1618f2 a7efd1b d0da047 fe31588 875ebd8 e33fc19 5ef4674 c0f3d2a File filter
Filter by extension
Conversations
Uh oh!
There was an error while loading. Please reload this page.
Jump to
Uh oh!
There was an error while loading. Please reload this page.
Diff view
Diff view
- Loading branch information
Uh oh!
There was an error while loading. Please reload this page.
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -9,9 +9,7 @@ | ||
| import zlib | ||
| import builtins | ||
| import io | ||
| import _compression | ||
| __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] | ||
| @@ -122,6 +120,21 @@ class BadGzipFile(OSError): | ||
| """Exception raised in some cases for invalid gzip files.""" | ||
| class _WriteBufferStream(io.RawIOBase): | ||
| """Minimal object to pass WriteBuffer flushes into GzipFile""" | ||
| def __init__(self, gzip_file): | ||
| self.gzip_file = gzip_file | ||
| def write(self, data): | ||
| return self.gzip_file._write_raw(data) | ||
| def seekable(self): | ||
| return False | ||
| def writable(self): | ||
| return True | ||
| class GzipFile(_compression.BaseStream): | ||
| """The GzipFile class simulates most of the methods of a file object with | ||
| the exception of the truncate() method. | ||
| @@ -208,6 +221,7 @@ def __init__(self, filename=None, mode=None, | ||
| zlib.DEF_MEM_LEVEL, | ||
| 0) | ||
| self._write_mtime = mtime | ||
| self._buffer = io.BufferedWriter(_WriteBufferStream(self)) | ||
CCLDArjun marked this conversation as resolved. Outdated. Show resolved / Hide resolved. Uh oh! There was an error while loading. Please reload this page. | ||
| else: | ||
| raise ValueError("Invalid mode: {!r}".format(mode)) | ||
| @@ -233,6 +247,11 @@ def _init_write(self, filename): | ||
| self.bufsize = 0 | ||
CCLDArjun marked this conversation as resolved. Show resolved / Hide resolved. Uh oh! There was an error while loading. Please reload this page. | ||
| self.offset = 0 # Current file offset for seek(), tell(), etc | ||
| def tell(self): | ||
| self._check_not_closed() | ||
| self._buffer.flush() | ||
| return super().tell() | ||
| def _write_gzip_header(self, compresslevel): | ||
| self.fileobj.write(b'\037\213') # magic header | ||
| self.fileobj.write(b'\010') # compression method | ||
| @@ -274,6 +293,9 @@ def write(self,data): | ||
| if self.fileobj is None: | ||
| raise ValueError("write() on closed GzipFile object") | ||
| return self._buffer.write(data) | ||
| def _write_raw(self, data): | ||
CCLDArjun marked this conversation as resolved. Show resolved / Hide resolved. Uh oh! There was an error while loading. Please reload this page. | ||
| if isinstance(data, (bytes, bytearray)): | ||
| length = len(data) | ||
| else: | ||
| @@ -324,16 +346,17 @@ def close(self): | ||
| fileobj = self.fileobj | ||
| if fileobj is None: | ||
| return | ||
| try: | ||
| if self.mode == WRITE: | ||
| self._buffer.flush() | ||
| fileobj.write(self.compress.flush()) | ||
| write32u(fileobj, self.crc) | ||
| # self.size may exceed 2 GiB, or even 4 GiB | ||
| write32u(fileobj, self.size & 0xffffffff) | ||
| elif self.mode == READ: | ||
| self._buffer.close() | ||
| finally: | ||
| self.fileobj = None | ||
| myfileobj = self.myfileobj | ||
| if myfileobj: | ||
| self.myfileobj = None | ||
| @@ -343,7 +366,7 @@ def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH): | ||
| self._check_not_closed() | ||
| if self.mode == WRITE: | ||
| # Ensure the compressor's buffer is flushed | ||
| self._buffer.flush() | ||
| self.fileobj.flush() | ||
| def fileno(self): | ||
| @@ -381,8 +404,7 @@ def seek(self, offset, whence=io.SEEK_SET): | ||
| raise OSError('Negative seek in write mode') | ||
| count = offset - self.offset | ||
| chunk = b'\0' * 1024 | ||
| self.write(chunk * (count // 1024)) | ||
CCLDArjun marked this conversation as resolved. Outdated. Show resolved / Hide resolved. Uh oh! There was an error while loading. Please reload this page. | ||
| self.write(b'\0' * (count % 1024)) | ||
| elif self.mode == READ: | ||
| self._check_not_closed() | ||
| @@ -447,115 +469,6 @@ def _read_gzip_header(fp): | ||
| _read_exact(fp, 2) # Read & discard the 16-bit header CRC | ||
| return last_mtime | ||
| class _GzipReader(_compression.DecompressReader): | ||
| def __init__(self, fp): | ||