Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit a49c8a8

Browse files
CCLDArjun, AlexWaygood, gpshead
authored and committed
pythongh-89550: Buffer GzipFile.write to reduce execution time by ~15% (python#101251)
Use `io.BufferedWriter` to buffer gzip writes.

---------

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Gregory P. Smith <greg@krypto.org>
1 parent b28c5f2 commit a49c8a8

File tree

2 files changed

+37
-5
lines changed

2 files changed

+37
-5
lines changed

‎Lib/gzip.py‎

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
_COMPRESS_LEVEL_BEST=9
2323

2424
READ_BUFFER_SIZE=128*1024
25+
_WRITE_BUFFER_SIZE=4*io.DEFAULT_BUFFER_SIZE
2526

2627

2728
defopen(filename,mode="rb",compresslevel=_COMPRESS_LEVEL_BEST,
@@ -120,6 +121,21 @@ class BadGzipFile(OSError):
120121
"""Exception raised in some cases for invalid gzip files."""
121122

122123

124+
class_WriteBufferStream(io.RawIOBase):
125+
"""Minimal object to pass WriteBuffer flushes into GzipFile"""
126+
def__init__(self,gzip_file):
127+
self.gzip_file=gzip_file
128+
129+
defwrite(self,data):
130+
returnself.gzip_file._write_raw(data)
131+
132+
defseekable(self):
133+
returnFalse
134+
135+
defwritable(self):
136+
returnTrue
137+
138+
123139
classGzipFile(_compression.BaseStream):
124140
"""The GzipFile class simulates most of the methods of a file object with
125141
the exception of the truncate() method.
@@ -184,6 +200,7 @@ def __init__(self, filename=None, mode=None,
184200
ifmodeisNone:
185201
mode=getattr(fileobj,'mode','rb')
186202

203+
187204
ifmode.startswith('r'):
188205
self.mode=READ
189206
raw=_GzipReader(fileobj)
@@ -206,6 +223,9 @@ def __init__(self, filename=None, mode=None,
206223
zlib.DEF_MEM_LEVEL,
207224
0)
208225
self._write_mtime=mtime
226+
self._buffer_size=_WRITE_BUFFER_SIZE
227+
self._buffer=io.BufferedWriter(_WriteBufferStream(self),
228+
buffer_size=self._buffer_size)
209229
else:
210230
raiseValueError("Invalid mode: {!r}".format(mode))
211231

@@ -231,6 +251,11 @@ def _init_write(self, filename):
231251
self.bufsize=0
232252
self.offset=0# Current file offset for seek(), tell(), etc
233253

254+
deftell(self):
255+
self._check_not_closed()
256+
self._buffer.flush()
257+
returnsuper().tell()
258+
234259
def_write_gzip_header(self,compresslevel):
235260
self.fileobj.write(b'\037\213')# magic header
236261
self.fileobj.write(b'\010')# compression method
@@ -272,6 +297,10 @@ def write(self,data):
272297
ifself.fileobjisNone:
273298
raiseValueError("write() on closed GzipFile object")
274299

300+
returnself._buffer.write(data)
301+
302+
def_write_raw(self,data):
303+
# Called by our self._buffer underlying WriteBufferStream.
275304
ifisinstance(data, (bytes,bytearray)):
276305
length=len(data)
277306
else:
@@ -322,16 +351,17 @@ def close(self):
322351
fileobj=self.fileobj
323352
iffileobjisNone:
324353
return
325-
self.fileobj=None
326354
try:
327355
ifself.mode==WRITE:
356+
self._buffer.flush()
328357
fileobj.write(self.compress.flush())
329358
write32u(fileobj,self.crc)
330359
# self.size may exceed 2 GiB, or even 4 GiB
331360
write32u(fileobj,self.size&0xffffffff)
332361
elifself.mode==READ:
333362
self._buffer.close()
334363
finally:
364+
self.fileobj=None
335365
myfileobj=self.myfileobj
336366
ifmyfileobj:
337367
self.myfileobj=None
@@ -341,7 +371,7 @@ def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
341371
self._check_not_closed()
342372
ifself.mode==WRITE:
343373
# Ensure the compressor's buffer is flushed
344-
self.fileobj.write(self.compress.flush(zlib_mode))
374+
self._buffer.flush()
345375
self.fileobj.flush()
346376

347377
deffileno(self):
@@ -378,10 +408,10 @@ def seek(self, offset, whence=io.SEEK_SET):
378408
ifoffset<self.offset:
379409
raiseOSError('Negative seek in write mode')
380410
count=offset-self.offset
381-
chunk=b'\0'*1024
382-
foriinrange(count//1024):
411+
chunk=b'\0'*self._buffer_size
412+
foriinrange(count//self._buffer_size):
383413
self.write(chunk)
384-
self.write(b'\0'* (count%1024))
414+
self.write(b'\0'* (count%self._buffer_size))
385415
elifself.mode==READ:
386416
self._check_not_closed()
387417
returnself._buffer.seek(offset,whence)
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
Decrease execution time of some :mod:`gzip` file writes by 15% by
2+
adding more appropriate buffering.

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp