Commit 4b4a514

Added performance comparison to cgit ... and yes, git-python is faster :)

1 parent: 26e138c
File tree: 2 files changed, +87 −11 lines

lib/git/odb/utils.py
Lines changed: 27 additions & 4 deletions
@@ -103,10 +103,12 @@ class DecompressMemMapReader(object):
     times we actually allocate. An own zlib implementation would be good here
     to better support streamed reading - it would only need to keep the mmap
     and decompress it into chunks, that's all ... """
-    __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_cs', '_close')
+    __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')

-    def __init__(self, m, close_on_deletion, cs=128*1024):
-        """Initialize with mmap and chunk_size for stream reading"""
+    max_read_size = 512*1024
+
+    def __init__(self, m, close_on_deletion):
+        """Initialize with mmap for stream reading"""
         self._m = m
         self._zip = zlib.decompressobj()
         self._buf = None                 # buffer of decompressed bytes
@@ -115,7 +117,6 @@ def __init__(self, m, close_on_deletion, cs = 128*1024):
         self._br = 0                     # num uncompressed bytes read
         self._cws = 0                    # start byte of compression window
         self._cwe = 0                    # end byte of compression window
-        self._cs = cs                    # chunk size (when reading from zip)
         self._close = close_on_deletion  # close the memmap on deletion ?

     def __del__(self):
@@ -163,6 +164,28 @@ def read(self, size=-1):
             return str()
         # END handle depletion

+        # Protect from memory peaks:
+        # if the caller tries to read large chunks, our memory patterns get really bad,
+        # as we end up copying a possibly huge chunk from our memory map right into
+        # memory. This might not even be possible. Nonetheless, try to dampen the
+        # effect a bit by reading in chunks, returning a huge string in the end.
+        # Our performance now depends on StringIO. This way we don't need two large
+        # buffers at peak times, but only one large one in the end, which is
+        # the return buffer.
+        if size > self.max_read_size:
+            sio = StringIO()
+            while size:
+                read_size = min(self.max_read_size, size)
+                data = self.read(read_size)
+                sio.write(data)
+                size -= len(data)
+                if len(data) < read_size:
+                    break
+            # END data loop
+            sio.seek(0)
+            return sio.getvalue()
+        # END handle maxread
+
         # deplete the buffer, then just continue using the decompress object
         # which has an own buffer. We just need this to transparently parse the
         # header from the zlib stream
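The new branch in read() caps how much is pulled from the decompressor per call: any request above max_read_size (512 KiB) is served in bounded slices and stitched together through a StringIO, so only the final return string ever exists as one large buffer. Below is a minimal standalone sketch of that chunked-read pattern; it is illustrative only, written for Python 3 with io.BytesIO, and the helper name capped_read is not part of the commit.

    import io

    MAX_READ_SIZE = 512 * 1024  # mirrors DecompressMemMapReader.max_read_size

    def capped_read(reader, size, max_read_size=MAX_READ_SIZE):
        """Read `size` bytes from `reader`, never requesting more than
        `max_read_size` bytes per call; the slices are collected in a single
        BytesIO so only one large buffer (the result) exists at the end."""
        if size <= max_read_size:
            return reader.read(size)
        buf = io.BytesIO()
        while size > 0:
            chunk = reader.read(min(max_read_size, size))
            if not chunk:  # source depleted before `size` bytes were available
                break
            buf.write(chunk)
            size -= len(chunk)
        return buf.getvalue()

    # Example: read 10 MiB from a file-like source in 512 KiB slices.
    # data = capped_read(open('large.blob', 'rb'), 10 * 1024 * 1024)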

test/git/performance/test_streams.py
Lines changed: 60 additions & 7 deletions
@@ -10,6 +10,7 @@
 import sys
 import stat
 import random
+import subprocess


 from lib import (
@@ -51,23 +52,24 @@ def test_large_data_streaming(self, rwrepo):
             # writing - due to the compression it will seem faster than it is
             st = time()
             sha = ldb.to_object('blob', size, stream)
-            elapsed = time() - st
+            elapsed_add = time() - st
             assert ldb.has_object(sha)
-            fsize_kib = os.path.getsize(ldb.readable_db_object_path(sha)) / 1000
+            db_file = ldb.readable_db_object_path(sha)
+            fsize_kib = os.path.getsize(db_file) / 1000


             size_kib = size / 1000
-            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)

             # reading all at once
             st = time()
             type, size, shastream = ldb.object(sha)
             shadata = shastream.read()
-            elapsed = time() - st
+            elapsed_readall = time() - st

             stream.seek(0)
             assert shadata == stream.getvalue()
-            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)


             # reading in chunks of 1 MiB
@@ -81,11 +83,62 @@ def test_large_data_streaming(self, rwrepo):
                 if len(data) < cs:
                     break
             # END read in chunks
-            elapsed = time() - st
+            elapsed_readchunks = time() - st

             stream.seek(0)
             assert ''.join(chunks) == stream.getvalue()

             cs_kib = cs / 1000
-            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed, size_kib / elapsed)
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+            # delete the db file so git has something to do
+            os.remove(db_file)
+
+            # VS. CGIT
+            ##########
+            # CGIT! Can using the cgit programs be faster?
+            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
+
+            # write file - pump everything in at once to be as fast as possible
+            data = stream.getvalue()    # cache it
+            st = time()
+            proc.stdin.write(data)
+            proc.stdin.close()
+            gitsha = proc.stdout.read().strip()
+            proc.wait()
+            gelapsed_add = time() - st
+            del(data)
+            assert gitsha == sha    # we do it the same way, right ?
+
+            # as it's the same sha, we reuse our path
+            fsize_kib = os.path.getsize(db_file) / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)

+            # compare ...
+            print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
+
+
+            # read all
+            st = time()
+            s, t, size, data = rwrepo.git.get_object_data(gitsha)
+            gelapsed_readall = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
+
+
+            # read chunks
+            st = time()
+            s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
+            while True:
+                data = stream.read(cs)
+                if len(data) < cs:
+                    break
+            # END read stream
+            gelapsed_readchunks = time() - st
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
+
+            # compare
+            print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
         # END for each randomization factor
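The added benchmark block mirrors each GitPython measurement with a cgit counterpart: the same blob is pumped into git hash-object -w --stdin over a pipe, read back through git cat-file (via get_object_data / stream_object_data), and each pair of timings is reported as a percentage difference. A rough standalone sketch of the write-side measurement and of the percentage formula used in the prints follows; it is Python 3 using plain subprocess instead of the rwrepo.git wrapper, and the helper names are hypothetical.

    import subprocess
    import time

    def time_git_hash_object(data, repo_dir):
        # Pipe `data` (bytes) through `git hash-object -w --stdin` inside
        # `repo_dir`; return the object sha and the elapsed wall-clock time.
        st = time.time()
        proc = subprocess.Popen(['git', 'hash-object', '-w', '--stdin'],
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                cwd=repo_dir)
        out, _ = proc.communicate(data)  # write everything at once, read the sha back
        return out.strip(), time.time() - st

    def report(label, size_kib, elapsed_py, elapsed_git):
        # Same formula as the test's stderr prints: how much faster (in %)
        # the pure-Python path was relative to the git child process.
        pct = 100.0 - (elapsed_py / elapsed_git) * 100
        print("%s: %i KiB, python %.3f s vs git %.3f s -> %.1f %% faster"
              % (label, size_kib, elapsed_py, elapsed_git, pct))

Feeding the whole payload to the child process in one go keeps the pipe saturated, which matches the test's "pump everything in at once" approach for the write measurement.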

0 commit comments

