Uh oh!
There was an error while loading.Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork966
Expand file tree
/
Copy pathtest_streams.py
More file actions
149 lines (124 loc) · 5.68 KB
/
test_streams.py
File metadata and controls
149 lines (124 loc) · 5.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""Performance data streaming performance"""
from __future__importprint_function
importos
importsubprocess
importsys
fromtimeimporttime
fromtest.libimport (
with_rw_repo
)
fromgit.utilimportbin_to_hex
fromgitdbimport (
LooseObjectDB,
IStream
)
fromgitdb.test.libimportmake_memory_file
importos.pathasosp
from .libimport (
TestBigRepoR
)
class TestObjDBPerformance(TestBigRepoR):
    """Benchmark large-blob streaming through the loose object database.

    Each scenario is timed twice — once with zero-filled data, once with
    random (incompressible) data — and compared against the equivalent
    cgit commands (``git hash-object`` / ``git cat-file``) run through
    the repo's command wrapper.
    """

    large_data_size_bytes = 1000 * 1000 * 10    # some MiB should do it
    moderate_data_size_bytes = 1000 * 1000 * 1  # just 1 MiB

    @with_rw_repo('HEAD', bare=True)
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            # randomize == 0 -> compressible zero data; == 1 -> random data
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = osp.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()

            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = []
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

            # del db file so git has something to do
            # drop our stream reference first so the file handle is released
            # before removal (matters on platforms with mandatory locking)
            ostream = None
            import gc
            gc.collect()
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

            # write file - pump everything in at once to be a fast as possible
            data = stream.getvalue()    # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del data
            assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

            # as its the same sha, we reuse our path
            fsize_kib = osp.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print("Git-Python is %f %% faster than git when adding big %s files"
                  % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

            # read all
            st = time()
            _hexsha, _typename, size, data = rwrepo.git.get_object_data(gitsha)
            gelapsed_readall = time() - st
            print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files"
                  % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

            # read chunks
            st = time()
            _hexsha, _typename, size, stream = rwrepo.git.stream_object_data(gitsha)
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st

            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files in chunks"
                  % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)
        # END for each randomization factor