Uh oh!
There was an error while loading.Please reload this page.
- Notifications
You must be signed in to change notification settings - Fork966
Expand file tree
/
Copy pathtest_streams.py
More file actions
149 lines (124 loc) · 5.68 KB
/
test_streams.py
File metadata and controls
149 lines (124 loc) · 5.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""Performance data streaming performance"""
from __future__importprint_function
importos
importsubprocess
importsys
fromtimeimporttime
fromtest.libimport (
with_rw_repo
)
fromgit.utilimportbin_to_hex
fromgitdbimport (
LooseObjectDB,
IStream
)
fromgitdb.test.libimportmake_memory_file
importos.pathasosp
from .libimport (
TestBigRepoR
)
class TestObjDBPerformance(TestBigRepoR):
    """Benchmark large-blob streaming through the loose object database.

    Each scenario is timed twice — once with zero-filled data, once with
    random (incompressible) data — and compared against the equivalent
    cgit commands (``git hash-object`` / ``git cat-file``) run through
    the repo's command wrapper.
    """

    large_data_size_bytes = 1000 * 1000 * 10    # some MiB should do it
    moderate_data_size_bytes = 1000 * 1000 * 1  # just 1 MiB

    @with_rw_repo('HEAD', bare=True)
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(osp.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            # randomize == 0 -> compressible zero data; == 1 -> random data
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = osp.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()

            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = []
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks), file=sys.stderr)

            # del db file so git has something to do
            # drop our stream reference first so the file handle is released
            # before removal (matters on platforms with mandatory locking)
            ostream = None
            import gc
            gc.collect()
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)

            # write file - pump everything in at once to be a fast as possible
            data = stream.getvalue()    # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del data
            assert gitsha == bin_to_hex(binsha)     # we do it the same way, right ?

            # as its the same sha, we reuse our path
            fsize_kib = osp.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print("Git-Python is %f %% faster than git when adding big %s files"
                  % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr)

            # read all
            st = time()
            _hexsha, _typename, size, data = rwrepo.git.get_object_data(gitsha)
            gelapsed_readall = time() - st
            print("Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                  % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall), file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files"
                  % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr)

            # read chunks
            st = time()
            _hexsha, _typename, size, stream = rwrepo.git.stream_object_data(gitsha)
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st

            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print("Git-Python is %f %% faster than git when reading big %s files in chunks"
                  % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), file=sys.stderr)
        # END for each randomization factor