1
- """Performance data streaming performance"""
1
+ """Performance tests for data streaming."""
2
+
2
3
import os
3
4
import subprocess
4
5
import sys
15
16
16
17
17
18
class TestObjDBPerformance (TestBigRepoR ):
18
- large_data_size_bytes = 1000 * 1000 * 10 #some MiB should do it
19
- moderate_data_size_bytes = 1000 * 1000 * 1 #just 1 MiB
19
+ large_data_size_bytes = 1000 * 1000 * 10 #Some MiB should do it.
20
+ moderate_data_size_bytes = 1000 * 1000 * 1 #Just 1 MiB.
20
21
21
22
@with_rw_repo ("HEAD" ,bare = True )
22
23
def test_large_data_streaming (self ,rwrepo ):
23
- # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
24
- # It should be shared if possible
24
+ # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream.
25
+ # It should be shared if possible.
25
26
ldb = LooseObjectDB (osp .join (rwrepo .git_dir ,"objects" ))
26
27
27
28
for randomize in range (2 ):
@@ -32,7 +33,7 @@ def test_large_data_streaming(self, rwrepo):
32
33
elapsed = time ()- st
33
34
print ("Done (in %f s)" % elapsed ,file = sys .stderr )
34
35
35
- #writing - due to the compression it will seem faster than it is
36
+ #Writing - due to the compression it will seem faster than it is.
36
37
st = time ()
37
38
binsha = ldb .store (IStream ("blob" ,size ,stream )).binsha
38
39
elapsed_add = time ()- st
@@ -45,7 +46,7 @@ def test_large_data_streaming(self, rwrepo):
45
46
msg %= (size_kib ,fsize_kib ,desc ,elapsed_add ,size_kib / elapsed_add )
46
47
print (msg ,file = sys .stderr )
47
48
48
- #reading all at once
49
+ #Reading all at once.
49
50
st = time ()
50
51
ostream = ldb .stream (binsha )
51
52
shadata = ostream .read ()
@@ -57,7 +58,7 @@ def test_large_data_streaming(self, rwrepo):
57
58
msg %= (size_kib ,desc ,elapsed_readall ,size_kib / elapsed_readall )
58
59
print (msg ,file = sys .stderr )
59
60
60
- #reading in chunks of 1 MiB
61
+ #Reading in chunks of 1 MiB.
61
62
cs = 512 * 1000
62
63
chunks = []
63
64
st = time ()
@@ -86,7 +87,7 @@ def test_large_data_streaming(self, rwrepo):
86
87
file = sys .stderr ,
87
88
)
88
89
89
- # del db file so git has something to do
90
+ # del db file so git has something to do.
90
91
ostream = None
91
92
import gc
92
93
@@ -95,34 +96,34 @@ def test_large_data_streaming(self, rwrepo):
95
96
96
97
# VS. CGIT
97
98
##########
98
- # CGIT ! Can using the cgit programs be faster ?
99
+ # CGIT! Can using the cgit programs be faster?
99
100
proc = rwrepo .git .hash_object ("-w" ,"--stdin" ,as_process = True ,istream = subprocess .PIPE )
100
101
101
- #write file - pump everything in at once to be a fast as possible
102
- data = stream .getvalue ()#cache it
102
+ #Write file - pump everything in at once to be as fast as possible.
103
+ data = stream .getvalue ()#Cache it.
103
104
st = time ()
104
105
proc .stdin .write (data )
105
106
proc .stdin .close ()
106
107
gitsha = proc .stdout .read ().strip ()
107
108
proc .wait ()
108
109
gelapsed_add = time ()- st
109
110
del data
110
- assert gitsha == bin_to_hex (binsha )#we do it the same way, right ?
111
+ assert gitsha == bin_to_hex (binsha )#We do it the same way, right?
111
112
112
- # as its the same sha, we reuse our path
113
+ #As it's the same sha, we reuse our path.
113
114
fsize_kib = osp .getsize (db_file )/ 1000
114
115
msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
115
116
msg %= (size_kib ,fsize_kib ,desc ,gelapsed_add ,size_kib / gelapsed_add )
116
117
print (msg ,file = sys .stderr )
117
118
118
- #compare .. .
119
+ #Compare.
119
120
print (
120
121
"Git-Python is %f %% faster than git when adding big %s files"
121
122
% (100.0 - (elapsed_add / gelapsed_add )* 100 ,desc ),
122
123
file = sys .stderr ,
123
124
)
124
125
125
- #read all
126
+ #Read all.
126
127
st = time ()
127
128
_hexsha ,_typename ,size ,data = rwrepo .git .get_object_data (gitsha )
128
129
gelapsed_readall = time ()- st
@@ -132,14 +133,14 @@ def test_large_data_streaming(self, rwrepo):
132
133
file = sys .stderr ,
133
134
)
134
135
135
- #compare
136
+ #Compare.
136
137
print (
137
138
"Git-Python is %f %% faster than git when reading big %sfiles"
138
139
% (100.0 - (elapsed_readall / gelapsed_readall )* 100 ,desc ),
139
140
file = sys .stderr ,
140
141
)
141
142
142
- #read chunks
143
+ #Read chunks.
143
144
st = time ()
144
145
_hexsha ,_typename ,size ,stream = rwrepo .git .stream_object_data (gitsha )
145
146
while True :
@@ -158,7 +159,7 @@ def test_large_data_streaming(self, rwrepo):
158
159
)
159
160
print (msg ,file = sys .stderr )
160
161
161
- #compare
162
+ #Compare.
162
163
print (
163
164
"Git-Python is %f %% faster than git when reading big %s files in chunks"
164
165
% (100.0 - (elapsed_readchunks / gelapsed_readchunks )* 100 ,desc ),