1- """Performance data streaming performance"""
1+ """Performance tests for data streaming."""
2+
23import os
34import subprocess
45import sys
1516
1617
1718class TestObjDBPerformance (TestBigRepoR ):
18- large_data_size_bytes = 1000 * 1000 * 10 #some MiB should do it
19- moderate_data_size_bytes = 1000 * 1000 * 1 #just 1 MiB
19+ large_data_size_bytes = 1000 * 1000 * 10 #Some MiB should do it.
20+ moderate_data_size_bytes = 1000 * 1000 * 1 #Just 1 MiB.
2021
2122@with_rw_repo ("HEAD" ,bare = True )
2223def test_large_data_streaming (self ,rwrepo ):
23- # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
24- # It should be shared if possible
24+ # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream.
25+ # It should be shared if possible.
2526ldb = LooseObjectDB (osp .join (rwrepo .git_dir ,"objects" ))
2627
2728for randomize in range (2 ):
@@ -32,7 +33,7 @@ def test_large_data_streaming(self, rwrepo):
3233elapsed = time ()- st
3334print ("Done (in %f s)" % elapsed ,file = sys .stderr )
3435
35- #writing - due to the compression it will seem faster than it is
36+ #Writing - due to the compression it will seem faster than it is.
3637st = time ()
3738binsha = ldb .store (IStream ("blob" ,size ,stream )).binsha
3839elapsed_add = time ()- st
@@ -45,7 +46,7 @@ def test_large_data_streaming(self, rwrepo):
4546msg %= (size_kib ,fsize_kib ,desc ,elapsed_add ,size_kib / elapsed_add )
4647print (msg ,file = sys .stderr )
4748
48- #reading all at once
49+ #Reading all at once.
4950st = time ()
5051ostream = ldb .stream (binsha )
5152shadata = ostream .read ()
@@ -57,7 +58,7 @@ def test_large_data_streaming(self, rwrepo):
5758msg %= (size_kib ,desc ,elapsed_readall ,size_kib / elapsed_readall )
5859print (msg ,file = sys .stderr )
5960
60- #reading in chunks of 1 MiB
61+ #Reading in chunks of 1 MiB.
6162cs = 512 * 1000
6263chunks = []
6364st = time ()
@@ -86,7 +87,7 @@ def test_large_data_streaming(self, rwrepo):
8687file = sys .stderr ,
8788 )
8889
89- # del db file so git has something to do
90+ # del db file so git has something to do.
9091ostream = None
9192import gc
9293
@@ -95,34 +96,34 @@ def test_large_data_streaming(self, rwrepo):
9596
9697# VS. CGIT
9798##########
98- # CGIT ! Can using the cgit programs be faster ?
99+ # CGIT! Can using the cgit programs be faster?
99100proc = rwrepo .git .hash_object ("-w" ,"--stdin" ,as_process = True ,istream = subprocess .PIPE )
100101
101- #write file - pump everything in at once to be a fast as possible
102- data = stream .getvalue ()#cache it
102+ #Write file - pump everything in at once to be as fast as possible.
103+ data = stream .getvalue ()#Cache it.
103104st = time ()
104105proc .stdin .write (data )
105106proc .stdin .close ()
106107gitsha = proc .stdout .read ().strip ()
107108proc .wait ()
108109gelapsed_add = time ()- st
109110del data
110- assert gitsha == bin_to_hex (binsha )#we do it the same way, right ?
111+ assert gitsha == bin_to_hex (binsha )#We do it the same way, right?
111112
112- # as its the same sha, we reuse our path
113+ #As it's the same sha, we reuse our path.
113114fsize_kib = osp .getsize (db_file )/ 1000
114115msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
115116msg %= (size_kib ,fsize_kib ,desc ,gelapsed_add ,size_kib / gelapsed_add )
116117print (msg ,file = sys .stderr )
117118
118- #compare .. .
119+ #Compare.
119120print (
120121"Git-Python is %f %% faster than git when adding big %s files"
121122% (100.0 - (elapsed_add / gelapsed_add )* 100 ,desc ),
122123file = sys .stderr ,
123124 )
124125
125- #read all
126+ #Read all.
126127st = time ()
127128_hexsha ,_typename ,size ,data = rwrepo .git .get_object_data (gitsha )
128129gelapsed_readall = time ()- st
@@ -132,14 +133,14 @@ def test_large_data_streaming(self, rwrepo):
132133file = sys .stderr ,
133134 )
134135
135- #compare
136+ #Compare.
136137print (
137138"Git-Python is %f %% faster than git when reading big %sfiles"
138139% (100.0 - (elapsed_readall / gelapsed_readall )* 100 ,desc ),
139140file = sys .stderr ,
140141 )
141142
142- #read chunks
143+ #Read chunks.
143144st = time ()
144145_hexsha ,_typename ,size ,stream = rwrepo .git .stream_object_data (gitsha )
145146while True :
@@ -158,7 +159,7 @@ def test_large_data_streaming(self, rwrepo):
158159 )
159160print (msg ,file = sys .stderr )
160161
161- #compare
162+ #Compare.
162163print (
163164"Git-Python is %f %% faster than git when reading big %s files in chunks"
164165% (100.0 - (elapsed_readchunks / gelapsed_readchunks )* 100 ,desc ),