Commit ac62760

index.add: now uses gitdb.store functionality instead of git-hash-object. The Python version is about as fast, but could support multithreading using async.

1 parent f164627 · commit ac62760

5 files changed: +678 -647 lines

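For orientation, here is a minimal sketch (not part of the commit) of the storage path that index.add now uses internally: instead of piping paths to git-hash-object, the file content is read in Python and handed to the repository's object database as an IStream. The repository location and file path below are hypothetical.

    import os
    import stat
    from cStringIO import StringIO

    from git import Repo, Blob
    from gitdb.base import IStream

    repo = Repo("/path/to/repo")           # hypothetical repository
    filepath = "some/file.txt"             # hypothetical path inside the working tree

    st = os.lstat(filepath)                # lstat so a symlink's target becomes the blob content
    if stat.S_ISLNK(st.st_mode):
        stream = StringIO(os.readlink(filepath))
    else:
        stream = open(filepath, 'rb')

    istream = repo.odb.store(IStream(Blob.type, st.st_size, stream))
    print(istream.sha)                     # sha that ends up in the BaseIndexEntry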

‎lib/git/index/base.py

Lines changed: 46 additions & 44 deletions

@@ -12,6 +12,7 @@
 import stat
 import subprocess
 import glob
+from cStringIO import StringIO
 
 from typ import *
 from util import (
@@ -48,6 +49,10 @@
 	)
 
 
+from gitdb.base import (
+						IStream
+					)
+
 __all__ = ('IndexFile', 'CheckoutError' )
 
 
@@ -255,9 +260,6 @@ def write(self, file_path = None, ignore_tree_extension_data=False):
 
 		Returns
 			self
-
-		Note
-			Index writing based on the dulwich implementation
 		"""
 		lfd = LockedFD(file_path or self._file_path)
 		stream = lfd.open(write=True, stream=True)
@@ -634,12 +636,10 @@ def _preprocess_add_items(self, items):
 		# END for each item
 		return (paths, entries)
 
-
 	@clear_cache
 	@default_index
 	def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
-		"""
-		Add files from the working tree, specific blobs or BaseIndexEntries
+		"""Add files from the working tree, specific blobs or BaseIndexEntries
 		to the index. The underlying index file will be written immediately, hence
 		you should provide as many items as possible to minimize the amounts of writes
 
@@ -695,7 +695,7 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
 
 		:param fprogress:
 			Function with signature f(path, done=False, item=item) called for each
-			path to be added, once once it is about to be added where done==False
+			path to be added, one time once it is about to be added where done==False
 			and once after it was added where done=True.
 			item is set to the actual item we handle, either a Path or a BaseIndexEntry
 			Please note that the processed path is not guaranteed to be present
@@ -713,8 +713,8 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
 		:return:
 			List(BaseIndexEntries) representing the entries just actually added.
 
-		Raises
-			GitCommandError if a supplied Path did not exist. Please note that BaseIndexEntry
+		:raise OSError:
+			if a supplied Path did not exist. Please note that BaseIndexEntry
 			Objects that do not have a null sha will be added even if their paths
 			do not exist.
 		"""
@@ -734,28 +734,45 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
 			del(paths[:])
 		# END rewrite paths
 
+
+		def store_path(filepath):
+			"""Store file at filepath in the database and return the base index entry"""
+			st = os.lstat(filepath)		# handles non-symlinks as well
+			stream = None
+			if stat.S_ISLNK(st.st_mode):
+				stream = StringIO(os.readlink(filepath))
+			else:
+				stream = open(filepath, 'rb')
+			# END handle stream
+			fprogress(filepath, False, filepath)
+			istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
+			fprogress(filepath, True, filepath)
+
+			return BaseIndexEntry((st.st_mode, istream.sha, 0, filepath))
+		# END utility method
+
+
 		# HANDLE PATHS
 		if paths:
-			# to get suitable progress information, pipe paths to stdin
-			args = ("--add", "--replace", "--verbose", "--stdin")
-			proc = self.repo.git.update_index(*args, **{'as_process':True, 'istream':subprocess.PIPE})
-			make_exc = lambda : GitCommandError(("git-update-index",)+args, 128, proc.stderr.read())
+			assert len(entries_added) == 0
 			added_files = list()
-
 			for filepath in self._iter_expand_paths(paths):
-				self._write_path_to_stdin(proc, filepath, filepath, make_exc,
-											fprogress, read_from_stdout=False)
-				added_files.append(filepath)
+				entries_added.append(store_path(filepath))
 			# END for each filepath
-			self._flush_stdin_and_wait(proc, ignore_stdout=True)	# ignore stdout
+
+			# add the new entries to this instance, and write it
+			for entry in entries_added:
+				self.entries[(entry.path, 0)] = IndexEntry.from_base(entry)
 
-			# force rereading our entries once it is all done
-			self._delete_entries_cache()
-			entries_added.extend(self.entries[(f,0)] for f in added_files)
+			# finally write the changed index
+			self.write()
 		# END path handling
 
+
 		# HANDLE ENTRIES
 		if entries:
+			# TODO: Add proper IndexEntries to ourselves, and write the index
+			# just once. Currently its done twice at least
 			null_mode_entries = [ e for e in entries if e.mode == 0 ]
 			if null_mode_entries:
 				raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity")
@@ -765,37 +782,22 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
 			# create objects if required, otherwise go with the existing shas
 			null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
 			if null_entries_indices:
-				# creating object ids is the time consuming part. Hence we will
-				# send progress for these now.
-				args = ("-w", "--stdin-paths")
-				proc = self.repo.git.hash_object(*args, **{'istream':subprocess.PIPE, 'as_process':True})
-				make_exc = lambda : GitCommandError(("git-hash-object",)+args, 128, proc.stderr.read())
-				obj_ids = list()
 				for ei in null_entries_indices:
-					entry = entries[ei]
-					obj_ids.append(self._write_path_to_stdin(proc, entry.path, entry,
-												make_exc, fprogress, read_from_stdout=True))
+					null_entry = entries[ei]
+					new_entry = store_path(null_entry.path)
+
+					# update null entry
+					entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
 				# END for each entry index
-				assert len(obj_ids) == len(null_entries_indices), "git-hash-object did not produce all requested objects: want %i, got %i" % ( len(null_entries_indices), len(obj_ids) )
-
-				# update IndexEntries with new object id
-				for i, new_sha in zip(null_entries_indices, obj_ids):
-					e = entries[i]
-
-					new_entry = BaseIndexEntry((e.mode, new_sha, e.stage, e.path))
-					entries[i] = new_entry
-				# END for each index
 			# END null_entry handling
 
 			# REWRITE PATHS
 			# If we have to rewrite the entries, do so now, after we have generated
 			# all object sha's
 			if path_rewriter:
-				new_entries = list()
-				for e in entries:
-					new_entries.append(BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e))))
+				for i,e in enumerate(entries):
+					entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
 				# END for each entry
-				entries = new_entries
 			# END handle path rewriting
 
 			# feed pure entries to stdin
@@ -821,7 +823,7 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None):
 			self._flush_stdin_and_wait(proc, ignore_stdout=True)
 			entries_added.extend(entries)
 		# END if there are base entries
-		
+
 		return entries_added
 
 	def _items_to_rela_paths(self, items):
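As a usage sketch (paths are hypothetical, not from this commit): add() still accepts working-tree paths, calls fprogress twice per item, and returns the BaseIndexEntry objects it actually added, but the blobs are now written through repo.odb rather than git-update-index / git-hash-object.

    from git import Repo

    repo = Repo("/path/to/repo")                 # hypothetical repository

    def fprogress(path, done, item):
        # called once before (done=False) and once after (done=True) each item
        print("%s %s" % ("stored" if done else "storing", path))

    entries = repo.index.add(['README', 'doc/*.txt'], fprogress=fprogress)
    print("%i entries added" % len(entries))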

‎lib/git/repo.py

Lines changed: 11 additions & 9 deletions

@@ -723,16 +723,17 @@ def init(cls, path=None, mkdir=True, **kwargs):
 		return Repo(path)
 
 	def clone(self, path, **kwargs):
-		"""
-		Create a clone from this repository.
-
-		``path``
+		"""Create a clone from this repository.
+		:param path:
 			is the full path of the new repo (traditionally ends with ./<name>.git).
 
-		``kwargs``
-			keyword arguments to be given to the git-clone command
-
-		Returns
+		:param kwargs:
+			odbt = ObjectDatabase Type, allowing to determine the object database
+			implementation used by the returned Repo instance
+
+			All remaining keyword arguments are given to the git-clone command
+
+		:return:
 			``git.Repo`` (the newly cloned repo)
 		"""
 		# special handling for windows for path at which the clone should be
@@ -741,6 +742,7 @@ def clone(self, path, **kwargs):
 		# we at least give a proper error instead of letting git fail
 		prev_cwd = None
 		prev_path = None
+		odbt = kwargs.pop('odbt', GitCmdObjectDB)
 		if os.name == 'nt':
 			if '~' in path:
 				raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
@@ -767,7 +769,7 @@ def clone(self, path, **kwargs):
 			path = prev_path
 		# END reset previous working dir
 		# END bad windows handling
-		return Repo(path)
+		return Repo(path, odbt=odbt)
 
 
 	def archive(self, ostream, treeish=None, prefix=None, **kwargs):
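A short sketch of the new odbt keyword (paths are hypothetical): it selects the object database implementation of the returned Repo, while every other keyword argument still goes to git-clone.

    from git import Repo
    from git.db import GitCmdObjectDB, GitDB

    repo = Repo("/path/to/source")                                          # hypothetical source repository
    cmd_clone = repo.clone("/tmp/clone-cmd.git", bare=True)                 # default backend: GitCmdObjectDB
    pure_clone = repo.clone("/tmp/clone-pure.git", bare=True, odbt=GitDB)   # pure-Python object database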

‎test/git/performance/lib.py

Lines changed: 18 additions & 5 deletions

@@ -4,6 +4,11 @@
 import shutil
 import tempfile
 
+from git.db import (
+	GitCmdObjectDB,
+	GitDB
+	)
+
 from git import (
 	Repo
 	)
@@ -31,9 +36,14 @@ class TestBigRepoR(TestBase):
 	"""TestCase providing access to readonly 'big' repositories using the following
	member variables:
 	
-	* gitrepo
+	* gitrorepo
 	
-	* Read-Only git repository - actually the repo of git itself"""
+	* Read-Only git repository - actually the repo of git itself
+	
+	* puregitrorepo
+	
+	* As gitrepo, but uses pure python implementation
+	"""
 	
 	#{ Invariants
 	head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
@@ -43,20 +53,23 @@ class TestBigRepoR(TestBase):
 	@classmethod
 	def setUpAll(cls):
 		super(TestBigRepoR, cls).setUpAll()
-		cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo))
+		repo_path = resolve_or_fail(k_env_git_repo)
+		cls.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB)
+		cls.puregitrorepo = Repo(repo_path, odbt=GitDB)
 
 
 class TestBigRepoRW(TestBigRepoR):
 	"""As above, but provides a big repository that we can write to.
 	
-	Provides ``self.gitrwrepo``"""
+	Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``"""
 	
 	@classmethod
 	def setUpAll(cls):
 		super(TestBigRepoRW, cls).setUpAll()
 		dirname = tempfile.mktemp()
 		os.mkdir(dirname)
-		cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True)
+		cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB)
+		cls.puregitrwrepo = Repo(dirname, odbt=GitDB)
 
 	@classmethod
 	def tearDownAll(cls):
‎test/git/performance/test_odb.py

Lines changed: 52 additions & 44 deletions

@@ -12,50 +12,58 @@
 class TestObjDBPerformance(TestBigRepoR):
 	
 	def test_random_access(self):
-		
-		# GET COMMITS
-		# TODO: use the actual db for this
-		st = time()
-		root_commit = self.gitrorepo.commit(self.head_sha_2k)
-		commits = list(root_commit.traverse())
-		nc = len(commits)
-		elapsed = time() - st
-		
-		print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+		results = [ ["Iterate Commits"], ["Iterate Blobs"], ["Retrieve Blob Data"] ]
+		for repo in (self.gitrorepo, self.puregitrorepo):
+			# GET COMMITS
+			st = time()
+			root_commit = repo.commit(self.head_sha_2k)
+			commits = list(root_commit.traverse())
+			nc = len(commits)
+			elapsed = time() - st
 		
+			print >> sys.stderr, "%s: Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (type(repo.odb), nc, elapsed, nc / elapsed)
+			results[0].append(elapsed)
+			
+			# GET TREES
+			# walk all trees of all commits
+			st = time()
+			blobs_per_commit = list()
+			nt = 0
+			for commit in commits:
+				tree = commit.tree
+				blobs = list()
+				for item in tree.traverse():
+					nt += 1
+					if item.type == 'blob':
+						blobs.append(item)
+					# direct access for speed
+				# END while trees are there for walking
+				blobs_per_commit.append(blobs)
+			# END for each commit
+			elapsed = time() - st
 		
-		# GET TREES
-		# walk all trees of all commits
-		st = time()
-		blobs_per_commit = list()
-		nt = 0
-		for commit in commits:
-			tree = commit.tree
-			blobs = list()
-			for item in tree.traverse():
-				nt += 1
-				if item.type == 'blob':
-					blobs.append(item)
-				# direct access for speed
-			# END while trees are there for walking
-			blobs_per_commit.append(blobs)
-		# END for each commit
-		elapsed = time() - st
-		
-		print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
-		
-		# GET BLOBS
-		st = time()
-		nb = 0
-		too_many = 15000
-		for blob_list in blobs_per_commit:
-			for blob in blob_list:
-				blob.data
-			# END for each blobsha
-			nb += len(blob_list)
-			if nb > too_many:
-				break
-		# END for each bloblist
-		elapsed = time() - st
+			print >> sys.stderr, "%s: Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (type(repo.odb), nt, len(commits), elapsed, nt / elapsed)
+			results[1].append(elapsed)
+			
+			# GET BLOBS
+			st = time()
+			nb = 0
+			too_many = 15000
+			for blob_list in blobs_per_commit:
+				for blob in blob_list:
+					blob.data
+				# END for each blobsha
+				nb += len(blob_list)
+				if nb > too_many:
+					break
+			# END for each bloblist
+			elapsed = time() - st
+			
+			print >> sys.stderr, "%s: Retrieved %i blob and their data in %g s ( %f blobs / s )" % (type(repo.odb), nb, elapsed, nb / elapsed)
+			results[2].append(elapsed)
+		# END for each repo type
 		
-		print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
+		# final results
+		for test_name, a, b in results:
+			print >> sys.stderr, "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a)
+		# END for each result
