 
 #{ Utilities
 
-def pack_object_at(data, offset, as_stream):
+def pack_object_at(cursor, offset, as_stream):
     """
     :return: Tuple(abs_data_offset, PackInfo|PackStream)
         an object of the correct type according to the type_id of the object.
@@ -83,7 +83,7 @@ def pack_object_at(data, offset, as_stream):
     :param offset: offset into the data at which the object information is located
     :param as_stream: if True, a stream object will be returned that can read
         the data, otherwise you receive an info object only"""
-    data = buffer(data, offset)
+    data = cursor.use_region(offset).buffer()
     type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
     total_rela_offset = None                # set later, actual offset until data stream begins
     delta_info = None
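
Note on the new calling convention: pack_object_at() now takes a cursor from the memory-map manager instead of a pre-sliced buffer, and calls use_region(offset) itself. A minimal sketch of a call site, assuming mman is the module-level map manager referenced in this diff; pack_path and first_object_offset are illustrative names only:

    # illustrative call site - pack_path and first_object_offset are made up
    cursor = mman.make_cursor(pack_path)
    abs_ofs, ostream = pack_object_at(cursor, first_object_offset, True)
    # ostream still exposes type_id and pack_offset as before; only the window
    # around first_object_offset needs to be mapped, not the whole pack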
@@ -269,6 +269,10 @@ def _set_cache_(self, attr):
             # that we can actually write to the location - it could be a read-only
             # alternate for instance
             self._cursor = mman.make_cursor(self._indexpath).use_region()
+            # We will assume that the index will always fully fit into memory!
+            if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
+                raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size()))
+            # END assert window size
         else:
             # now it's time to initialize everything - if we are here, someone wants
             # to access the fanout table or related properties
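
The added assertion reflects an assumption that the whole .idx file is reachable through a single mapped window. A pre-flight check along the same lines could look like the sketch below; it uses only the manager calls appearing in this hunk, and index_fits_in_window / index_path are hypothetical names:

    # hypothetical helper mirroring the guard above
    def index_fits_in_window(mman, index_path):
        cursor = mman.make_cursor(index_path).use_region()
        # the guard above treats a window size of 0 as "no limit"
        return mman.window_size() == 0 or cursor.file_size() <= mman.window_size()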
@@ -527,13 +531,13 @@ def _set_cache_(self, attr):
 
     def _iter_objects(self, start_offset, as_stream=True):
         """Handle the actual iteration of objects within this pack"""
-        data = self._cursor.map()
-        content_size = len(data) - self.footer_size
+        c = self._cursor
+        content_size = c.file_size() - self.footer_size
         cur_offset = start_offset or self.first_object_offset
 
         null = NullStream()
         while cur_offset < content_size:
-            data_offset, ostream = pack_object_at(data, cur_offset, True)
+            data_offset, ostream = pack_object_at(c, cur_offset, True)
             # scrub the stream to the end - this decompresses the object, but yields
             # the amount of compressed bytes we need to get to the next offset
 
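
The content size is now taken from the cursor rather than from a full mapping, which matters once a pack can be larger than one window. A small sketch of the difference, assuming cursor was produced by mman.make_cursor(...):

    # illustrative only: two ways of deriving the size
    mapped_size = len(cursor.use_region().map())   # size of the currently mapped window
    real_size = cursor.file_size()                 # size of the file on disk
    # the two only agree while the file fits into a single window, hence the switch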
@@ -562,12 +566,14 @@ def version(self):
     def data(self):
         """
         :return: read-only data of this pack. It provides random access and usually
-            is a memory map"""
-        return self._cursor.map()
+            is a memory map.
+        :note: This method is unsafe as it returns a window into a file which might be larger than the actual window size"""
+        # can use map as we are starting at offset 0. Otherwise we would have to use buffer()
+        return self._cursor.use_region().map()
 
     def checksum(self):
         """:return: 20 byte sha1 hash on all object sha's contained in this file"""
-        return self._cursor.map()[-20:]
+        return self._cursor.use_region(self._cursor.file_size() - 20).buffer()[:]
 
     def path(self):
         """:return: path to the packfile"""
@@ -586,9 +592,9 @@ def collect_streams(self, offset):
         If the object at offset is no delta, the size of the list is 1.
         :param offset: specifies the first byte of the object within this pack"""
         out = list()
-        data = self._cursor.map()
+        c = self._cursor
         while True:
-            ostream = pack_object_at(data, offset, True)[1]
+            ostream = pack_object_at(c, offset, True)[1]
             out.append(ostream)
             if ostream.type_id == OFS_DELTA:
                 offset = ostream.pack_offset - ostream.delta_info
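
The OFS_DELTA branch above stores the base as a backward distance, so the base object's offset falls out of a subtraction. A tiny worked example with invented numbers:

    # invented values, purely to illustrate the arithmetic above
    pack_offset, delta_info = 5000, 1200      # delta object at 5000, encoded distance 1200
    base_offset = pack_offset - delta_info    # == 3800, where the base object starts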
@@ -610,14 +616,14 @@ def info(self, offset):
 
         :param offset: byte offset
         :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
-        return pack_object_at(self._cursor.map(), offset or self.first_object_offset, False)[1]
+        return pack_object_at(self._cursor, offset or self.first_object_offset, False)[1]
 
     def stream(self, offset):
         """Retrieve an object at the given file-relative offset as stream along with its information
 
         :param offset: byte offset
         :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
-        return pack_object_at(self._cursor.map(), offset or self.first_object_offset, True)[1]
+        return pack_object_at(self._cursor, offset or self.first_object_offset, True)[1]
 
     def stream_iter(self, start_offset=0):
         """
@@ -700,7 +706,7 @@ def _object(self, sha, as_stream, index=-1):
             sha = self._index.sha(index)
         # END assure sha is present (in output)
         offset = self._index.offset(index)
-        type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._cursor.map(), offset))
+        type_id, uncomp_size, data_rela_offset = pack_object_header_info(self._pack._cursor.use_region(offset).buffer())
         if as_stream:
             if type_id not in delta_types:
                 packstream = self._pack.stream(offset)
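
Taken together, every hunk applies the same substitution: instead of slicing one big memory map with buffer(), the code asks the cursor to map just the region that starts at the wanted offset. Schematically, with cursor standing for any cursor produced by mman.make_cursor(...):

    # before: map everything, then take an offset view (Python 2 buffer object)
    data = buffer(cursor.map(), offset)

    # after: map only the window containing the offset and view it from there
    data = cursor.use_region(offset).buffer()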