Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit 21499d9

Browse files
committed
util: added global sliding memory manager
pack: now using the global sliding memory manager. The current implementation assumes that packs are small enough to fit into memory right away, where the window size will be about 1 GB, as it never calls use_window() to assure the required offset actually exists. It will need to change to set the window appropriately.
1 parent d37ef77 · commit 21499d9

File tree

2 files changed

+46
-40
lines changed

2 files changed

+46
-40
lines changed

‎git/pack.py

Lines changed: 36 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
)
1111
from util import (
1212
zlib,
13+
mman,
1314
LazyMixin,
1415
unpack_from,
1516
bin_to_hex,
16-
file_contents_ro_filepath,
1717
)
1818

1919
from fun import (
@@ -247,7 +247,7 @@ class PackIndexFile(LazyMixin):
247247

248248
# Dont use slots as we dynamically bind functions for each version, need a dict for this
249249
# The slots you see here are just to keep track of our instance variables
250-
# __slots__ = ('_indexpath', '_fanout_table', '_data', '_version',
250+
# __slots__ = ('_indexpath', '_fanout_table', '_cursor', '_version',
251251
#'_sha_list_offset', '_crc_list_offset', '_pack_offset', '_pack_64_offset')
252252

253253
# used in v2 indices
@@ -261,22 +261,23 @@ def __init__(self, indexpath):
261261

262262
def_set_cache_(self,attr):
263263
ifattr=="_packfile_checksum":
264-
self._packfile_checksum=self._data[-40:-20]
264+
self._packfile_checksum=self._cursor.map()[-40:-20]
265265
elifattr=="_packfile_checksum":
266-
self._packfile_checksum=self._data[-20:]
267-
elifattr=="_data":
266+
self._packfile_checksum=self._cursor.map()[-20:]
267+
elifattr=="_cursor":
268268
# Note: We don't lock the file when reading as we cannot be sure
269269
# that we can actually write to the location - it could be a read-only
270270
# alternate for instance
271-
self._data=file_contents_ro_filepath(self._indexpath)
271+
self._cursor=mman.make_cursor(self._indexpath).use_region()
272272
else:
273273
# now its time to initialize everything - if we are here, someone wants
274274
# to access the fanout table or related properties
275275

276276
# CHECK VERSION
277-
self._version= (self._data[:4]==self.index_v2_signatureand2)or1
277+
mmap=self._cursor.map()
278+
self._version= (mmap[:4]==self.index_v2_signatureand2)or1
278279
ifself._version==2:
279-
version_id=unpack_from(">L",self._data,4)[0]
280+
version_id=unpack_from(">L",mmap,4)[0]
280281
assertversion_id==self._version,"Unsupported index version: %i"%version_id
281282
# END assert version
282283

@@ -297,16 +298,16 @@ def _set_cache_(self, attr):
297298

298299
def_entry_v1(self,i):
299300
""":return: tuple(offset, binsha, 0)"""
300-
returnunpack_from(">L20s",self._data,1024+i*24)+ (0, )
301+
returnunpack_from(">L20s",self._cursor.map(),1024+i*24)+ (0, )
301302

302303
def_offset_v1(self,i):
303304
"""see ``_offset_v2``"""
304-
returnunpack_from(">L",self._data,1024+i*24)[0]
305+
returnunpack_from(">L",self._cursor.map(),1024+i*24)[0]
305306

306307
def_sha_v1(self,i):
307308
"""see ``_sha_v2``"""
308309
base=1024+ (i*24)+4
309-
returnself._data[base:base+20]
310+
returnself._cursor.map()[base:base+20]
310311

311312
def_crc_v1(self,i):
312313
"""unsupported"""
@@ -322,25 +323,25 @@ def _entry_v2(self, i):
322323
def_offset_v2(self,i):
323324
""":return: 32 or 64 byte offset into pack files. 64 byte offsets will only
324325
be returned if the pack is larger than 4 GiB, or 2^32"""
325-
offset=unpack_from(">L",self._data,self._pack_offset+i*4)[0]
326+
offset=unpack_from(">L",self._cursor.map(),self._pack_offset+i*4)[0]
326327

327328
# if the high-bit is set, this indicates that we have to lookup the offset
328329
# in the 64 bit region of the file. The current offset ( lower 31 bits )
329330
# are the index into it
330331
ifoffset&0x80000000:
331-
offset=unpack_from(">Q",self._data,self._pack_64_offset+ (offset&~0x80000000)*8)[0]
332+
offset=unpack_from(">Q",self._cursor.map(),self._pack_64_offset+ (offset&~0x80000000)*8)[0]
332333
# END handle 64 bit offset
333334

334335
returnoffset
335336

336337
def_sha_v2(self,i):
337338
""":return: sha at the given index of this file index instance"""
338339
base=self._sha_list_offset+i*20
339-
returnself._data[base:base+20]
340+
returnself._cursor.map()[base:base+20]
340341

341342
def_crc_v2(self,i):
342343
""":return: 4 bytes crc for the object at index i"""
343-
returnunpack_from(">L",self._data,self._crc_list_offset+i*4)[0]
344+
returnunpack_from(">L",self._cursor.map(),self._crc_list_offset+i*4)[0]
344345

345346
#} END access V2
346347

@@ -358,7 +359,7 @@ def _initialize(self):
358359

359360
def_read_fanout(self,byte_offset):
360361
"""Generate a fanout table from our data"""
361-
d=self._data
362+
d=self._cursor.map()
362363
out=list()
363364
append=out.append
364365
foriinrange(256):
@@ -382,19 +383,19 @@ def path(self):
382383

383384
defpackfile_checksum(self):
384385
""":return: 20 byte sha representing the sha1 hash of the pack file"""
385-
returnself._data[-40:-20]
386+
returnself._cursor.map()[-40:-20]
386387

387388
defindexfile_checksum(self):
388389
""":return: 20 byte sha representing the sha1 hash of this index file"""
389-
returnself._data[-20:]
390+
returnself._cursor.map()[-20:]
390391

391392
defoffsets(self):
392393
""":return: sequence of all offsets in the order in which they were written
393394
:note: return value can be random accessed, but may be immmutable"""
394395
ifself._version==2:
395396
# read stream to array, convert to tuple
396397
a=array.array('I')# 4 byte unsigned int, long are 8 byte on 64 bit it appears
397-
a.fromstring(buffer(self._data,self._pack_offset,self._pack_64_offset-self._pack_offset))
398+
a.fromstring(buffer(self._cursor.map(),self._pack_offset,self._pack_64_offset-self._pack_offset))
398399

399400
# networkbyteorder to something array likes more
400401
ifsys.byteorder=='little':
@@ -501,7 +502,7 @@ class PackFile(LazyMixin):
501502
for some reason - one clearly doesn't want to read 10GB at once in that
502503
case"""
503504

504-
__slots__= ('_packpath','_data','_size','_version')
505+
__slots__= ('_packpath','_cursor','_size','_version')
505506
pack_signature=0x5041434b# 'PACK'
506507
pack_version_default=2
507508

@@ -513,26 +514,20 @@ def __init__(self, packpath):
513514
self._packpath=packpath
514515

515516
def_set_cache_(self,attr):
516-
ifattr=='_data':
517-
self._data=file_contents_ro_filepath(self._packpath)
518-
519-
# read the header information
520-
type_id,self._version,self._size=unpack_from(">LLL",self._data,0)
521-
522-
# TODO: figure out whether we should better keep the lock, or maybe
523-
# add a .keep file instead ?
524-
else:# must be '_size' or '_version'
525-
# read header info - we do that just with a file stream
526-
type_id,self._version,self._size=unpack(">LLL",open(self._packpath).read(12))
527-
# END handle header
517+
# we fill the whole cache, whichever attribute gets queried first
518+
self._cursor=mman.make_cursor(self._packpath).use_region()
528519

520+
# read the header information
521+
type_id,self._version,self._size=unpack_from(">LLL",self._cursor.map(),0)
522+
523+
# TODO: figure out whether we should better keep the lock, or maybe
524+
# add a .keep file instead ?
529525
iftype_id!=self.pack_signature:
530526
raiseParseError("Invalid pack signature: %i"%type_id)
531-
#END assert type id
532527

533528
def_iter_objects(self,start_offset,as_stream=True):
534529
"""Handle the actual iteration of objects within this pack"""
535-
data=self._data
530+
data=self._cursor.map()
536531
content_size=len(data)-self.footer_size
537532
cur_offset=start_offsetorself.first_object_offset
538533

@@ -568,11 +563,11 @@ def data(self):
568563
"""
569564
:return: read-only data of this pack. It provides random access and usually
570565
is a memory map"""
571-
returnself._data
566+
returnself._cursor.map()
572567

573568
defchecksum(self):
574569
""":return: 20 byte sha1 hash on all object sha's contained in this file"""
575-
returnself._data[-20:]
570+
returnself._cursor.map()[-20:]
576571

577572
defpath(self):
578573
""":return: path to the packfile"""
@@ -591,8 +586,9 @@ def collect_streams(self, offset):
591586
If the object at offset is no delta, the size of the list is 1.
592587
:param offset: specifies the first byte of the object within this pack"""
593588
out=list()
589+
data=self._cursor.map()
594590
whileTrue:
595-
ostream=pack_object_at(self._data,offset,True)[1]
591+
ostream=pack_object_at(data,offset,True)[1]
596592
out.append(ostream)
597593
ifostream.type_id==OFS_DELTA:
598594
offset=ostream.pack_offset-ostream.delta_info
@@ -614,14 +610,14 @@ def info(self, offset):
614610
615611
:param offset: byte offset
616612
:return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
617-
returnpack_object_at(self._data,offsetorself.first_object_offset,False)[1]
613+
returnpack_object_at(self._cursor.map(),offsetorself.first_object_offset,False)[1]
618614

619615
defstream(self,offset):
620616
"""Retrieve an object at the given file-relative offset as stream along with its information
621617
622618
:param offset: byte offset
623619
:return: OPackStream instance, the actual type differs depending on the type_id attribute"""
624-
returnpack_object_at(self._data,offsetorself.first_object_offset,True)[1]
620+
returnpack_object_at(self._cursor.map(),offsetorself.first_object_offset,True)[1]
625621

626622
defstream_iter(self,start_offset=0):
627623
"""
@@ -704,7 +700,7 @@ def _object(self, sha, as_stream, index=-1):
704700
sha=self._index.sha(index)
705701
# END assure sha is present ( in output )
706702
offset=self._index.offset(index)
707-
type_id,uncomp_size,data_rela_offset=pack_object_header_info(buffer(self._pack._data,offset))
703+
type_id,uncomp_size,data_rela_offset=pack_object_header_info(buffer(self._pack._cursor.map(),offset))
708704
ifas_stream:
709705
iftype_idnotindelta_types:
710706
packstream=self._pack.stream(offset)

‎git/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@
1515
importstat
1616
importshutil
1717
importtempfile
18+
from smmap import (
19+
StaticWindowMapManager,
20+
SlidingWindowMapBuffer
21+
)
22+
23+
1824

1925
__all__= ("stream_copy","join_path","to_native_path_windows","to_native_path_linux",
2026
"join_path_native","Stats","IndexFileSHA1Writer","Iterable","IterableList",
@@ -64,6 +70,10 @@ def unpack_from(fmt, data, offset=0):
6470
# will be handled in the main thread
6571
pool=ThreadPool(0)
6672

73+
# initialize our global memory manager instance
74+
# Use it to free cached (and unused) resources.
75+
mman=StaticWindowMapManager()
76+
6777
#} END globals
6878

6979

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp