 
 #{ Utilities
 
-def pack_object_at(data, offset, as_stream):
+def pack_object_at(cursor, offset, as_stream):
     """
     :return: Tuple(abs_data_offset, PackInfo|PackStream)
         an object of the correct type according to the type_id of the object.
@@ -83,7 +83,7 @@ def pack_object_at(data, offset, as_stream):
     :param offset: offset into the data at which the object information is located
     :param as_stream: if True, a stream object will be returned that can read
         the data, otherwise you receive an info object only"""
-    data = buffer(data, offset)
+    data = cursor.use_region(offset).buffer()
     type_id, uncomp_size, data_rela_offset = pack_object_header_info(data)
     total_rela_offset = None                # set later, actual offset until data stream begins
     delta_info = None
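
Note on the new calling convention: pack_object_at() now takes a cursor from the memory-map manager instead of a pre-sliced buffer, and calls use_region(offset) itself. A minimal sketch of a call site, assuming mman is the module-level map manager referenced in this diff; pack_path and first_object_offset are illustrative names only:

    # illustrative call site - pack_path and first_object_offset are made up
    cursor = mman.make_cursor(pack_path)
    abs_ofs, ostream = pack_object_at(cursor, first_object_offset, True)
    # ostream still exposes type_id and pack_offset as before; only the window
    # around first_object_offset needs to be mapped, not the whole pack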
@@ -269,6 +269,10 @@ def _set_cache_(self, attr):
             # that we can actually write to the location - it could be a read-only
             # alternate for instance
             self._cursor = mman.make_cursor(self._indexpath).use_region()
+            # We will assume that the index will always fully fit into memory!
+            if mman.window_size() > 0 and self._cursor.file_size() > mman.window_size():
+                raise AssertionError("The index file at %s is too large to fit into a mapped window (%i > %i). This is a limitation of the implementation" % (self._indexpath, self._cursor.file_size(), mman.window_size()))
+            # END assert window size
         else:
             # now it's time to initialize everything - if we are here, someone wants
             # to access the fanout table or related properties
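
The added assertion reflects an assumption that the whole .idx file is reachable through a single mapped window. A pre-flight check along the same lines could look like the sketch below; it uses only the manager calls appearing in this hunk, and index_fits_in_window / index_path are hypothetical names:

    # hypothetical helper mirroring the guard above
    def index_fits_in_window(mman, index_path):
        cursor = mman.make_cursor(index_path).use_region()
        # the guard above treats a window size of 0 as "no limit"
        return mman.window_size() == 0 or cursor.file_size() <= mman.window_size()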
@@ -527,13 +531,13 @@ def _set_cache_(self, attr):
 
     def _iter_objects(self, start_offset, as_stream=True):
         """Handle the actual iteration of objects within this pack"""
-        data = self._cursor.map()
-        content_size = len(data) - self.footer_size
+        c = self._cursor
+        content_size = c.file_size() - self.footer_size
         cur_offset = start_offset or self.first_object_offset
 
         null = NullStream()
         while cur_offset < content_size:
-            data_offset, ostream = pack_object_at(data, cur_offset, True)
+            data_offset, ostream = pack_object_at(c, cur_offset, True)
             # scrub the stream to the end - this decompresses the object, but yields
             # the amount of compressed bytes we need to get to the next offset
 
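
The content size is now taken from the cursor rather than from a full mapping, which matters once a pack can be larger than one window. A small sketch of the difference, assuming cursor was produced by mman.make_cursor(...):

    # illustrative only: two ways of deriving the size
    mapped_size = len(cursor.use_region().map())   # size of the currently mapped window
    real_size = cursor.file_size()                 # size of the file on disk
    # the two only agree while the file fits into a single window, hence the switch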
@@ -562,12 +566,14 @@ def version(self):
     def data(self):
         """
         :return: read-only data of this pack. It provides random access and usually
-            is a memory map"""
-        return self._cursor.map()
+            is a memory map.
+        :note: This method is unsafe as it returns a window into a file which might be larger than the actual window size"""
+        # can use map as we are starting at offset 0. Otherwise we would have to use buffer()
+        return self._cursor.use_region().map()
 
     def checksum(self):
         """:return: 20 byte sha1 hash on all object sha's contained in this file"""
-        return self._cursor.map()[-20:]
+        return self._cursor.use_region(self._cursor.file_size() - 20).buffer()[:]
 
     def path(self):
         """:return: path to the packfile"""
@@ -586,9 +592,9 @@ def collect_streams(self, offset):
         If the object at offset is no delta, the size of the list is 1.
         :param offset: specifies the first byte of the object within this pack"""
         out = list()
-        data = self._cursor.map()
+        c = self._cursor
         while True:
-            ostream = pack_object_at(data, offset, True)[1]
+            ostream = pack_object_at(c, offset, True)[1]
             out.append(ostream)
             if ostream.type_id == OFS_DELTA:
                 offset = ostream.pack_offset - ostream.delta_info
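
The OFS_DELTA branch above stores the base as a backward distance, so the base object's offset falls out of a subtraction. A tiny worked example with invented numbers:

    # invented values, purely to illustrate the arithmetic above
    pack_offset, delta_info = 5000, 1200      # delta object at 5000, encoded distance 1200
    base_offset = pack_offset - delta_info    # == 3800, where the base object starts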
@@ -610,14 +616,14 @@ def info(self, offset):
 
         :param offset: byte offset
         :return: OPackInfo instance, the actual type differs depending on the type_id attribute"""
-        return pack_object_at(self._cursor.map(), offset or self.first_object_offset, False)[1]
+        return pack_object_at(self._cursor, offset or self.first_object_offset, False)[1]
 
     def stream(self, offset):
         """Retrieve an object at the given file-relative offset as stream along with its information
 
         :param offset: byte offset
         :return: OPackStream instance, the actual type differs depending on the type_id attribute"""
-        return pack_object_at(self._cursor.map(), offset or self.first_object_offset, True)[1]
+        return pack_object_at(self._cursor, offset or self.first_object_offset, True)[1]
 
     def stream_iter(self, start_offset=0):
         """
@@ -700,7 +706,7 @@ def _object(self, sha, as_stream, index=-1):
             sha = self._index.sha(index)
         # END assure sha is present (in output)
         offset = self._index.offset(index)
-        type_id, uncomp_size, data_rela_offset = pack_object_header_info(buffer(self._pack._cursor.map(), offset))
+        type_id, uncomp_size, data_rela_offset = pack_object_header_info(self._pack._cursor.use_region(offset).buffer())
         if as_stream:
             if type_id not in delta_types:
                 packstream = self._pack.stream(offset)
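
Taken together, every hunk applies the same substitution: instead of slicing one big memory map with buffer(), the code asks the cursor to map just the region that starts at the wanted offset. Schematically, with cursor standing for any cursor produced by mman.make_cursor(...):

    # before: map everything, then take an offset view (Python 2 buffer object)
    data = buffer(cursor.map(), offset)

    # after: map only the window containing the offset and view it from there
    data = cursor.use_region(offset).buffer()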