Diffstat (limited to 'git/fun.py')
-rw-r--r--  git/fun.py  1200
1 file changed, 600 insertions, 600 deletions
diff --git a/git/fun.py b/git/fun.py
index 5bbe8efc..7749ee8a 100644
--- a/git/fun.py
+++ b/git/fun.py
@@ -7,8 +7,8 @@ Keeping this code separate from the beginning makes it easier to out-source
it into c later, if required"""
from exc import (
- BadObjectType
- )
+ BadObjectType
+ )
from util import zlib
decompressobj = zlib.decompressobj
@@ -23,652 +23,652 @@ OFS_DELTA = 6
REF_DELTA = 7
delta_types = (OFS_DELTA, REF_DELTA)
-type_id_to_type_map = {
- 0 : "", # EXT 1
- 1 : "commit",
- 2 : "tree",
- 3 : "blob",
- 4 : "tag",
- 5 : "", # EXT 2
- OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
- REF_DELTA : "REF_DELTA" # REFERENCE DELTA
- }
+type_id_to_type_map = {
+ 0 : "", # EXT 1
+ 1 : "commit",
+ 2 : "tree",
+ 3 : "blob",
+ 4 : "tag",
+ 5 : "", # EXT 2
+ OFS_DELTA : "OFS_DELTA", # OFFSET DELTA
+ REF_DELTA : "REF_DELTA" # REFERENCE DELTA
+ }
type_to_type_id_map = dict(
- commit=1,
- tree=2,
- blob=3,
- tag=4,
- OFS_DELTA=OFS_DELTA,
- REF_DELTA=REF_DELTA
- )
+ commit=1,
+ tree=2,
+ blob=3,
+ tag=4,
+ OFS_DELTA=OFS_DELTA,
+ REF_DELTA=REF_DELTA
+ )
# used when dealing with larger streams
chunk_size = 1000*mmap.PAGESIZE
__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
- 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
- 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
+ 'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
+ 'is_equal_canonical_sha', 'connect_deltas', 'DeltaChunkList', 'create_pack_object_header')
#{ Structures
def _set_delta_rbound(d, size):
- """Truncate the given delta to the given size
- :param size: size relative to our target offset, may not be 0, must be smaller or equal
- to our size
- :return: d"""
- d.ts = size
-
- # NOTE: data is truncated automatically when applying the delta
- # MUST NOT DO THIS HERE
- return d
-
+ """Truncate the given delta to the given size
+    :param size: size relative to our target offset, may not be 0, must be smaller than or equal
+        to our size
+ :return: d"""
+ d.ts = size
+
+ # NOTE: data is truncated automatically when applying the delta
+ # MUST NOT DO THIS HERE
+ return d
+
def _move_delta_lbound(d, bytes):
- """Move the delta by the given amount of bytes, reducing its size so that its
- right bound stays static
- :param bytes: amount of bytes to move, must be smaller than delta size
- :return: d"""
- if bytes == 0:
- return
-
- d.to += bytes
- d.so += bytes
- d.ts -= bytes
- if d.data is not None:
- d.data = d.data[bytes:]
- # END handle data
-
- return d
-
+ """Move the delta by the given amount of bytes, reducing its size so that its
+ right bound stays static
+ :param bytes: amount of bytes to move, must be smaller than delta size
+ :return: d"""
+ if bytes == 0:
+ return
+
+ d.to += bytes
+ d.so += bytes
+ d.ts -= bytes
+ if d.data is not None:
+ d.data = d.data[bytes:]
+ # END handle data
+
+ return d
+
def delta_duplicate(src):
- return DeltaChunk(src.to, src.ts, src.so, src.data)
-
+ return DeltaChunk(src.to, src.ts, src.so, src.data)
+
def delta_chunk_apply(dc, bbuf, write):
- """Apply own data to the target buffer
- :param bbuf: buffer providing source bytes for copy operations
- :param write: write method to call with data to write"""
- if dc.data is None:
- # COPY DATA FROM SOURCE
- write(buffer(bbuf, dc.so, dc.ts))
- else:
- # APPEND DATA
- # whats faster: if + 4 function calls or just a write with a slice ?
- # Considering data can be larger than 127 bytes now, it should be worth it
- if dc.ts < len(dc.data):
- write(dc.data[:dc.ts])
- else:
- write(dc.data)
- # END handle truncation
- # END handle chunk mode
+    """Apply this chunk's data to the target buffer
+ :param bbuf: buffer providing source bytes for copy operations
+ :param write: write method to call with data to write"""
+ if dc.data is None:
+ # COPY DATA FROM SOURCE
+ write(buffer(bbuf, dc.so, dc.ts))
+ else:
+ # APPEND DATA
+        # what's faster: an if plus 4 function calls, or just a write with a slice?
+ # Considering data can be larger than 127 bytes now, it should be worth it
+ if dc.ts < len(dc.data):
+ write(dc.data[:dc.ts])
+ else:
+ write(dc.data)
+ # END handle truncation
+ # END handle chunk mode
class DeltaChunk(object):
- """Represents a piece of a delta, it can either add new data, or copy existing
- one from a source buffer"""
- __slots__ = (
- 'to', # start offset in the target buffer in bytes
- 'ts', # size of this chunk in the target buffer in bytes
- 'so', # start offset in the source buffer in bytes or None
- 'data', # chunk of bytes to be added to the target buffer,
- # DeltaChunkList to use as base, or None
- )
-
- def __init__(self, to, ts, so, data):
- self.to = to
- self.ts = ts
- self.so = so
- self.data = data
+    """Represents a piece of a delta; it can either add new data or copy existing
+    data from a source buffer"""
+ __slots__ = (
+ 'to', # start offset in the target buffer in bytes
+ 'ts', # size of this chunk in the target buffer in bytes
+ 'so', # start offset in the source buffer in bytes or None
+ 'data', # chunk of bytes to be added to the target buffer,
+ # DeltaChunkList to use as base, or None
+ )
+
+ def __init__(self, to, ts, so, data):
+ self.to = to
+ self.ts = ts
+ self.so = so
+ self.data = data
- def __repr__(self):
- return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
-
- #{ Interface
-
- def rbound(self):
- return self.to + self.ts
-
- def has_data(self):
- """:return: True if the instance has data to add to the target stream"""
- return self.data is not None
-
- #} END interface
+ def __repr__(self):
+ return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
+
+ #{ Interface
+
+ def rbound(self):
+ return self.to + self.ts
+
+ def has_data(self):
+ """:return: True if the instance has data to add to the target stream"""
+ return self.data is not None
+
+ #} END interface
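
As an illustrative sketch of the two chunk modes (Python 2, assuming the DeltaChunk and delta_chunk_apply definitions above are in scope; all values are hypothetical): a chunk with data=None copies ts bytes from the base buffer at offset so, while a chunk carrying data appends up to ts of its own bytes.

    base = "the quick brown fox"
    out = []
    write = lambda d: out.append(str(d))          # str() flattens buffer objects

    copy_chunk = DeltaChunk(0, 9, 4, None)        # copy 9 bytes from base offset 4
    add_chunk  = DeltaChunk(9, 6, 0, " jumps")    # append 6 literal bytes

    delta_chunk_apply(copy_chunk, base, write)
    delta_chunk_apply(add_chunk, base, write)
    assert "".join(out) == "quick bro jumps"
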
def _closest_index(dcl, absofs):
- """:return: index at which the given absofs should be inserted. The index points
- to the DeltaChunk with a target buffer absofs that equals or is greater than
- absofs.
- :note: global method for performance only, it belongs to DeltaChunkList"""
- lo = 0
- hi = len(dcl)
- while lo < hi:
- mid = (lo + hi) / 2
- dc = dcl[mid]
- if dc.to > absofs:
- hi = mid
- elif dc.rbound() > absofs or dc.to == absofs:
- return mid
- else:
- lo = mid + 1
- # END handle bound
- # END for each delta absofs
- return len(dcl)-1
-
+ """:return: index at which the given absofs should be inserted. The index points
+        to the DeltaChunk whose target buffer offset equals or is greater than
+        absofs.
+ :note: global method for performance only, it belongs to DeltaChunkList"""
+ lo = 0
+ hi = len(dcl)
+ while lo < hi:
+ mid = (lo + hi) / 2
+ dc = dcl[mid]
+ if dc.to > absofs:
+ hi = mid
+ elif dc.rbound() > absofs or dc.to == absofs:
+ return mid
+ else:
+ lo = mid + 1
+ # END handle bound
+ # END for each delta absofs
+ return len(dcl)-1
+
def delta_list_apply(dcl, bbuf, write):
- """Apply the chain's changes and write the final result using the passed
- write function.
- :param bbuf: base buffer containing the base of all deltas contained in this
- list. It will only be used if the chunk in question does not have a base
- chain.
- :param write: function taking a string of bytes to write to the output"""
- for dc in dcl:
- delta_chunk_apply(dc, bbuf, write)
- # END for each dc
+ """Apply the chain's changes and write the final result using the passed
+ write function.
+ :param bbuf: base buffer containing the base of all deltas contained in this
+ list. It will only be used if the chunk in question does not have a base
+ chain.
+ :param write: function taking a string of bytes to write to the output"""
+ for dc in dcl:
+ delta_chunk_apply(dc, bbuf, write)
+ # END for each dc
def delta_list_slice(dcl, absofs, size, ndcl):
- """:return: Subsection of this list at the given absolute offset, with the given
- size in bytes.
- :return: None"""
- cdi = _closest_index(dcl, absofs) # delta start index
- cd = dcl[cdi]
- slen = len(dcl)
- lappend = ndcl.append
-
- if cd.to != absofs:
- tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
- _move_delta_lbound(tcd, absofs - cd.to)
- tcd.ts = min(tcd.ts, size)
- lappend(tcd)
- size -= tcd.ts
- cdi += 1
- # END lbound overlap handling
-
- while cdi < slen and size:
- # are we larger than the current block
- cd = dcl[cdi]
- if cd.ts <= size:
- lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data))
- size -= cd.ts
- else:
- tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
- tcd.ts = size
- lappend(tcd)
- size -= tcd.ts
- break
- # END hadle size
- cdi += 1
- # END for each chunk
-
-
+    """Append the subsection of this list at the given absolute offset, with the given
+    size in bytes, to ndcl
+    :return: None"""
+ cdi = _closest_index(dcl, absofs) # delta start index
+ cd = dcl[cdi]
+ slen = len(dcl)
+ lappend = ndcl.append
+
+ if cd.to != absofs:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ _move_delta_lbound(tcd, absofs - cd.to)
+ tcd.ts = min(tcd.ts, size)
+ lappend(tcd)
+ size -= tcd.ts
+ cdi += 1
+ # END lbound overlap handling
+
+ while cdi < slen and size:
+ # are we larger than the current block
+ cd = dcl[cdi]
+ if cd.ts <= size:
+ lappend(DeltaChunk(cd.to, cd.ts, cd.so, cd.data))
+ size -= cd.ts
+ else:
+ tcd = DeltaChunk(cd.to, cd.ts, cd.so, cd.data)
+ tcd.ts = size
+ lappend(tcd)
+ size -= tcd.ts
+ break
+        # END handle size
+ cdi += 1
+ # END for each chunk
+
+
class DeltaChunkList(list):
- """List with special functionality to deal with DeltaChunks.
- There are two types of lists we represent. The one was created bottom-up, working
- towards the latest delta, the other kind was created top-down, working from the
- latest delta down to the earliest ancestor. This attribute is queryable
- after all processing with is_reversed."""
-
- __slots__ = tuple()
-
- def rbound(self):
- """:return: rightmost extend in bytes, absolute"""
- if len(self) == 0:
- return 0
- return self[-1].rbound()
-
- def lbound(self):
- """:return: leftmost byte at which this chunklist starts"""
- if len(self) == 0:
- return 0
- return self[0].to
-
- def size(self):
- """:return: size of bytes as measured by our delta chunks"""
- return self.rbound() - self.lbound()
-
- def apply(self, bbuf, write):
- """Only used by public clients, internally we only use the global routines
- for performance"""
- return delta_list_apply(self, bbuf, write)
-
- def compress(self):
- """Alter the list to reduce the amount of nodes. Currently we concatenate
- add-chunks
- :return: self"""
- slen = len(self)
- if slen < 2:
- return self
- i = 0
- slen_orig = slen
-
- first_data_index = None
- while i < slen:
- dc = self[i]
- i += 1
- if dc.data is None:
- if first_data_index is not None and i-2-first_data_index > 1:
- #if first_data_index is not None:
- nd = StringIO() # new data
- so = self[first_data_index].to # start offset in target buffer
- for x in xrange(first_data_index, i-1):
- xdc = self[x]
- nd.write(xdc.data[:xdc.ts])
- # END collect data
-
- del(self[first_data_index:i-1])
- buf = nd.getvalue()
- self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
-
- slen = len(self)
- i = first_data_index + 1
-
- # END concatenate data
- first_data_index = None
- continue
- # END skip non-data chunks
-
- if first_data_index is None:
- first_data_index = i-1
- # END iterate list
-
- #if slen_orig != len(self):
- # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
- return self
-
- def check_integrity(self, target_size=-1):
- """Verify the list has non-overlapping chunks only, and the total size matches
- target_size
- :param target_size: if not -1, the total size of the chain must be target_size
- :raise AssertionError: if the size doen't match"""
- if target_size > -1:
- assert self[-1].rbound() == target_size
- assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
- # END target size verification
-
- if len(self) < 2:
- return
-
- # check data
- for dc in self:
- assert dc.ts > 0
- if dc.has_data():
- assert len(dc.data) >= dc.ts
- # END for each dc
-
- left = islice(self, 0, len(self)-1)
- right = iter(self)
- right.next()
- # this is very pythonic - we might have just use index based access here,
- # but this could actually be faster
- for lft,rgt in izip(left, right):
- assert lft.rbound() == rgt.to
- assert lft.to + lft.ts == rgt.to
- # END for each pair
-
+    """List with special functionality to deal with DeltaChunks.
+    There are two kinds of lists we represent: one is created bottom-up, working
+    towards the latest delta, the other is created top-down, working from the
+    latest delta down to the earliest ancestor. This attribute is queryable
+    after all processing with is_reversed."""
+
+ __slots__ = tuple()
+
+ def rbound(self):
+        """:return: rightmost extent in bytes, absolute"""
+ if len(self) == 0:
+ return 0
+ return self[-1].rbound()
+
+ def lbound(self):
+ """:return: leftmost byte at which this chunklist starts"""
+ if len(self) == 0:
+ return 0
+ return self[0].to
+
+ def size(self):
+ """:return: size of bytes as measured by our delta chunks"""
+ return self.rbound() - self.lbound()
+
+ def apply(self, bbuf, write):
+ """Only used by public clients, internally we only use the global routines
+ for performance"""
+ return delta_list_apply(self, bbuf, write)
+
+ def compress(self):
+        """Alter the list to reduce the number of nodes. Currently we concatenate
+        adjacent add-chunks
+ :return: self"""
+ slen = len(self)
+ if slen < 2:
+ return self
+ i = 0
+ slen_orig = slen
+
+ first_data_index = None
+ while i < slen:
+ dc = self[i]
+ i += 1
+ if dc.data is None:
+ if first_data_index is not None and i-2-first_data_index > 1:
+ #if first_data_index is not None:
+ nd = StringIO() # new data
+ so = self[first_data_index].to # start offset in target buffer
+ for x in xrange(first_data_index, i-1):
+ xdc = self[x]
+ nd.write(xdc.data[:xdc.ts])
+ # END collect data
+
+ del(self[first_data_index:i-1])
+ buf = nd.getvalue()
+ self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))
+
+ slen = len(self)
+ i = first_data_index + 1
+
+ # END concatenate data
+ first_data_index = None
+ continue
+ # END skip non-data chunks
+
+ if first_data_index is None:
+ first_data_index = i-1
+ # END iterate list
+
+ #if slen_orig != len(self):
+ # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
+ return self
+
+ def check_integrity(self, target_size=-1):
+ """Verify the list has non-overlapping chunks only, and the total size matches
+ target_size
+ :param target_size: if not -1, the total size of the chain must be target_size
+        :raise AssertionError: if the size doesn't match"""
+ if target_size > -1:
+ assert self[-1].rbound() == target_size
+ assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
+ # END target size verification
+
+ if len(self) < 2:
+ return
+
+ # check data
+ for dc in self:
+ assert dc.ts > 0
+ if dc.has_data():
+ assert len(dc.data) >= dc.ts
+ # END for each dc
+
+ left = islice(self, 0, len(self)-1)
+ right = iter(self)
+ right.next()
+        # this is very pythonic - we might just have used index-based access here,
+        # but this could actually be faster
+ for lft,rgt in izip(left, right):
+ assert lft.rbound() == rgt.to
+ assert lft.to + lft.ts == rgt.to
+ # END for each pair
+
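
A small usage sketch of the public DeltaChunkList interface (hypothetical values, assuming the classes above are in scope):

    dcl = DeltaChunkList()
    dcl.append(DeltaChunk(0, 4, 6, None))         # copy 4 bytes from base offset 6
    dcl.append(DeltaChunk(4, 5, 0, "-delta"))     # add 5 bytes; data may be longer than ts
    base = "loose packed"
    out = []
    dcl.apply(base, lambda d: out.append(str(d)))
    assert dcl.size() == 9 and "".join(out) == "pack-delt"
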
class TopdownDeltaChunkList(DeltaChunkList):
- """Represents a list which is generated by feeding its ancestor streams one by
- one"""
- __slots__ = tuple()
-
- def connect_with_next_base(self, bdcl):
- """Connect this chain with the next level of our base delta chunklist.
- The goal in this game is to mark as many of our chunks rigid, hence they
- cannot be changed by any of the upcoming bases anymore. Once all our
- chunks are marked like that, we can stop all processing
- :param bdcl: data chunk list being one of our bases. They must be fed in
- consequtively and in order, towards the earliest ancestor delta
- :return: True if processing was done. Use it to abort processing of
- remaining streams if False is returned"""
- nfc = 0 # number of frozen chunks
- dci = 0 # delta chunk index
- slen = len(self) # len of self
- ccl = list() # temporary list
- while dci < slen:
- dc = self[dci]
- dci += 1
-
- # all add-chunks which are already topmost don't need additional processing
- if dc.data is not None:
- nfc += 1
- continue
- # END skip add chunks
-
- # copy chunks
- # integrate the portion of the base list into ourselves. Lists
- # dont support efficient insertion ( just one at a time ), but for now
- # we live with it. Internally, its all just a 32/64bit pointer, and
- # the portions of moved memory should be smallish. Maybe we just rebuild
- # ourselves in order to reduce the amount of insertions ...
- del(ccl[:])
- delta_list_slice(bdcl, dc.so, dc.ts, ccl)
-
- # move the target bounds into place to match with our chunk
- ofs = dc.to - dc.so
- for cdc in ccl:
- cdc.to += ofs
- # END update target bounds
-
- if len(ccl) == 1:
- self[dci-1] = ccl[0]
- else:
- # maybe try to compute the expenses here, and pick the right algorithm
- # It would normally be faster than copying everything physically though
- # TODO: Use a deque here, and decide by the index whether to extend
- # or extend left !
- post_dci = self[dci:]
- del(self[dci-1:]) # include deletion of dc
- self.extend(ccl)
- self.extend(post_dci)
-
- slen = len(self)
- dci += len(ccl)-1 # deleted dc, added rest
-
- # END handle chunk replacement
- # END for each chunk
-
- if nfc == slen:
- return False
- # END handle completeness
- return True
-
-
+ """Represents a list which is generated by feeding its ancestor streams one by
+ one"""
+ __slots__ = tuple()
+
+ def connect_with_next_base(self, bdcl):
+        """Connect this chain with the next level of our base delta chunklist.
+        The goal is to mark as many of our chunks rigid as possible, so that they
+        cannot be changed by any of the upcoming bases anymore. Once all our
+        chunks are marked like that, we can stop all processing
+        :param bdcl: data chunk list being one of our bases. They must be fed in
+            consecutively and in order, towards the earliest ancestor delta
+        :return: True if processing was done. Use it to abort processing of
+            remaining streams if False is returned"""
+ nfc = 0 # number of frozen chunks
+ dci = 0 # delta chunk index
+ slen = len(self) # len of self
+ ccl = list() # temporary list
+ while dci < slen:
+ dc = self[dci]
+ dci += 1
+
+ # all add-chunks which are already topmost don't need additional processing
+ if dc.data is not None:
+ nfc += 1
+ continue
+ # END skip add chunks
+
+ # copy chunks
+ # integrate the portion of the base list into ourselves. Lists
+            # don't support efficient insertion ( just one at a time ), but for now
+            # we live with it. Internally, it's all just a 32/64bit pointer, and
+ # the portions of moved memory should be smallish. Maybe we just rebuild
+ # ourselves in order to reduce the amount of insertions ...
+ del(ccl[:])
+ delta_list_slice(bdcl, dc.so, dc.ts, ccl)
+
+ # move the target bounds into place to match with our chunk
+ ofs = dc.to - dc.so
+ for cdc in ccl:
+ cdc.to += ofs
+ # END update target bounds
+
+ if len(ccl) == 1:
+ self[dci-1] = ccl[0]
+ else:
+ # maybe try to compute the expenses here, and pick the right algorithm
+ # It would normally be faster than copying everything physically though
+ # TODO: Use a deque here, and decide by the index whether to extend
+ # or extend left !
+ post_dci = self[dci:]
+ del(self[dci-1:]) # include deletion of dc
+ self.extend(ccl)
+ self.extend(post_dci)
+
+ slen = len(self)
+ dci += len(ccl)-1 # deleted dc, added rest
+
+ # END handle chunk replacement
+ # END for each chunk
+
+ if nfc == slen:
+ return False
+ # END handle completeness
+ return True
+
+
#} END structures
#{ Routines
def is_loose_object(m):
- """
- :return: True the file contained in memory map m appears to be a loose object.
- Only the first two bytes are needed"""
- b0, b1 = map(ord, m[:2])
- word = (b0 << 8) + b1
- return b0 == 0x78 and (word % 31) == 0
+ """
+    :return: True if the file contained in memory map m appears to be a loose object.
+ Only the first two bytes are needed"""
+ b0, b1 = map(ord, m[:2])
+ word = (b0 << 8) + b1
+ return b0 == 0x78 and (word % 31) == 0
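
For intuition, a sketch using only the standard library: zlib output begins with an RFC 1950 header whose first byte is 0x78 for the default 32KB window, and whose first two bytes, read as a big-endian number, are a multiple of 31 by construction; that is exactly what is_loose_object tests.

    import zlib
    m = zlib.compress("blob 5\0hello")            # what a loose object file starts with
    b0, b1 = map(ord, m[:2])
    assert b0 == 0x78 and ((b0 << 8) + b1) % 31 == 0
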
def loose_object_header_info(m):
- """
- :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
- object as well as its uncompressed size in bytes.
- :param m: memory map from which to read the compressed object data"""
- decompress_size = 8192 # is used in cgit as well
- hdr = decompressobj().decompress(m, decompress_size)
- type_name, size = hdr[:hdr.find("\0")].split(" ")
- return type_name, int(size)
-
+ """
+ :return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
+ object as well as its uncompressed size in bytes.
+ :param m: memory map from which to read the compressed object data"""
+ decompress_size = 8192 # is used in cgit as well
+ hdr = decompressobj().decompress(m, decompress_size)
+ type_name, size = hdr[:hdr.find("\0")].split(" ")
+ return type_name, int(size)
+
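
A round-trip sketch (assuming loose_object_header_info from above is in scope): compress a minimal loose object and read its header back.

    import zlib
    m = zlib.compress("blob 11\0hello world")
    assert loose_object_header_info(m) == ("blob", 11)
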
def pack_object_header_info(data):
- """
- :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
- The type_id should be interpreted according to the ``type_id_to_type_map`` map
- The byte-offset specifies the start of the actual zlib compressed datastream
- :param m: random-access memory, like a string or memory map"""
- c = ord(data[0]) # first byte
- i = 1 # next char to read
- type_id = (c >> 4) & 7 # numeric type
- size = c & 15 # starting size
- s = 4 # starting bit-shift size
- while c & 0x80:
- c = ord(data[i])
- i += 1
- size += (c & 0x7f) << s
- s += 7
- # END character loop
- return (type_id, size, i)
+ """
+ :return: tuple(type_id, uncompressed_size_in_bytes, byte_offset)
+ The type_id should be interpreted according to the ``type_id_to_type_map`` map
+ The byte-offset specifies the start of the actual zlib compressed datastream
+    :param data: random-access memory, like a string or memory map"""
+ c = ord(data[0]) # first byte
+ i = 1 # next char to read
+ type_id = (c >> 4) & 7 # numeric type
+ size = c & 15 # starting size
+ s = 4 # starting bit-shift size
+ while c & 0x80:
+ c = ord(data[i])
+ i += 1
+ size += (c & 0x7f) << s
+ s += 7
+ # END character loop
+ return (type_id, size, i)
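
A worked example (assuming pack_object_header_info from above): a commit of 300 bytes encodes as the two header bytes 0x9c 0x12; the first carries the type in bits 4-6 and the low 4 size bits, with the MSB set to signal continuation, the second carries the next 7 size bits.

    hdr = "\x9c\x12" + "compressed object data would follow"
    assert pack_object_header_info(hdr) == (1, 300, 2)    # type 1 == commit, size 300, data at byte 2
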
def create_pack_object_header(obj_type, obj_size):
- """:return: string defining the pack header comprised of the object type
- and its incompressed size in bytes
- :parmam obj_type: pack type_id of the object
- :param obj_size: uncompressed size in bytes of the following object stream"""
- c = 0 # 1 byte
- hdr = str() # output string
+ """:return: string defining the pack header comprised of the object type
+    and its uncompressed size in bytes
+    :param obj_type: pack type_id of the object
+ :param obj_size: uncompressed size in bytes of the following object stream"""
+ c = 0 # 1 byte
+ hdr = str() # output string
- c = (obj_type << 4) | (obj_size & 0xf)
- obj_size >>= 4
- while obj_size:
- hdr += chr(c | 0x80)
- c = obj_size & 0x7f
- obj_size >>= 7
- #END until size is consumed
- hdr += chr(c)
- return hdr
-
+ c = (obj_type << 4) | (obj_size & 0xf)
+ obj_size >>= 4
+ while obj_size:
+ hdr += chr(c | 0x80)
+ c = obj_size & 0x7f
+ obj_size >>= 7
+ #END until size is consumed
+ hdr += chr(c)
+ return hdr
+
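
And the inverse direction, assuming both functions above are in scope:

    hdr = create_pack_object_header(type_to_type_id_map["commit"], 300)
    assert hdr == "\x9c\x12"
    assert pack_object_header_info(hdr + "payload")[:2] == (1, 300)
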
def msb_size(data, offset=0):
- """
- :return: tuple(read_bytes, size) read the msb size from the given random
- access data starting at the given byte offset"""
- size = 0
- i = 0
- l = len(data)
- hit_msb = False
- while i < l:
- c = ord(data[i+offset])
- size |= (c & 0x7f) << i*7
- i += 1
- if not c & 0x80:
- hit_msb = True
- break
- # END check msb bit
- # END while in range
- if not hit_msb:
- raise AssertionError("Could not find terminating MSB byte in data stream")
- return i+offset, size
-
+ """
+    :return: tuple(offset, size) read the MSB size from the given random
+        access data starting at the given byte offset; offset is the absolute index just past the size bytes"""
+ size = 0
+ i = 0
+ l = len(data)
+ hit_msb = False
+ while i < l:
+ c = ord(data[i+offset])
+ size |= (c & 0x7f) << i*7
+ i += 1
+ if not c & 0x80:
+ hit_msb = True
+ break
+ # END check msb bit
+ # END while in range
+ if not hit_msb:
+ raise AssertionError("Could not find terminating MSB byte in data stream")
+ return i+offset, size
+
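
A small decoding sketch (assuming msb_size from above): the value 300 is stored little-endian in 7-bit groups, the high bit of each byte signalling that another byte follows.

    data = "\xac\x02remaining delta data"         # 0xac == 0b10101100 (MSB set), 0x02 == 0b00000010
    offset, size = msb_size(data)
    assert (offset, size) == (2, 300)             # 44 + (2 << 7) == 300, size header ends at byte 2
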
def loose_object_header(type, size):
- """
- :return: string representing the loose object header, which is immediately
- followed by the content stream of size 'size'"""
- return "%s %i\0" % (type, size)
-
+ """
+ :return: string representing the loose object header, which is immediately
+ followed by the content stream of size 'size'"""
+ return "%s %i\0" % (type, size)
+
def write_object(type, size, read, write, chunk_size=chunk_size):
- """
- Write the object as identified by type, size and source_stream into the
- target_stream
-
- :param type: type string of the object
- :param size: amount of bytes to write from source_stream
- :param read: read method of a stream providing the content data
- :param write: write method of the output stream
- :param close_target_stream: if True, the target stream will be closed when
- the routine exits, even if an error is thrown
- :return: The actual amount of bytes written to stream, which includes the header and a trailing newline"""
- tbw = 0 # total num bytes written
-
- # WRITE HEADER: type SP size NULL
- tbw += write(loose_object_header(type, size))
- tbw += stream_copy(read, write, size, chunk_size)
-
- return tbw
+ """
+    Write the object as identified by type and size, reading its content with the
+    given read method and writing it with the given write method
+
+    :param type: type string of the object
+    :param size: amount of bytes to write from the source stream
+    :param read: read method of a stream providing the content data
+    :param write: write method of the output stream
+    :param chunk_size: maximum amount of bytes to copy per read/write call
+    :return: The actual amount of bytes written to the stream, which includes the header"""
+ tbw = 0 # total num bytes written
+
+ # WRITE HEADER: type SP size NULL
+ tbw += write(loose_object_header(type, size))
+ tbw += stream_copy(read, write, size, chunk_size)
+
+ return tbw
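
A usage sketch (Python 2, assuming write_object from above; StringIO stands in for real streams). Note that write_object adds the return value of write() to its byte count, so the write method handed in must return the number of bytes written:

    from StringIO import StringIO
    src, dst = StringIO("hello world"), StringIO()

    def counting_write(data):
        dst.write(data)
        return len(data)                          # StringIO.write itself returns None

    assert write_object("blob", 11, src.read, counting_write) == 19
    assert dst.getvalue() == "blob 11\0hello world"
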
def stream_copy(read, write, size, chunk_size):
- """
- Copy a stream up to size bytes using the provided read and write methods,
- in chunks of chunk_size
-
- :note: its much like stream_copy utility, but operates just using methods"""
- dbw = 0 # num data bytes written
-
- # WRITE ALL DATA UP TO SIZE
- while True:
- cs = min(chunk_size, size-dbw)
- # NOTE: not all write methods return the amount of written bytes, like
- # mmap.write. Its bad, but we just deal with it ... perhaps its not
- # even less efficient
- # data_len = write(read(cs))
- # dbw += data_len
- data = read(cs)
- data_len = len(data)
- dbw += data_len
- write(data)
- if data_len < cs or dbw == size:
- break
- # END check for stream end
- # END duplicate data
- return dbw
-
+ """
+ Copy a stream up to size bytes using the provided read and write methods,
+ in chunks of chunk_size
+
+    :note: it's much like the stream_copy utility, but operates just using methods"""
+ dbw = 0 # num data bytes written
+
+ # WRITE ALL DATA UP TO SIZE
+ while True:
+ cs = min(chunk_size, size-dbw)
+ # NOTE: not all write methods return the amount of written bytes, like
+        # mmap.write. It's bad, but we just deal with it ... perhaps it's not
+ # even less efficient
+ # data_len = write(read(cs))
+ # dbw += data_len
+ data = read(cs)
+ data_len = len(data)
+ dbw += data_len
+ write(data)
+ if data_len < cs or dbw == size:
+ break
+ # END check for stream end
+ # END duplicate data
+ return dbw
+
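
A minimal sketch of stream_copy on its own (hypothetical in-memory streams, with a small chunk size to force several iterations):

    from StringIO import StringIO
    src, dst = StringIO("a" * 10), StringIO()
    copied = stream_copy(src.read, dst.write, 10, 4)      # copy 10 bytes in chunks of 4
    assert copied == 10 and dst.getvalue() == "a" * 10
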
def connect_deltas(dstreams):
- """
- Read the condensed delta chunk information from dstream and merge its information
- into a list of existing delta chunks
-
- :param dstreams: iterable of delta stream objects, the delta to be applied last
- comes first, then all its ancestors in order
- :return: DeltaChunkList, containing all operations to apply"""
- tdcl = None # topmost dcl
-
- dcl = tdcl = TopdownDeltaChunkList()
- for dsi, ds in enumerate(dstreams):
- # print "Stream", dsi
- db = ds.read()
- delta_buf_size = ds.size
-
- # read header
- i, base_size = msb_size(db)
- i, target_size = msb_size(db, i)
-
- # interpret opcodes
- tbw = 0 # amount of target bytes written
- while i < delta_buf_size:
- c = ord(db[i])
- i += 1
- if c & 0x80:
- cp_off, cp_size = 0, 0
- if (c & 0x01):
- cp_off = ord(db[i])
- i += 1
- if (c & 0x02):
- cp_off |= (ord(db[i]) << 8)
- i += 1
- if (c & 0x04):
- cp_off |= (ord(db[i]) << 16)
- i += 1
- if (c & 0x08):
- cp_off |= (ord(db[i]) << 24)
- i += 1
- if (c & 0x10):
- cp_size = ord(db[i])
- i += 1
- if (c & 0x20):
- cp_size |= (ord(db[i]) << 8)
- i += 1
- if (c & 0x40):
- cp_size |= (ord(db[i]) << 16)
- i += 1
-
- if not cp_size:
- cp_size = 0x10000
-
- rbound = cp_off + cp_size
- if (rbound < cp_size or
- rbound > base_size):
- break
-
- dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
- tbw += cp_size
- elif c:
- # NOTE: in C, the data chunks should probably be concatenated here.
- # In python, we do it as a post-process
- dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c]))
- i += c
- tbw += c
- else:
- raise ValueError("unexpected delta opcode 0")
- # END handle command byte
- # END while processing delta data
-
- dcl.compress()
-
- # merge the lists !
- if dsi > 0:
- if not tdcl.connect_with_next_base(dcl):
- break
- # END handle merge
-
- # prepare next base
- dcl = DeltaChunkList()
- # END for each delta stream
-
- return tdcl
-
+ """
+    Read the condensed delta chunk information from the given delta streams and merge it
+ into a list of existing delta chunks
+
+ :param dstreams: iterable of delta stream objects, the delta to be applied last
+ comes first, then all its ancestors in order
+ :return: DeltaChunkList, containing all operations to apply"""
+ tdcl = None # topmost dcl
+
+ dcl = tdcl = TopdownDeltaChunkList()
+ for dsi, ds in enumerate(dstreams):
+ # print "Stream", dsi
+ db = ds.read()
+ delta_buf_size = ds.size
+
+ # read header
+ i, base_size = msb_size(db)
+ i, target_size = msb_size(db, i)
+
+ # interpret opcodes
+ tbw = 0 # amount of target bytes written
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > base_size):
+ break
+
+ dcl.append(DeltaChunk(tbw, cp_size, cp_off, None))
+ tbw += cp_size
+ elif c:
+ # NOTE: in C, the data chunks should probably be concatenated here.
+ # In python, we do it as a post-process
+ dcl.append(DeltaChunk(tbw, c, 0, db[i:i+c]))
+ i += c
+ tbw += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+ dcl.compress()
+
+ # merge the lists !
+ if dsi > 0:
+ if not tdcl.connect_with_next_base(dcl):
+ break
+ # END handle merge
+
+ # prepare next base
+ dcl = DeltaChunkList()
+ # END for each delta stream
+
+ return tdcl
+
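
A hypothetical end-to-end sketch: the _DStream class below merely stands in for the pack delta streams this function normally receives, and the delta payload encodes "base size 5, target size 11, copy 5 bytes from offset 0, then add ' world'".

    class _DStream(object):                        # stand-in: only read() and size are used
        def __init__(self, buf):
            self._buf, self.size = buf, len(buf)
        def read(self):
            return self._buf

    delta = "\x05\x0b\x90\x05\x06 world"
    dcl = connect_deltas([_DStream(delta)])
    out = []
    dcl.apply("hello", lambda d: out.append(str(d)))
    assert "".join(out) == "hello world"
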
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
- """
- Apply data from a delta buffer using a source buffer to the target file
-
- :param src_buf: random access data from which the delta was created
- :param src_buf_size: size of the source buffer in bytes
- :param delta_buf_size: size fo the delta buffer in bytes
- :param delta_buf: random access delta data
- :param write: write method taking a chunk of bytes
- :note: transcribed to python from the similar routine in patch-delta.c"""
- i = 0
- db = delta_buf
- while i < delta_buf_size:
- c = ord(db[i])
- i += 1
- if c & 0x80:
- cp_off, cp_size = 0, 0
- if (c & 0x01):
- cp_off = ord(db[i])
- i += 1
- if (c & 0x02):
- cp_off |= (ord(db[i]) << 8)
- i += 1
- if (c & 0x04):
- cp_off |= (ord(db[i]) << 16)
- i += 1
- if (c & 0x08):
- cp_off |= (ord(db[i]) << 24)
- i += 1
- if (c & 0x10):
- cp_size = ord(db[i])
- i += 1
- if (c & 0x20):
- cp_size |= (ord(db[i]) << 8)
- i += 1
- if (c & 0x40):
- cp_size |= (ord(db[i]) << 16)
- i += 1
-
- if not cp_size:
- cp_size = 0x10000
-
- rbound = cp_off + cp_size
- if (rbound < cp_size or
- rbound > src_buf_size):
- break
- write(buffer(src_buf, cp_off, cp_size))
- elif c:
- write(db[i:i+c])
- i += c
- else:
- raise ValueError("unexpected delta opcode 0")
- # END handle command byte
- # END while processing delta data
-
- # yes, lets use the exact same error message that git uses :)
- assert i == delta_buf_size, "delta replay has gone wild"
-
-
+ """
+ Apply data from a delta buffer using a source buffer to the target file
+
+ :param src_buf: random access data from which the delta was created
+ :param src_buf_size: size of the source buffer in bytes
+    :param delta_buf_size: size of the delta buffer in bytes
+ :param delta_buf: random access delta data
+ :param write: write method taking a chunk of bytes
+ :note: transcribed to python from the similar routine in patch-delta.c"""
+ i = 0
+ db = delta_buf
+ while i < delta_buf_size:
+ c = ord(db[i])
+ i += 1
+ if c & 0x80:
+ cp_off, cp_size = 0, 0
+ if (c & 0x01):
+ cp_off = ord(db[i])
+ i += 1
+ if (c & 0x02):
+ cp_off |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x04):
+ cp_off |= (ord(db[i]) << 16)
+ i += 1
+ if (c & 0x08):
+ cp_off |= (ord(db[i]) << 24)
+ i += 1
+ if (c & 0x10):
+ cp_size = ord(db[i])
+ i += 1
+ if (c & 0x20):
+ cp_size |= (ord(db[i]) << 8)
+ i += 1
+ if (c & 0x40):
+ cp_size |= (ord(db[i]) << 16)
+ i += 1
+
+ if not cp_size:
+ cp_size = 0x10000
+
+ rbound = cp_off + cp_size
+ if (rbound < cp_size or
+ rbound > src_buf_size):
+ break
+ write(buffer(src_buf, cp_off, cp_size))
+ elif c:
+ write(db[i:i+c])
+ i += c
+ else:
+ raise ValueError("unexpected delta opcode 0")
+ # END handle command byte
+ # END while processing delta data
+
+    # yes, let's use the exact same error message that git uses :)
+ assert i == delta_buf_size, "delta replay has gone wild"
+
+
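
A direct sketch (assuming apply_delta_data from above). Unlike connect_deltas, this routine expects the two leading size headers to have been consumed already, so the buffer below starts at the first opcode:

    src = "hello"
    ops = "\x90\x05\x06 world"                     # copy 5 bytes from offset 0, then add " world"
    out = []
    apply_delta_data(src, len(src), ops, len(ops), lambda d: out.append(str(d)))
    assert "".join(out) == "hello world"
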
def is_equal_canonical_sha(canonical_length, match, sha1):
- """
- :return: True if the given lhs and rhs 20 byte binary shas
- The comparison will take the canonical_length of the match sha into account,
- hence the comparison will only use the last 4 bytes for uneven canonical representations
- :param match: less than 20 byte sha
- :param sha1: 20 byte sha"""
- binary_length = canonical_length/2
- if match[:binary_length] != sha1[:binary_length]:
- return False
-
- if canonical_length - binary_length and \
- (ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
- return False
- # END handle uneven canonnical length
- return True
-
+ """
+    :return: True if the given partial binary sha (match) equals the corresponding prefix of the given 20 byte binary sha (sha1)
+        The comparison will take the canonical_length of the match sha into account,
+        hence for uneven canonical representations only the high 4 bits of the last byte take part in the comparison
+    :param canonical_length: length of the match in hexadecimal characters
+    :param match: less than 20 byte sha
+ :param sha1: 20 byte sha"""
+ binary_length = canonical_length/2
+ if match[:binary_length] != sha1[:binary_length]:
+ return False
+
+ if canonical_length - binary_length and \
+ (ord(match[-1]) ^ ord(sha1[len(match)-1])) & 0xf0:
+ return False
+    # END handle uneven canonical length
+ return True
+
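
A sketch with a 5-hex-digit (odd length) prefix, assuming is_equal_canonical_sha from above: the match is padded to whole bytes, so only the high nibble of its last byte takes part in the comparison.

    import hashlib
    sha1 = hashlib.sha1("content").digest()                 # full 20 byte binary sha
    match = sha1[:2] + chr(ord(sha1[2]) & 0xf0)             # 5 hex digits worth, low nibble padded
    assert is_equal_canonical_sha(5, match, sha1)
    assert not is_equal_canonical_sha(5, match, chr(255 - ord(sha1[0])) + sha1[1:])
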
#} END routines
try:
- # raise ImportError; # DEBUG
- from _perf import connect_deltas
+ # raise ImportError; # DEBUG
+ from _perf import connect_deltas
except ImportError:
- pass
+ pass