diff options
Diffstat (limited to 'lib/git/odb')
-rw-r--r-- | lib/git/odb/fun.py | 17 |
-rw-r--r-- | lib/git/odb/stream.py | 66 |
2 files changed, 52 insertions, 31 deletions
diff --git a/lib/git/odb/fun.py b/lib/git/odb/fun.py index 870a6f02..3321a8ea 100644 --- a/lib/git/odb/fun.py +++ b/lib/git/odb/fun.py @@ -83,26 +83,33 @@ def write_object(type, size, read, write, chunk_size=chunk_size): :param size: amount of bytes to write from source_stream :param read: read method of a stream providing the content data :param write: write method of the output stream - :param close_target_stream: if True, the target stream will be closed when + :param close_target_stream: if True, the target stream will be closed when the routine exits, even if an error is thrown :return: The actual amount of bytes written to stream, which includes the header and a trailing newline""" tbw = 0 # total num bytes written - dbw = 0 # num data bytes written # WRITE HEADER: type SP size NULL tbw += write("%s %i\0" % (type, size)) + tbw += stream_copy(read, write, size, chunk_size) + + return tbw +def stream_copy(read, write, size, chunk_size): + """Copy a stream up to size bytes using the provided read and write methods, + in chunks of chunk_size + :note: its much like stream_copy utility, but operates just using methods""" + dbw = 0 # num data bytes written + # WRITE ALL DATA UP TO SIZE while True: cs = min(chunk_size, size-dbw) data_len = write(read(cs)) dbw += data_len if data_len < cs or dbw == size: - tbw += dbw break # END check for stream end # END duplicate data - return tbw - + return dbw + #} END routines diff --git a/lib/git/odb/stream.py b/lib/git/odb/stream.py index d1181382..654bcbf6 100644 --- a/lib/git/odb/stream.py +++ b/lib/git/odb/stream.py @@ -75,7 +75,7 @@ class OStream(OInfo): """:return: True if reads of this stream yield zlib compressed data. Default False :note: this does not imply anything about the actual internal storage. 
Hence the data could be uncompressed, but read compressed, or vice versa""" - raise False + return False #} END interface @@ -105,10 +105,12 @@ class IStream(list): #{ Interface + @property def hexsha(self): """:return: our sha, hex encoded, 40 bytes""" return to_hex_sha(self[0]) - + + @property def binsha(self): """:return: our sha as binary, 20 bytes""" return to_bin_sha(self[0]) @@ -229,10 +231,11 @@ class DecompressMemMapReader(object): and decompress it into chunks, thats all ... """ __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close') - max_read_size = 512*1024 + max_read_size = 512*1024 # currently unused def __init__(self, m, close_on_deletion, size): - """Initialize with mmap for stream reading""" + """Initialize with mmap for stream reading + :param m: must be content data - use new if you have object data and no size""" self._m = m self._zip = zlib.decompressobj() self._buf = None # buffer of decompressed bytes @@ -248,32 +251,38 @@ class DecompressMemMapReader(object): self._m.close() # END handle resource freeing - @classmethod - def new(self, m, close_on_deletion=False): - """Create a new DecompressMemMapReader instance for acting as a read-only stream - This method parses the object header from m and returns the parsed - type and size, as well as the created stream instance. 
- :param m: memory map on which to oparate - :param close_on_deletion: if True, the memory map will be closed once we are - being deleted""" - inst = DecompressMemMapReader(m, close_on_deletion, 0) - + def _parse_header_info(self): + """If this stream contains object data, parse the header info and skip the + stream to a point where each read will yield object content + :return: parsed type_string, size""" # read header maxb = 512 # should really be enough, cgit uses 8192 I believe - inst._s = maxb - hdr = inst.read(maxb) + self._s = maxb + hdr = self.read(maxb) hdrend = hdr.find("\0") type, size = hdr[:hdrend].split(" ") size = int(size) - inst._s = size + self._s = size # adjust internal state to match actual header length that we ignore # The buffer will be depleted first on future reads - inst._br = 0 + self._br = 0 hdrend += 1 # count terminating \0 - inst._buf = StringIO(hdr[hdrend:]) - inst._buflen = len(hdr) - hdrend + self._buf = StringIO(hdr[hdrend:]) + self._buflen = len(hdr) - hdrend + + return type, size + @classmethod + def new(self, m, close_on_deletion=False): + """Create a new DecompressMemMapReader instance for acting as a read-only stream + This method parses the object header from m and returns the parsed + type and size, as well as the created stream instance. + :param m: memory map on which to oparate. It must be object data ( header + contents ) + :param close_on_deletion: if True, the memory map will be closed once we are + being deleted""" + inst = DecompressMemMapReader(m, close_on_deletion, 0) + type, size = inst._parse_header_info() return type, size, inst def read(self, size=-1): @@ -355,17 +364,22 @@ class DecompressMemMapReader(object): # needs to be as large as the uncompressed bytes we want to read. self._cws = self._cwe - len(tail) self._cwe = self._cws + size - - - indata = self._m[self._cws:self._cwe] # another copy ... 
:( - # get the actual window end to be sure we don't use it for computations - self._cwe = self._cws + len(indata) else: cws = self._cws self._cws = self._cwe self._cwe = cws + size - indata = self._m[self._cws:self._cwe] # ... copy it again :( # END handle tail + + + # if window is too small, make it larger so zip can decompress something + win_size = self._cwe - self._cws + if win_size < 8: + self._cwe = self._cws + 8 + # END adjust winsize + indata = self._m[self._cws:self._cwe] # another copy ... :( + + # get the actual window end to be sure we don't use it for computations + self._cwe = self._cws + len(indata) dcompdat = self._zip.decompress(indata, size) |