| | | |
|---|---|---|
| author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-03 19:04:18 +0200 |
| committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-03 20:40:43 +0200 |
| commit | 4b4a514e51fbc7dc6ddcb27c188159d57b5d1fa9 (patch) | |
| tree | 1401628227fda3f1ab5c81c1ad9ae6213e6ccacb /lib/git/odb/utils.py | |
| parent | 26e138cb47dccc859ff219f108ce9b7d96cbcbcd (diff) | |
| download | gitpython-4b4a514e51fbc7dc6ddcb27c188159d57b5d1fa9.tar.gz | |
Added performance comparison to cgit ... and yes, git-python is faster :)
Diffstat (limited to 'lib/git/odb/utils.py')
-rw-r--r-- | lib/git/odb/utils.py | 31
1 file changed, 27 insertions, 4 deletions
```diff
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
index 1e4a8e9d..94d1cea8 100644
--- a/lib/git/odb/utils.py
+++ b/lib/git/odb/utils.py
@@ -103,10 +103,12 @@ class DecompressMemMapReader(object):
 	times we actually allocate. An own zlib implementation would be good here
 	to better support streamed reading - it would only need to keep the mmap
 	and decompress it into chunks, thats all ... """
-	__slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_cs', '_close')
+	__slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')
 
-	def __init__(self, m, close_on_deletion, cs = 128*1024):
-		"""Initialize with mmap and chunk_size for stream reading"""
+	max_read_size = 512*1024
+
+	def __init__(self, m, close_on_deletion):
+		"""Initialize with mmap for stream reading"""
 		self._m = m
 		self._zip = zlib.decompressobj()
 		self._buf = None			# buffer of decompressed bytes
@@ -115,7 +117,6 @@ class DecompressMemMapReader(object):
 		self._br = 0				# num uncompressed bytes read
 		self._cws = 0				# start byte of compression window
 		self._cwe = 0				# end byte of compression window
-		self._cs = cs				# chunk size (when reading from zip)
 		self._close = close_on_deletion		# close the memmap on deletion ?
 
 	def __del__(self):
@@ -163,6 +164,28 @@ class DecompressMemMapReader(object):
 			return str()
 		# END handle depletion
 
+		# protect from memory peaks
+		# If he tries to read large chunks, our memory patterns get really bad
+		# as we end up copying a possibly huge chunk from our memory map right into
+		# memory. This might not even be possible. Nonetheless, try to dampen the
+		# effect a bit by reading in chunks, returning a huge string in the end.
+		# Our performance now depends on StringIO. This way we don't need two large
+		# buffers in peak times, but only one large one in the end which is
+		# the return buffer
+		if size > self.max_read_size:
+			sio = StringIO()
+			while size:
+				read_size = min(self.max_read_size, size)
+				data = self.read(read_size)
+				sio.write(data)
+				size -= len(data)
+				if len(data) < read_size:
+					break
+			# END data loop
+			sio.seek(0)
+			return sio.getvalue()
+		# END handle maxread
+
 		# deplete the buffer, then just continue using the decompress object
 		# which has an own buffer. We just need this to transparently parse the
 		# header from the zlib stream
```
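The added branch caps every pass at max_read_size and collects the pieces in a StringIO, so at peak only the final return buffer is large rather than two oversized buffers at once. Below is a minimal standalone sketch of the same capped-chunk pattern, assuming a generic file-like stream; the helper name read_capped, the MAX_READ_SIZE constant, and the use of Python 3's io.BytesIO are illustrative and not part of gitpython's API.

```python
import io

MAX_READ_SIZE = 512 * 1024  # cap on a single underlying read, mirroring max_read_size above


def read_capped(stream, size):
    """Read up to `size` bytes from `stream`, never requesting more than
    MAX_READ_SIZE per call. Chunks accumulate in a BytesIO, so only the
    final result buffer is ever large."""
    if size <= MAX_READ_SIZE:
        return stream.read(size)
    sio = io.BytesIO()
    while size > 0:
        read_size = min(MAX_READ_SIZE, size)
        data = stream.read(read_size)
        sio.write(data)
        size -= len(data)
        if len(data) < read_size:
            # underlying stream depleted before `size` bytes arrived
            break
    return sio.getvalue()


# usage sketch against an in-memory stream (fully self-contained)
blob = io.BytesIO(b"x" * (4 * 1024 * 1024))
head = read_capped(blob, 4 * 1024 * 1024)
assert len(head) == 4 * 1024 * 1024
```

In the reader itself, the recursive self.read(read_size) call plays the role of stream.read here: each iteration decompresses at most max_read_size bytes from the memory map, which is what keeps the peak memory footprint down to the single result buffer.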