summaryrefslogtreecommitdiff
path: root/lib/git
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-06-03 18:21:05 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-06-03 18:21:05 +0200
commit26e138cb47dccc859ff219f108ce9b7d96cbcbcd (patch)
treedb40b36d8265f6e2d55a5e4b6f9a6e4a991d2819 /lib/git
parent38d59fc8ccccae8882fa48671377bf40a27915a7 (diff)
downloadgitpython-26e138cb47dccc859ff219f108ce9b7d96cbcbcd.tar.gz
odb: fixed streamed decompression reader ( specific tests would still be missing ) and added performance tests which are extremely promising
Diffstat (limited to 'lib/git')
-rw-r--r--lib/git/odb/db.py4
-rw-r--r--lib/git/odb/utils.py22
2 files changed, 16 insertions, 10 deletions
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
index 5c50a512..e656b2b5 100644
--- a/lib/git/odb/db.py
+++ b/lib/git/odb/db.py
@@ -91,8 +91,8 @@ class iObjectDBW(object):
they were given.
:param iter_info: iterable yielding tuples containing the type_string
size_in_bytes and the stream with the content data.
- :param dry_run: see ``to_obj``
- :param sha_as_hex: see ``to_obj``
+ :param dry_run: see ``to_object``
+ :param sha_as_hex: see ``to_object``
:param max_threads: if < 1, any number of threads may be started while processing
the request, otherwise the given number of threads will be started.
:raise IOError: if data could not be written"""
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
index 8a054201..1e4a8e9d 100644
--- a/lib/git/odb/utils.py
+++ b/lib/git/odb/utils.py
@@ -39,7 +39,7 @@ write = os.write
close = os.close
# ZLIB configuration
-# used when compressing objects
+# used when compressing objects - 1 to 9 ( slowest )
Z_BEST_SPEED = 1
#} END Routines
@@ -70,7 +70,7 @@ class FDCompressedSha1Writer(object):
bytes_written = write(self.fd, cdata)
if bytes_written != len(cdata):
raise self.exc
- return bytes_written
+ return len(data)
def sha(self, as_hex = False):
""":return: sha so far
@@ -175,7 +175,7 @@ class DecompressMemMapReader(object):
self._br += size
return dat
else:
- dat = self._buf.getvalue() # ouch, duplicates data
+ dat = self._buf.read() # ouch, duplicates data
size -= self._buflen
self._br += self._buflen
@@ -195,28 +195,34 @@ class DecompressMemMapReader(object):
# copied once, and another copy of a part of it when it creates the unconsumed
# tail. We have to use it to hand in the appropriate amount of bytes during
# the next read.
- if self._zip.unconsumed_tail:
+ tail = self._zip.unconsumed_tail
+ if tail:
# move the window, make it as large as size demands. For code-clarity,
# we just take the chunk from our map again instead of reusing the unconsumed
# tail. The latter one would save some memory copying, but we could end up
# with not getting enough data uncompressed, so we had to sort that out as well.
# Now we just assume the worst case, hence the data is uncompressed and the window
# needs to be as large as the uncompressed bytes we want to read.
- self._cws = self._cwe - len(self._zip.unconsumed_tail)
+ self._cws = self._cwe - len(tail)
self._cwe = self._cws + size
+
+
indata = self._m[self._cws:self._cwe] # another copy ... :(
+ # get the actual window end to be sure we don't use it for computations
+ self._cwe = self._cws + len(indata)
else:
cws = self._cws
self._cws = self._cwe
self._cwe = cws + size
indata = self._m[self._cws:self._cwe] # ... copy it again :(
# END handle tail
-
+
dcompdat = self._zip.decompress(indata, size)
- self._br += len(dcompdat)
+ self._br += len(dcompdat)
if dat:
- return dat + dcompdat
+ dcompdat = dat + dcompdat
+
return dcompdat
#} END classes