initial version of new odb design to facilitate a channel based multi-threading implementation of all odb functions

author: Sebastian Thiel <byronimo@gmail.com> 2010-06-04 14:41:15 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-04 14:41:15 +0200
commit: a1e80445ad5cb6da4c0070d7cb8af89da3b0803b (patch)
tree: ff94069b9d49d5a06576a0838a1bbde1e8c992ae /lib/git/odb/db.py
parent: b01ca6a3e4ae9d944d799743c8ff774e2a7a82b6 (diff)
download: gitpython-a1e80445ad5cb6da4c0070d7cb8af89da3b0803b.tar.gz
1 files changed, 72 insertions, 42 deletions
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
index 1d1d4c40..7ae8f446 100644
--- a/lib/git/odb/db.py
+++ b/lib/git/odb/db.py
@@ -6,9 +6,12 @@ from git.errors import (
 	BadObjectType
 	)
 
-from utils import (
+from stream import (
 		DecompressMemMapReader,
-		FDCompressedSha1Writer,
+		FDCompressedSha1Writer
+	)
+
+from utils import (
 		ENOENT,
 		to_hex_sha,
 		exists,
@@ -31,7 +34,7 @@ import mmap
 import os
 
 
-class iObjectDBR(object):
+class ObjectDBR(object):
 	"""Defines an interface for object database lookup.
 	Objects are identified either by hex-sha (40 bytes) or 
 	by sha (20 bytes)"""
@@ -48,62 +51,87 @@ class iObjectDBR(object):
 		:raise BadObject:"""
 		raise NotImplementedError("To be implemented in subclass")
 		
-	def object(self, sha):
-		"""
-		:return: tuple(type_string, size_in_bytes, stream) a tuple with object
-			information including its type, its size as well as a stream from which its
-			contents can be read
+	def info(self, sha):
+		""" :return: ODB_Info instance
 		:param sha: 40 bytes hexsha or 20 bytes binary sha
 		:raise BadObject:"""
 		raise NotImplementedError("To be implemented in subclass")
 		
-	def object_info(self, sha):
-		"""
-		:return: tuple(type_string, size_in_bytes) tuple with the object's type 
-			string as well as its size in bytes
+	def info_async(self, input_channel):
+		"""Retrieve information of a multitude of objects asynchronously
+		:param input_channel: Channel yielding the sha's of the objects of interest
+		:return: Channel yielding ODB_Info|InvalidODB_Info, in any order"""
+		raise NotImplementedError("To be implemented in subclass")
+		
+	def stream(self, sha):
+		""":return: ODB_OStream instance
 		:param sha: 40 bytes hexsha or 20 bytes binary sha
 		:raise BadObject:"""
 		raise NotImplementedError("To be implemented in subclass")
+		
+	def stream_async(self, input_channel):
+		"""Retrieve the ODB_OStream of multiple objects
+		:param input_channel: see ``info``
+		:param max_threads: see ``ObjectDBW.store``
+		:return: Channel yielding ODB_OStream|InvalidODB_OStream instances in any order"""
+		raise NotImplementedError("To be implemented in subclass")
 			
 	#} END query interface
 	
-class iObjectDBW(object):
+class ObjectDBW(object):
 	"""Defines an interface to create objects in the database"""
-	__slots__ = tuple()
+	__slots__ = "_ostream"
+	
+	def __init__(self, *args, **kwargs):
+		self._ostream = None
 	
 	#{ Edit Interface
+	def set_ostream(self, stream):
+		"""Adjusts the stream to which all data should be sent when storing new objects
+		:param stream: if not None, the stream to use, if None the default stream
+			will be used.
+		:return: previously installed stream, or None if there was no override
+		:raise TypeError: if the stream doesn't have the supported functionality"""
+		cstream = self._ostream
+		self._ostream = stream
+		return cstream
+		
+	def ostream(self):
+		""":return: overridden output stream this instance will write to, or None
+			if it will write to the default stream"""
+			return self._ostream
 	
-	def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
+	def store(self, istream):
 		"""Create a new object in the database
-		:return: the sha identifying the object in the database
-		:param type: type string identifying the object
-		:param size: size of the data to read from stream
-		:param stream: stream providing the data
-		:param dry_run: if True, the object database will not actually be changed
-		:param sha_as_hex: if True, the returned sha identifying the object will be 
-			hex encoded, not binary
+		:return: the input istream object with its sha set to its corresponding value
+		:param istream: ODB_IStream compatible instance. If its sha is already set 
+			to a value, the object will just be stored in the our database format, 
+			in which case the input stream is expected to be in object format ( header + contents ).
 		:raise IOError: if data could not be written"""
 		raise NotImplementedError("To be implemented in subclass")
 	
-	def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
-		"""Create multiple new objects in the database
-		:return: sequence of shas identifying the created objects in the order in which 
-			they where given.
-		:param iter_info: iterable yielding tuples containing the type_string
-			size_in_bytes and the steam with the content data.
-		:param dry_run: see ``to_object``
-		:param sha_as_hex: see ``to_object``
-		:param max_threads: if < 1, any number of threads may be started while processing
-			the request, otherwise the given number of threads will be started.
-		:raise IOError: if data could not be written"""
+	def store_async(self, input_channel):
+		"""Create multiple new objects in the database asynchronously. The method will 
+		return right away, returning an output channel which receives the results as 
+		they are computed.
+		
+		:return: Channel yielding your ODB_IStream which served as input, in any order.
+			The IStreams sha will be set to the sha it received during the process, 
+			or its error attribute will be set to the exception informing about the error.
+		:param input_channel: Channel yielding ODB_IStream instance.
+			As the same instances will be used in the output channel, you can create a map
+			between the id(istream) -> istream
+		:note:As some ODB implementations implement this operation as atomic, they might 
+			abort the whole operation if one item could not be processed. Hence check how 
+			many items have actually been produced."""
 		# a trivial implementation, ignoring the threads for now
 		# TODO: add configuration to the class to determine whether we may 
 		# actually use multiple threads, default False of course. If the add
 		shas = list()
 		for args in iter_info:
-			shas.append(self.to_object(dry_run=dry_run, sha_as_hex=sha_as_hex, *args))
+			shas.append(self.store(dry_run=dry_run, sha_as_hex=sha_as_hex, *args))
 		return shas
-		
+	
 	#} END edit interface
 	
 
@@ -118,6 +146,7 @@ class FileDBBase(object):
 		:raise InvalidDBRoot: 
 		:note: The base will perform basic checking for accessability, but the subclass
 			is required to verify that the root_path contains the database structure it needs"""
+		super(FileDBBase, self).__init__()
 		if not os.path.isdir(root_path):
 			raise InvalidDBRoot(root_path)
 		self._root_path = root_path
@@ -141,7 +170,7 @@ class FileDBBase(object):
 	#} END utilities
 	
 	
-class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
+class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
 	"""A database which operates on loose object files"""
 	__slots__ = ('_hexsha_to_file', '_fd_open_flags')
 	# CONFIGURATION
@@ -210,7 +239,7 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
 			os.close(fd)
 		# END assure file is closed
 			
-	def object_info(self, sha):
+	def info(self, sha):
 		m = self._map_loose_object(sha)
 		try:
 			return loose_object_header_info(m)
@@ -233,8 +262,9 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
 			return False
 		# END check existance
 	
-	def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
+	def store(self, istream):
 		# open a tmp file to write the data to
+		# todo: implement ostream properly
 		fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
 		writer = FDCompressedSha1Writer(fd)
 	
@@ -269,11 +299,11 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
 		return sha
 	
 	
-class PackedDB(FileDBBase, iObjectDBR):
+class PackedDB(FileDBBase, ObjectDBR):
 	"""A database operating on a set of object packs"""
 	
 	
-class CompoundDB(iObjectDBR):
+class CompoundDB(ObjectDBR):
 	"""A database which delegates calls to sub-databases"""
 	
 
@@ -281,7 +311,7 @@ class ReferenceDB(CompoundDB):
 	"""A database consisting of database referred to in a file"""
 	
 	
-#class GitObjectDB(CompoundDB, iObjectDBW):
+#class GitObjectDB(CompoundDB, ObjectDBW):
 class GitObjectDB(LooseObjectDB):
 	"""A database representing the default git object store, which includes loose 
 	objects, pack files and an alternates file
@@ -296,7 +326,7 @@ class GitObjectDB(LooseObjectDB):
 		super(GitObjectDB, self).__init__(root_path)
 		self._git = git
 		
-	def object_info(self, sha):
+	def info(self, sha):
 		discard, type, size = self._git.get_object_header(sha)
 		return type, size
author	Sebastian Thiel <byronimo@gmail.com>	2010-06-04 14:41:15 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2010-06-04 14:41:15 +0200
commit	a1e80445ad5cb6da4c0070d7cb8af89da3b0803b (patch)
tree	ff94069b9d49d5a06576a0838a1bbde1e8c992ae /lib/git/odb/db.py
parent	b01ca6a3e4ae9d944d799743c8ff774e2a7a82b6 (diff)
download	gitpython-a1e80445ad5cb6da4c0070d7cb8af89da3b0803b.tar.gz