Diffstat (limited to 'lib/git/odb')
-rw-r--r--  lib/git/odb/db.py    | 148
-rw-r--r--  lib/git/odb/utils.py |  78
2 files changed, 202 insertions(+), 24 deletions(-)
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
index fd1b640a..204da9ad 100644
--- a/lib/git/odb/db.py
+++ b/lib/git/odb/db.py
@@ -1,6 +1,21 @@
"""Contains implementations of database retrieveing objects"""
import os
from git.errors import InvalidDBRoot
+from git.utils import IndexFileSHA1Writer
+
+from utils import (
+ to_hex_sha,
+ exists,
+ hex_to_bin,
+ FDCompressedSha1Writer,
+ isdir,
+ mkdir,
+ rename,
+ dirname,
+ join
+ )
+
+import tempfile
class iObjectDBR(object):
@@ -9,29 +24,29 @@ class iObjectDBR(object):
by sha (20 bytes)"""
__slots__ = tuple()
+ def __contains__(self, sha):
+ return self.has_object(sha)
+
#{ Query Interface
- def has_obj_hex(self, hexsha):
- """:return: True if the object identified by the given 40 byte hexsha is
- contained in the database"""
- raise NotImplementedError("To be implemented in subclass")
-
- def has_obj_bin(self, sha):
- """:return: as ``has_obj_hex``, but takes a 20 byte binary sha"""
- raise NotImplementedError("To be implemented in subclass")
-
- def obj_hex(self, hexsha):
- """:return: tuple(type_string, size_in_bytes, stream) a tuple with object
- information including its type, its size as well as a stream from which its
- contents can be read"""
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 40 byte hexsha or 20 byte
+ binary sha is contained in the database"""
raise NotImplementedError("To be implemented in subclass")
- def obj_bin(self, sha):
- """:return: as in ``obj_hex``, but takes a binary sha"""
+ def object(self, sha):
+ """
+ :return: tuple(type_string, size_in_bytes, stream) a tuple with object
+ information including its type, its size as well as a stream from which its
+ contents can be read
+ :param sha: 40 byte hexsha or 20 byte binary sha"""
raise NotImplementedError("To be implemented in subclass")
- def obj_info_hex(self, hexsha):
- """:return: tuple(type_string, size_in_bytes) tuple with the object's type
- string as well as its size in bytes"""
+ def object_info(self, sha):
+ """
+ :return: tuple(type_string, size_in_bytes) tuple with the object's type
+ string as well as its size in bytes
+ :param sha: 40 byte hexsha or 20 byte binary sha"""
raise NotImplementedError("To be implemented in subclass")
#} END query interface
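
The three query methods above replace the former hex/bin method pairs by accepting either a 40 byte hexsha or a 20 byte binary sha. As a minimal sketch (not part of this commit), an in-memory implementation of the read interface could look like the following; the MemoryObjectDB name, the store() helper and the git.odb package path are assumptions made for the example only.

import cStringIO
from git.odb.db import iObjectDBR       # assumed package path
from git.odb.utils import to_hex_sha    # helper added in utils.py below

class MemoryObjectDB(iObjectDBR):
    """Toy database keeping (type_string, data) tuples keyed by hexsha"""
    def __init__(self):
        self._objects = dict()

    def store(self, hexsha, type_string, data):
        # invented convenience method, not part of the interface
        self._objects[hexsha] = (type_string, data)

    def has_object(self, sha):
        return to_hex_sha(sha) in self._objects

    def object(self, sha):
        type_string, data = self._objects[to_hex_sha(sha)]
        return type_string, len(data), cStringIO.StringIO(data)

    def object_info(self, sha):
        type_string, data = self._objects[to_hex_sha(sha)]
        return type_string, len(data)
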
@@ -42,7 +57,7 @@ class iObjectDBW(object):
#{ Edit Interface
- def to_obj(self, type, size, stream, dry_run=False, sha_as_hex=True):
+ def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
"""Create a new object in the database
:return: the sha identifying the object in the database
:param type: type string identifying the object
@@ -53,7 +68,7 @@ class iObjectDBW(object):
hex encoded, not binary"""
raise NotImplementedError("To be implemented in subclass")
- def to_objs(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
+ def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
"""Create multiple new objects in the database
:return: sequence of shas identifying the created objects in the order in which
they were given.
@@ -68,7 +83,7 @@ class iObjectDBW(object):
# actually use multiple threads, default False of course. If the add
shas = list()
for args in iter_info:
- shas.append(self.to_obj(*args, dry_run=dry_run, sha_as_hex=sha_as_hex))
+ shas.append(self.to_object(*args, dry_run=dry_run, sha_as_hex=sha_as_hex))
return shas
#} END edit interface
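
A minimal usage sketch for the batch method (not part of this commit): iter_info yields (type, size, stream) tuples which are forwarded to to_object one at a time. The git.odb package path is assumed, and the temporary directory merely stands in for a real database root.

import tempfile
from cStringIO import StringIO
from git.odb.db import LooseObjectDB    # assumed package path

db = LooseObjectDB(tempfile.mkdtemp())  # any iObjectDBW implementation works
payloads = ("first blob", "second blob")
iter_info = (("blob", len(p), StringIO(p)) for p in payloads)

# dry_run computes and returns the shas without keeping the written objects
shas = db.to_objects(iter_info, dry_run=True)
assert len(shas) == len(payloads)
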
@@ -95,18 +110,103 @@ class FileDBBase(object):
""":return: path at which this db operates"""
return self._root_path
+ def db_path(self, rela_path):
+ """
+ :return: the given relative path relative to our database root, allowing
+ callers to potentially access data files directly"""
+ return join(self._root_path, rela_path)
#} END interface
#{ Utilities
- def _root_rela_path(self, rela_path):
- """:return: the given relative path relative to our database root"""
- return os.path.join(self._root_path, rela_path)
+
#} END utilities
class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
"""A database which operates on loose object files"""
+ __slots__ = ('_hexsha_to_file', )
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = 1000*1000
+
+ def __init__(self, root_path):
+ super(LooseObjectDB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+
+ #{ Interface
+ def hexsha_to_object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ #} END interface
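
A small sketch of the resulting fan-out layout, assuming the git.odb package path and a placeholder root directory that already exists: hexsha_to_object_path splits off the first two hex characters as a directory name, and db_path from FileDBBase anchors that relative path at the database root.

from git.odb.db import LooseObjectDB                   # assumed package path

db = LooseObjectDB("/tmp/repo/.git/objects")           # placeholder, assumed to exist
hexsha = "da39a3ee5e6b4b0d3255bfef95601890afd80709"    # arbitrary example hexsha

rela_path = db.hexsha_to_object_path(hexsha)
# on POSIX: 'da/39a3ee5e6b4b0d3255bfef95601890afd80709'
abs_path = db.db_path(rela_path)
# on POSIX: '/tmp/repo/.git/objects/da/39a3ee5e6b4b0d3255bfef95601890afd80709'
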
+
+ def has_object(self, sha):
+ sha = to_hex_sha(sha)
+ # try cache
+ if sha in self._hexsha_to_file:
+ return True
+
+ # try filesystem
+ path = self.db_path(self.hexsha_to_object_path(sha))
+ if exists(path):
+ self._hexsha_to_file[sha] = path
+ return True
+ # END handle cache
+ return False
+
+ def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True):
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+ writer = FDCompressedSha1Writer(fd)
+
+ # WRITE HEADER: type SP size NULL
+ writer.write("%s %i%s" % (type, size, chr(0)))
+
+ # WRITE ALL DATA
+ chunksize = self.stream_chunk_size
+ try:
+ try:
+ while True:
+ data_len = writer.write(stream.read(chunksize))
+ if data_len < chunksize:
+ # WRITE FOOTER
+ writer.write('\n')
+ break
+ # END check for stream end
+ # END duplicate data
+ finally:
+ writer.close()
+ # END assure file was closed
+ except:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+
+
+ # in dry-run mode, we delete the file afterwards
+ sha = writer.sha(as_hex=True)
+
+ if dry_run:
+ os.remove(tmp_path)
+ else:
+ # rename the file into place
+ obj_path = self.db_path(self.hexsha_to_object_path(sha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ rename(tmp_path, obj_path)
+ # END handle dry_run
+
+ if not sha_as_hex:
+ sha = hex_to_bin(sha)
+ # END handle sha format
+
+ return sha
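
An end-to-end sketch of the write path above (not part of the commit), using a temporary directory in place of a real .git/objects tree and the assumed git.odb package path: the stream is hashed and compressed through FDCompressedSha1Writer into a temporary file, which is then renamed into its fan-out location so that has_object finds it.

import tempfile
from cStringIO import StringIO
from git.odb.db import LooseObjectDB    # assumed package path

root = tempfile.mkdtemp()               # stands in for .git/objects
db = LooseObjectDB(root)

data = "hello git"
sha = db.to_object("blob", len(data), StringIO(data))

assert len(sha) == 40                   # hex encoded by default
assert db.has_object(sha)               # file now lives at <root>/<sha[:2]>/<sha[2:]>
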
class PackedDB(FileDBBase, iObjectDBR):
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
new file mode 100644
index 00000000..04d3eaba
--- /dev/null
+++ b/lib/git/odb/utils.py
@@ -0,0 +1,78 @@
+import binascii
+import os
+import zlib
+from git.utils import make_sha
+
+__all__ = ('FDCompressedSha1Writer', )
+
+#{ Routines
+
+hex_to_bin = binascii.a2b_hex
+bin_to_hex = binascii.b2a_hex
+
+def to_hex_sha(sha):
+ """:return: hexified version of sha"""
+ if len(sha) == 40:
+ return sha
+ return bin_to_hex(sha)
+
+def to_bin_sha(sha):
+ """:return: binary version of sha"""
+ if len(sha) == 20:
+ return sha
+ return hex_to_bin(sha)
+
+# os shortcuts
+exists = os.path.exists
+mkdir = os.mkdir
+isdir = os.path.isdir
+rename = os.rename
+dirname = os.path.dirname
+join = os.path.join
+read = os.read
+write = os.write
+close = os.close
+#} END Routines
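
A quick sanity sketch for the two conversion helpers above, assuming they are in scope; the hexsha value is arbitrary.

hexsha = "da39a3ee5e6b4b0d3255bfef95601890afd80709"    # arbitrary example
binsha = to_bin_sha(hexsha)

assert len(binsha) == 20
assert to_hex_sha(binsha) == hexsha
assert to_hex_sha(hexsha) is hexsha     # 40 byte input is passed through unchanged
assert to_bin_sha(binsha) is binsha     # 20 byte input is passed through unchanged
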
+
+
+#{ Classes
+
+class FDCompressedSha1Writer(object):
+ """Digests data written to it, making the sha available, then compress the
+ data and write it to the file descriptor
+ :note: operates on raw file descriptors
+ :note: for this to work, you have to use the close-method of this instance"""
+ __slots__ = ("fd", "sha1", "zip")
+
+ # default exception
+ exc = IOError("Failed to write all bytes to file descriptor")
+
+ def __init__(self, fd):
+ self.fd = fd
+ self.sha1 = make_sha("")
+ self.zip = zlib.compressobj()
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+ :return: length of incoming data"""
+ self.sha1.update(data)
+ cdata = self.zip.compress(data)
+ bytes_written = write(self.fd, cdata)
+ if bytes_written != len(cdata):
+ raise self.exc
+ # return the uncompressed input length so the loop in to_object can detect the stream end
+ return len(data)
+
+ def sha(self, as_hex = False):
+ """:return: sha so far
+ :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
+ if as_hex:
+ return self.sha1.hexdigest()
+ return self.sha1.digest()
+
+ def close(self):
+ remainder = self.zip.flush()
+ if write(self.fd, remainder) != len(remainder):
+ raise self.exc
+ return close(self.fd)
+
+
+#} END classes
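
Finally, a sketch of the writer in isolation (temporary file and package path are assumptions for the example): data written through it is hashed and zlib-compressed onto the raw file descriptor, and close() flushes the compressor before closing the descriptor.

import os
import tempfile
import zlib

from git.odb.utils import FDCompressedSha1Writer    # assumed package path
from git.utils import make_sha

payload = "blob 9%shello git" % chr(0)      # same header format used by to_object

fd, path = tempfile.mkstemp()
writer = FDCompressedSha1Writer(fd)
writer.write(payload)
hexsha = writer.sha(as_hex=True)
writer.close()                               # flushes zlib and closes the fd

assert hexsha == make_sha(payload).hexdigest()
assert zlib.decompress(open(path, 'rb').read()) == payload
os.remove(path)
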