3 files changed, 117 insertions, 53 deletions
diff --git a/lib/git/index/base.py b/lib/git/index/base.py
index a605c3ec..96a9430c 100644
--- a/lib/git/index/base.py
+++ b/lib/git/index/base.py
@@ -5,13 +5,13 @@
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Module containing Index implementation, allowing to perform all kinds of index
 manipulations such as querying and merging. """
-import binascii
 import tempfile
 import os
 import sys
 import subprocess
 import glob
 from cStringIO import StringIO
+from binascii import b2a_hex
 
 from stat import (
 					S_ISLNK,
@@ -25,16 +25,12 @@ from stat import (
 from typ import (
 					BaseIndexEntry, 
 					IndexEntry, 
-					CE_NAMEMASK,
-					CE_STAGESHIFT
 				)
 
 from util import (
 					TemporaryFileSwap,
 					post_clear_cache, 
 					default_index,
-					pack, 
-					unpack
 				)
 
 import git.objects
@@ -60,20 +56,17 @@ from git.utils import (
 							LockedFD, 
 							join_path_native, 
 							file_contents_ro,
-							LockFile
-						)
-
-
-from gitdb.base import (
-							IStream
 						)
 
 from fun import (
 					write_cache,
 					read_cache,
+					write_tree_from_cache,
 					entry_key
 				)
 
+from gitdb.base import IStream
+
 __all__ = ( 'IndexFile', 'CheckoutError' )
 
 
@@ -161,10 +154,15 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)
 		return self
 		
-	def _serialize(self, stream, ignore_tree_extension_data=False):
+	def _entries_sorted(self):
+		""":return: list of entries, in a sorted fashion, first by path, then by stage"""
 		entries_sorted = self.entries.values()
-		entries_sorted.sort(key=lambda e: (e[3], e.stage))		# use path/stage as sort key
-		write_cache(entries_sorted,
+		entries_sorted.sort(key=lambda e: (e.path, e.stage))		# use path/stage as sort key
+		return entries_sorted
+		
+	def _serialize(self, stream, ignore_tree_extension_data=False):
+		entries = self._entries_sorted()
+		write_cache(entries,
 					stream,
 					(ignore_tree_extension_data and None) or self._extension_data) 
 		return self
@@ -403,7 +401,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			# TODO: is it necessary to convert the mode ? We did that when adding 
 			# it to the index, right ?
 			mode = self._stat_mode_to_index_mode(entry.mode)
-			blob = Blob(self.repo, entry.sha, mode, entry.path)
+			blob = Blob(self.repo, entry.hexsha, mode, entry.path)
 			blob.size = entry.size
 			output = (entry.stage, blob)
 			if predicate(output):
@@ -490,33 +488,31 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		# allows to lazily reread on demand
 		return self
 
-	def _write_tree(self, missing_ok=False):
+	def write_tree(self):
 		"""Writes this index to a corresponding Tree object into the repository's
 		object database and return it.
-
-		:param missing_ok:
-			If True, missing objects referenced by this index will not result
-			in an error.
-
-		:return: Tree object representing this index"""
+		
+		:return: Tree object representing this index
+		:note: The tree will be written even if one or more objects the tree refers to 
+			do not yet exist in the object database. This could happen if you added
+			Entries to the index directly.
+		:raise ValueError: if there are no entries in the cache
+		:raise UnmergedEntriesError: if the index contains unmerged (stage != 0) entries"""
 		# we obtain no lock as we just flush our contents to disk as tree
 		if not self.entries:
 			raise ValueError("Cannot write empty index")
 		
+		# TODO: use memory db, this helps to prevent IO if the resulting tree
+		# already exists
+		entries = self._entries_sorted()
+		binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries)))
 		
+		# note: additional deserialization could be saved if write_tree_from_cache
+		# would return sorted tree entries
+		root_tree = Tree(self.repo, b2a_hex(binsha), path='')
+		root_tree._cache = tree_items
+		return root_tree
 		
-		return Tree(self.repo, tree_sha, 0, '')
-		
-	def write_tree(self, missing_ok = False):
-		index_path = self._index_path()
-		tmp_index_mover = TemporaryFileSwap(index_path)
-		
-		self.write(index_path, ignore_tree_extension_data=True)
-		tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
-		
-		del(tmp_index_mover)	   # as soon as possible
-		return Tree(self.repo, tree_sha, 0, '')
-
 	def _process_diff_args(self, args):
 		try:
 			args.pop(args.index(self))
@@ -525,7 +521,6 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		# END remove self
 		return args
 
-
 	def _to_relative_path(self, path):
 		""":return: Version of path relative to our git directory or raise ValueError
 		if it is not within our git direcotory"""
@@ -599,7 +594,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 
 			- BaseIndexEntry or type
 				Handling equals the one of Blob objects, but the stage may be
-				explicitly set.
+				explicitly set. Please note that Index Entries require binary sha's.
 
 		:param force:
 			If True, otherwise ignored or excluded files will be
@@ -666,7 +661,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			fprogress(filepath, True, filepath)
 			
 			return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode), 
-									istream.sha, 0, filepath))
+									istream.binsha, 0, filepath))
 		# END utility method
 
 
@@ -691,14 +686,14 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 
 			# HANLDE ENTRY OBJECT CREATION
 			# create objects if required, otherwise go with the existing shas
-			null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
+			null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ]
 			if null_entries_indices:
 				for ei in null_entries_indices:
 					null_entry = entries[ei]
 					new_entry = store_path(null_entry.path)
 					
 					# update null entry
-					entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
+					entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
 				# END for each entry index
 			# END null_entry handling
 
@@ -707,7 +702,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			# all object sha's
 			if path_rewriter:
 				for i,e in enumerate(entries):
-					entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
+					entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
 				# END for each entry
 			# END handle path rewriting
 
diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py
index 2e653ea6..557941d5 100644
--- a/lib/git/index/fun.py
+++ b/lib/git/index/fun.py
@@ -2,6 +2,11 @@
 Contains standalone functions to accompany the index implementation and make it
 more versatile
 """
+from stat import S_IFDIR
+from cStringIO import StringIO
+
+from git.errors import UnmergedEntriesError
+from git.objects.fun import tree_to_stream
 from git.utils import (
 							IndexFileSHA1Writer, 
 						)
@@ -16,12 +21,11 @@ from util import 	(
 					unpack
 					)
 
-from binascii import (
-						hexlify, 
-						unhexlify
-					)
+from gitdb.base import IStream
+from gitdb.typ import str_tree_type
+from binascii import a2b_hex
 
-__all__ = ('write_cache', 'read_cache' )
+__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' )
 
 def write_cache_entry(entry, stream):
 	"""Write the given entry to the stream"""
@@ -34,7 +38,7 @@ def write_cache_entry(entry, stream):
 	assert plen == len(path), "Path %s too long to fit into index" % entry[3]
 	flags = plen | entry[2]
 	write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
-								entry[8], entry[9], entry[10], unhexlify(entry[1]), flags))
+								entry[8], entry[9], entry[10], entry[1], flags))
 	write(path)
 	real_size = ((stream.tell() - beginoffset + 8) & ~7)
 	write("\0" * ((beginoffset + real_size) - stream.tell()))
@@ -80,7 +84,7 @@ def read_entry(stream):
 
 	real_size = ((stream.tell() - beginoffset + 8) & ~7)
 	data = stream.read((beginoffset + real_size) - stream.tell())
-	return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
+	return IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
 
 def read_header(stream):
 		"""Return tuple(version_long, num_entries) from the given stream"""
@@ -136,3 +140,58 @@ def read_cache(stream):
 	
 	return (version, entries, extension_data, content_sha)
 	
+def write_tree_from_cache(entries, odb, sl, si=0):
+	"""Create a tree from the given sorted list of entries and put the respective
+	trees into the given object database
+	:param entries: **sorted** list of IndexEntries
+	:param odb: object database to store the trees in
+	:param sl: slice indicating the range we should process on the entries list
+	:param si: offset into each entry's path at which the current tree level starts
+	:return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of 
+		tree entries being a tuple of binsha, mode, name
+	:raise UnmergedEntriesError: if an entry in the range has a non-zero stage"""
+	tree_items = list()
+	ci = sl.start
+	end = sl.stop
+	while ci < end:
+		entry = entries[ci]
+		if entry.stage != 0:
+			raise UnmergedEntriesError(entry)
+		# END abort on unmerged
+		ci += 1
+		rbound = entry.path.find('/', si)
+		if rbound == -1:
+			# its not a tree
+			tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
+		else:
+			# find common base range, only looking at the path portion starting at si
+			base = entry.path[si:rbound]
+			xi = ci
+			while xi < end:
+				oentry = entries[xi]
+				orbound = oentry.path.find('/', si)
+				if orbound == -1 or oentry.path[si:orbound] != base:
+					break
+				# END abort on base mismatch
+				xi += 1		# advance only on match, xi stays an exclusive bound
+			# END find common base
+			
+			# enter recursion, ci - 1 as we want to count our current item as well
+			sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
+			tree_items.append((sha, S_IFDIR, base))
+			
+			# skip ahead
+			ci = xi
+		# END handle bounds 
+	# END for each entry
+	
+	# finally create the tree
+	sio = StringIO()
+	tree_to_stream(tree_items, sio.write)
+	sio.seek(0)
+	
+	istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
+	return (istream.binsha, tree_items)
+	
+	
+	
diff --git a/lib/git/index/typ.py b/lib/git/index/typ.py
index b5dac58a..6ef1d2f2 100644
--- a/lib/git/index/typ.py
+++ b/lib/git/index/typ.py
@@ -5,6 +5,11 @@ from util import (
 					unpack
 				)
 
+from binascii import (
+						b2a_hex,
+						a2b_hex
+					)
+
 __all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry')
 
 #{ Invariants
@@ -50,7 +55,7 @@ class BaseIndexEntry(tuple):
 	use numeric indices for performance reasons. """
 
 	def __str__(self):
-		return "%o %s %i\t%s" % (self.mode, self.sha, self.stage, self.path)
+		return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path)
 
 	@property
 	def mode(self):
@@ -58,9 +63,14 @@ class BaseIndexEntry(tuple):
 		return self[0]
 
 	@property
-	def sha(self):
-		""" hex sha of the blob """
+	def binsha(self):
+		"""binary sha of the blob """
 		return self[1]
+		
+	@property
+	def hexsha(self):
+		"""hex version of our binary sha, as computed by binascii.b2a_hex"""
+		return b2a_hex(self[1])
 
 	@property
 	def stage(self):
@@ -88,7 +98,7 @@ class BaseIndexEntry(tuple):
 	@classmethod
 	def from_blob(cls, blob, stage = 0):
 		""":return: Fully equipped BaseIndexEntry at the given stage"""
-		return cls((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path))
+		return cls((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path))
 
 
 class IndexEntry(BaseIndexEntry):
@@ -145,12 +155,12 @@ class IndexEntry(BaseIndexEntry):
 
 		:param base: Instance of type BaseIndexEntry"""
 		time = pack(">LL", 0, 0)
-		return IndexEntry((base.mode, base.sha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
+		return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
 
 	@classmethod
 	def from_blob(cls, blob, stage = 0):
 		""":return: Minimal entry resembling the given blob object"""
 		time = pack(">LL", 0, 0)
-		return IndexEntry((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
+		return IndexEntry((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))