11 files changed, 266 insertions, 187 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py
index 7a0d000d..7860a3c1 100644
--- a/lib/git/__init__.py
+++ b/lib/git/__init__.py
@@ -28,7 +28,7 @@ from git.config import GitConfigParser
 from git.objects import *
 from git.refs import *
 from git.diff import *
-from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
+from git.errors import *
 from git.cmd import Git
 from git.repo import Repo
 from git.remote import *
diff --git a/lib/git/db.py b/lib/git/db.py
index f3698b76..b7cf0fc7 100644
--- a/lib/git/db.py
+++ b/lib/git/db.py
@@ -4,9 +4,12 @@ from gitdb.base import (
 								OStream
 							)
 
+from gitdb.util import to_hex_sha
+
 from gitdb.db import GitDB
 from gitdb.db import LooseObjectDB
 
+
 __all__ = ('GitCmdObjectDB', 'GitDB' )
 
 #class GitCmdObjectDB(CompoundDB, ObjectDBW):
@@ -24,11 +27,11 @@ class GitCmdObjectDB(LooseObjectDB):
 		self._git = git
 		
 	def info(self, sha):
-		t = self._git.get_object_header(sha)
+		t = self._git.get_object_header(to_hex_sha(sha))
 		return OInfo(*t)
 		
 	def stream(self, sha):
 		"""For now, all lookup is done by git itself"""
-		t = self._git.stream_object_data(sha)
+		t = self._git.stream_object_data(to_hex_sha(sha))
 		return OStream(*t)
 	
diff --git a/lib/git/diff.py b/lib/git/diff.py
index 9df0c499..f9a0a66f 100644
--- a/lib/git/diff.py
+++ b/lib/git/diff.py
@@ -196,7 +196,7 @@ class Diff(object):
                                     \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
                             """, re.VERBOSE | re.MULTILINE)
     # can be used for comparisons
-    null_hex_sha = "0"*40
+    NULL_HEX_SHA = "0"*40
     
     __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", 
                  "rename_from", "rename_to", "diff")
diff --git a/lib/git/errors.py b/lib/git/errors.py
index 5fe99915..93919d5e 100644
--- a/lib/git/errors.py
+++ b/lib/git/errors.py
@@ -6,43 +6,51 @@
 """ Module containing all exceptions thrown througout the git package, """
 
 class InvalidGitRepositoryError(Exception):
-    """ Thrown if the given repository appears to have an invalid format.  """
+	""" Thrown if the given repository appears to have an invalid format.  """
 
 
 class NoSuchPathError(OSError):
-    """ Thrown if a path could not be access by the system. """
+	""" Thrown if a path could not be access by the system. """
 
 
 class GitCommandError(Exception):
-    """ Thrown if execution of the git command fails with non-zero status code. """
-    def __init__(self, command, status, stderr=None):
-        self.stderr = stderr
-        self.status = status
-        self.command = command
-        
-    def __str__(self):
-        return ("'%s' returned exit status %i: %s" %
-                    (' '.join(str(i) for i in self.command), self.status, self.stderr))
+	""" Thrown if execution of the git command fails with non-zero status code. """
+	def __init__(self, command, status, stderr=None):
+		self.stderr = stderr
+		self.status = status
+		self.command = command
+		
+	def __str__(self):
+		return ("'%s' returned exit status %i: %s" %
+					(' '.join(str(i) for i in self.command), self.status, self.stderr))
 
 
 class CheckoutError( Exception ):
-    """Thrown if a file could not be checked out from the index as it contained
-    changes.
-
-    The .failed_files attribute contains a list of relative paths that failed
-    to be checked out as they contained changes that did not exist in the index.
-
-    The .failed_reasons attribute contains a string informing about the actual
-    cause of the issue.
-
-    The .valid_files attribute contains a list of relative paths to files that
-    were checked out successfully and hence match the version stored in the
-    index"""
-    def __init__(self, message, failed_files, valid_files, failed_reasons):
-        Exception.__init__(self, message)
-        self.failed_files = failed_files
-        self.failed_reasons = failed_reasons
-        self.valid_files = valid_files
-
-    def __str__(self):
-        return Exception.__str__(self) + ":%s" % self.failed_files
+	"""Thrown if a file could not be checked out from the index as it contained
+	changes.
+
+	The .failed_files attribute contains a list of relative paths that failed
+	to be checked out as they contained changes that did not exist in the index.
+
+	The .failed_reasons attribute contains a string informing about the actual
+	cause of the issue.
+
+	The .valid_files attribute contains a list of relative paths to files that
+	were checked out successfully and hence match the version stored in the
+	index"""
+	def __init__(self, message, failed_files, valid_files, failed_reasons):
+		Exception.__init__(self, message)
+		self.failed_files = failed_files
+		self.failed_reasons = failed_reasons
+		self.valid_files = valid_files
+
+	def __str__(self):
+		return Exception.__str__(self) + ":%s" % self.failed_files
+		
+		
+class CacheError(Exception):
+	"""Base for all errors related to the git index, which is called cache internally"""
+
+class UnmergedEntriesError(CacheError):
+	"""Thrown if an operation cannot proceed as there are still unmerged 
+	entries in the cache"""
diff --git a/lib/git/index/base.py b/lib/git/index/base.py
index a605c3ec..96a9430c 100644
--- a/lib/git/index/base.py
+++ b/lib/git/index/base.py
@@ -5,13 +5,13 @@
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Module containing Index implementation, allowing to perform all kinds of index
 manipulations such as querying and merging. """
-import binascii
 import tempfile
 import os
 import sys
 import subprocess
 import glob
 from cStringIO import StringIO
+from binascii import b2a_hex
 
 from stat import (
 					S_ISLNK,
@@ -25,16 +25,12 @@ from stat import (
 from typ import (
 					BaseIndexEntry, 
 					IndexEntry, 
-					CE_NAMEMASK,
-					CE_STAGESHIFT
 				)
 
 from util import (
 					TemporaryFileSwap,
 					post_clear_cache, 
 					default_index,
-					pack, 
-					unpack
 				)
 
 import git.objects
@@ -60,20 +56,17 @@ from git.utils import (
 							LockedFD, 
 							join_path_native, 
 							file_contents_ro,
-							LockFile
-						)
-
-
-from gitdb.base import (
-							IStream
 						)
 
 from fun import (
 					write_cache,
 					read_cache,
+					write_tree_from_cache,
 					entry_key
 				)
 
+from gitdb.base import IStream
+
 __all__ = ( 'IndexFile', 'CheckoutError' )
 
 
@@ -161,10 +154,15 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)
 		return self
 		
-	def _serialize(self, stream, ignore_tree_extension_data=False):
+	def _entries_sorted(self):
+		""":return: list of entries, in a sorted fashion, first by path, then by stage"""
 		entries_sorted = self.entries.values()
-		entries_sorted.sort(key=lambda e: (e[3], e.stage))		# use path/stage as sort key
-		write_cache(entries_sorted,
+		entries_sorted.sort(key=lambda e: (e.path, e.stage))		# use path/stage as sort key
+		return entries_sorted
+		
+	def _serialize(self, stream, ignore_tree_extension_data=False):
+		entries = self._entries_sorted()
+		write_cache(entries,
 					stream,
 					(ignore_tree_extension_data and None) or self._extension_data) 
 		return self
@@ -403,7 +401,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			# TODO: is it necessary to convert the mode ? We did that when adding 
 			# it to the index, right ?
 			mode = self._stat_mode_to_index_mode(entry.mode)
-			blob = Blob(self.repo, entry.sha, mode, entry.path)
+			blob = Blob(self.repo, entry.hexsha, mode, entry.path)
 			blob.size = entry.size
 			output = (entry.stage, blob)
 			if predicate(output):
@@ -490,33 +488,31 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		# allows to lazily reread on demand
 		return self
 
-	def _write_tree(self, missing_ok=False):
+	def write_tree(self):
 		"""Writes this index to a corresponding Tree object into the repository's
 		object database and return it.
-
-		:param missing_ok:
-			If True, missing objects referenced by this index will not result
-			in an error.
-
-		:return: Tree object representing this index"""
+		
+		:return: Tree object representing this index
+		:note: The tree will be written even if one or more objects the tree refers to
+			do not yet exist in the object database. This could happen if you added
+			Entries to the index directly.
+		:raise ValueError: if there are no entries in the cache
+		:raise UnmergedEntriesError: if the index contains entries with a non-zero stage"""
 		# we obtain no lock as we just flush our contents to disk as tree
 		if not self.entries:
 			raise ValueError("Cannot write empty index")
 		
+		# TODO: use memory db, this helps to prevent IO if the resulting tree
+		# already exists
+		entries = self._entries_sorted()
+		binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries)))
 		
+		# note: additional deserialization could be saved if write_tree_from_cache
+		# would return sorted tree entries
+		root_tree = Tree(self.repo, b2a_hex(binsha), path='')
+		root_tree._cache = tree_items
+		return root_tree
 		
-		return Tree(self.repo, tree_sha, 0, '')
-		
-	def write_tree(self, missing_ok = False):
-		index_path = self._index_path()
-		tmp_index_mover = TemporaryFileSwap(index_path)
-		
-		self.write(index_path, ignore_tree_extension_data=True)
-		tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
-		
-		del(tmp_index_mover)	   # as soon as possible
-		return Tree(self.repo, tree_sha, 0, '')
-
 	def _process_diff_args(self, args):
 		try:
 			args.pop(args.index(self))
@@ -525,7 +521,6 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 		# END remove self
 		return args
 
-
 	def _to_relative_path(self, path):
 		""":return: Version of path relative to our git directory or raise ValueError
 		if it is not within our git direcotory"""
@@ -599,7 +594,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 
 			- BaseIndexEntry or type
 				Handling equals the one of Blob objects, but the stage may be
-				explicitly set.
+				explicitly set. Please note that Index Entries require binary sha's.
 
 		:param force:
 			If True, otherwise ignored or excluded files will be
@@ -666,7 +661,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			fprogress(filepath, True, filepath)
 			
 			return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode), 
-									istream.sha, 0, filepath))
+									istream.binsha, 0, filepath))
 		# END utility method
 
 
@@ -691,14 +686,14 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 
 			# HANLDE ENTRY OBJECT CREATION
 			# create objects if required, otherwise go with the existing shas
-			null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
+			null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ]
 			if null_entries_indices:
 				for ei in null_entries_indices:
 					null_entry = entries[ei]
 					new_entry = store_path(null_entry.path)
 					
 					# update null entry
-					entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
+					entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
 				# END for each entry index
 			# END null_entry handling
 
@@ -707,7 +702,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
 			# all object sha's
 			if path_rewriter:
 				for i,e in enumerate(entries):
-					entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
+					entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
 				# END for each entry
 			# END handle path rewriting
 
diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py
index 2e653ea6..557941d5 100644
--- a/lib/git/index/fun.py
+++ b/lib/git/index/fun.py
@@ -2,6 +2,11 @@
 Contains standalone functions to accompany the index implementation and make it
 more versatile
 """
+from stat import S_IFDIR
+from cStringIO import StringIO
+
+from git.errors import UnmergedEntriesError
+from git.objects.fun import tree_to_stream
 from git.utils import (
 							IndexFileSHA1Writer, 
 						)
@@ -16,12 +21,11 @@ from util import 	(
 					unpack
 					)
 
-from binascii import (
-						hexlify, 
-						unhexlify
-					)
+from gitdb.base import IStream
+from gitdb.typ import str_tree_type
+from binascii import a2b_hex
 
-__all__ = ('write_cache', 'read_cache' )
+__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' )
 
 def write_cache_entry(entry, stream):
 	"""Write the given entry to the stream"""
@@ -34,7 +38,7 @@ def write_cache_entry(entry, stream):
 	assert plen == len(path), "Path %s too long to fit into index" % entry[3]
 	flags = plen | entry[2]
 	write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
-								entry[8], entry[9], entry[10], unhexlify(entry[1]), flags))
+								entry[8], entry[9], entry[10], entry[1], flags))
 	write(path)
 	real_size = ((stream.tell() - beginoffset + 8) & ~7)
 	write("\0" * ((beginoffset + real_size) - stream.tell()))
@@ -80,7 +84,7 @@ def read_entry(stream):
 
 	real_size = ((stream.tell() - beginoffset + 8) & ~7)
 	data = stream.read((beginoffset + real_size) - stream.tell())
-	return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
+	return IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size))
 
 def read_header(stream):
 		"""Return tuple(version_long, num_entries) from the given stream"""
@@ -136,3 +140,58 @@ def read_cache(stream):
 	
 	return (version, entries, extension_data, content_sha)
 	
+def write_tree_from_cache(entries, odb, sl, si=0):
+	"""Create a tree from the given sorted list of entries and put the respective
+	trees into the given object database
+	:param entries: **sorted** list of IndexEntries
+	:param odb: object database to store the trees in
+	:param si: offset into each entry's path at which names of the current tree level start
+	:param sl: slice indicating the range we should process on the entries list
+	:return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
+		tree entries being a tuple of binsha, mode, name
+	:raise UnmergedEntriesError: if an entry in the range has a non-zero stage"""
+	tree_items = list()
+	ci = sl.start
+	end = sl.stop
+	while ci < end:
+		entry = entries[ci]
+		if entry.stage != 0:
+			raise UnmergedEntriesError(entry)
+		# END abort on unmerged
+		ci += 1
+		rbound = entry.path.find('/', si)
+		if rbound == -1:
+			# it's not a tree
+			tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
+		else:
+			# find common base range
+			base = entry.path[si:rbound]
+			xi = ci
+			while xi < end:
+				oentry = entries[xi]
+				# search from si, earlier separators belong to parent trees
+				orbound = oentry.path.find('/', si)
+				if orbound == -1 or oentry.path[si:orbound] != base:
+					break
+				# END abort on base mismatch
+				# advance past matches only, keeps xi an exclusive upper bound
+				xi += 1
+			# END find common base
+			# enter recursion, ci - 1 as we want to count our current item as well
+			sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
+			tree_items.append((sha, S_IFDIR, base))
+			# skip ahead
+			ci = xi
+		# END handle bounds
+	# END for each entry
+	
+	# finally create the tree
+	sio = StringIO()
+	tree_to_stream(tree_items, sio.write)
+	sio.seek(0)
+	
+	istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
+	return (istream.binsha, tree_items)
+	
+	
+	
diff --git a/lib/git/index/typ.py b/lib/git/index/typ.py
index b5dac58a..6ef1d2f2 100644
--- a/lib/git/index/typ.py
+++ b/lib/git/index/typ.py
@@ -5,6 +5,11 @@ from util import (
 					unpack
 				)
 
+from binascii import (
+						b2a_hex,
+						a2b_hex
+					)
+
 __all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry')
 
 #{ Invariants
@@ -50,7 +55,7 @@ class BaseIndexEntry(tuple):
 	use numeric indices for performance reasons. """
 
 	def __str__(self):
-		return "%o %s %i\t%s" % (self.mode, self.sha, self.stage, self.path)
+		return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path)
 
 	@property
 	def mode(self):
@@ -58,9 +63,14 @@ class BaseIndexEntry(tuple):
 		return self[0]
 
 	@property
-	def sha(self):
-		""" hex sha of the blob """
+	def binsha(self):
+		"""binary sha of the blob """
 		return self[1]
+		
+	@property
+	def hexsha(self):
+		"""hex version of our sha"""
+		return b2a_hex(self[1])
 
 	@property
 	def stage(self):
@@ -88,7 +98,7 @@ class BaseIndexEntry(tuple):
 	@classmethod
 	def from_blob(cls, blob, stage = 0):
 		""":return: Fully equipped BaseIndexEntry at the given stage"""
-		return cls((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path))
+		return cls((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path))
 
 
 class IndexEntry(BaseIndexEntry):
@@ -145,12 +155,12 @@ class IndexEntry(BaseIndexEntry):
 
 		:param base: Instance of type BaseIndexEntry"""
 		time = pack(">LL", 0, 0)
-		return IndexEntry((base.mode, base.sha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
+		return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0))
 
 	@classmethod
 	def from_blob(cls, blob, stage = 0):
 		""":return: Minimal entry resembling the given blob object"""
 		time = pack(">LL", 0, 0)
-		return IndexEntry((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
+		return IndexEntry((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size))
 
 
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 36bbccb2..90aa8ca2 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -21,6 +21,7 @@ class Object(LazyMixin):
 		inst.data	# byte string containing the whole data of the object
 	"""
 	NULL_HEX_SHA = '0'*40
+	NULL_BIN_SHA = '\0'*20
 	TYPES = ("blob", "tree", "commit", "tag")
 	__slots__ = ("repo", "sha", "size", "data" )
 	type = None			# to be set by subclass
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
new file mode 100644
index 00000000..7882437d
--- /dev/null
+++ b/lib/git/objects/fun.py
@@ -0,0 +1,66 @@
+"""Module with functions which are supposed to be as fast as possible"""
+
+__all__ = ('tree_to_stream', 'tree_entries_from_data')
+
+def tree_to_stream(entries, write):
+	"""Write the given list of entries into a stream using its write method
+	:param entries: **sorted** list of tuples with (binsha, mode, name)
+	:param write: write method which takes a data string"""
+	ord_zero = ord('0')
+	bit_mask = 7			# lowest 3 bits set, selects one octal digit
+	
+	for binsha, mode, name in entries:
+		mode_str = ''
+		for i in xrange(6):
+			mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
+		# END for each of the 6 octal digits
+		
+		# git slices away the first octal digit if it is zero
+		if mode_str[0] == '0':
+			mode_str = mode_str[1:]
+		# END save a byte
+
+		write("%s %s\0%s" % (mode_str, name, binsha)) 
+	# END for each item
+
+
+def tree_entries_from_data(data):
+	"""Reads the binary representation of a tree and returns its entries as tuples
+	:param data: string of entries, each being octal mode, space, name, NUL, 20 byte sha
+	:return: list(tuple(binsha, mode, tree_relative_path), ...) in the order found in data"""
+	ord_zero = ord('0')
+	len_data = len(data)
+	i = 0
+	out = list()
+	while i < len_data:
+		mode = 0
+		
+		# read mode
+		# Some git versions truncate the leading 0, some don't
+		# The type will be extracted from the mode later
+		while data[i] != ' ':
+			# each character is one octal digit: shift the mode up by 3 bits
+			# and add the numeric value of the character
+			mode = (mode << 3) + (ord(data[i]) - ord_zero)
+			i += 1
+		# END while reading mode
+		
+		# byte is space now, skip it
+		i += 1
+		
+		# parse name, it is NUL terminated
+		
+		ns = i
+		while data[i] != '\0':
+			i += 1
+		# END while not reached NUL
+		name = data[ns:i]
+		
+		# byte is NUL, the following 20 bytes are the binary sha
+		i += 1
+		sha = data[i:i+20]
+		i = i + 20
+		
+		out.append((sha, mode, name))
+	# END for each entry in data stream
+	return out
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index eb8aa9eb..6b1d13c1 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -5,21 +5,21 @@
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 
 import os
+import utils
+import base
+
 from blob import Blob
 from submodule import Submodule
-import base
-import binascii
 import git.diff as diff
-import utils
-from git.utils import join_path
-
 join = os.path.join
 
-def sha_to_hex(sha):
-	"""Takes a string and returns the hex of the sha within"""
-	hexsha = binascii.hexlify(sha)
-	return hexsha
-	
+from fun import (
+					tree_entries_from_data, 
+					tree_to_stream
+				 )
+
+from gitdb.util import to_bin_sha
+from binascii import b2a_hex
 
 class TreeModifier(object):
 	"""A utility class providing methods to alter the underlying cache in a list-like
@@ -51,25 +51,24 @@ class TreeModifier(object):
 	#} END interface
 	
 	#{ Mutators
-	def add(self, hexsha, mode, name, force=False):
+	def add(self, sha, mode, name, force=False):
 		"""Add the given item to the tree. If an item with the given name already
 		exists, nothing will be done, but a ValueError will be raised if the 
 		sha and mode of the existing item do not match the one you add, unless 
 		force is True
-		:param hexsha: The 40 byte sha of the item to add
+		:param sha: The 20 or 40 byte sha of the item to add
 		:param mode: int representing the stat compatible mode of the item
 		:param force: If True, an item with your name and information will overwrite
 			any existing item with the same name, no matter which information it has
 		:return: self"""
 		if '/' in name:
 			raise ValueError("Name must not contain '/' characters")
-		if len(hexsha) != 40:
-			raise ValueError("Hexsha required, got %r" % hexsha)
 		if (mode >> 12) not in Tree._map_id_to_type:
 			raise ValueError("Invalid object type according to mode %o" % mode)
-		
+			
+		sha = to_bin_sha(sha)
 		index = self._index_by_name(name)
-		item = (hexsha, mode, name)
+		item = (sha, mode, name)
 		if index == -1:
 			self._cache.append(item)
 		else:
@@ -77,18 +76,19 @@ class TreeModifier(object):
 				self._cache[index] = item
 			else:
 				ex_item = self._cache[index]
-				if ex_item[0] != hexsha or ex_item[1] != mode:
+				if ex_item[0] != sha or ex_item[1] != mode:
 					raise ValueError("Item %r existed with different properties" % name)
 				# END handle mismatch
 			# END handle force
 		# END handle name exists
 		return self
 		
-	def add_unchecked(self, hexsha, mode, name):
+	def add_unchecked(self, binsha, mode, name):
 		"""Add the given item to the tree, its correctness is assumed, which 
 		puts the caller into responsibility to assure the input is correct. 
-		For more information on the parameters, see ``add``"""
-		self._cache.append((hexsha, mode, name))
+		For more information on the parameters, see ``add``
+		:param binsha: 20 byte binary sha"""
+		self._cache.append((binsha, mode, name))
 		
 	def __delitem__(self, name):
 		"""Deletes an item with the given name if it exists"""
@@ -146,70 +146,21 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
 	def _set_cache_(self, attr):
 		if attr == "_cache":
 			# Set the data when we need it
-			self._cache = self._get_tree_cache(self.data)
+			self._cache = tree_entries_from_data(self.data)
 		else:
 			super(Tree, self)._set_cache_(attr)
 
-	def _get_tree_cache(self, data):
-		""" :return: list(object_instance, ...)
-		:param data: data string containing our serialized information"""
-		return list(self._iter_from_data(data))
-		
 	def _iter_convert_to_object(self, iterable):
 		"""Iterable yields tuples of (hexsha, mode, name), which will be converted
 		to the respective object representation"""
-		for hexsha, mode, name in iterable:
+		for binsha, mode, name in iterable:
 			path = join(self.path, name)
 			type_id = mode >> 12
 			try:
-				yield self._map_id_to_type[type_id](self.repo, hexsha, mode, path)
+				yield self._map_id_to_type[type_id](self.repo, b2a_hex(binsha), mode, path)
 			except KeyError:
 				raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path))
 		# END for each item 
-		
-	def _iter_from_data(self, data):
-		"""
-		Reads the binary non-pretty printed representation of a tree and converts
-		it into Blob, Tree or Commit objects.
-		
-		Note: This method was inspired by the parse_tree method in dulwich.
-		
-		:yield: Tuple(hexsha, mode, tree_relative_path)
-		"""
-		ord_zero = ord('0')
-		len_data = len(data)
-		i = 0
-		while i < len_data:
-			mode = 0
-			
-			# read mode
-			# Some git versions truncate the leading 0, some don't
-			# The type will be extracted from the mode later
-			while data[i] != ' ':
-				# move existing mode integer up one level being 3 bits
-				# and add the actual ordinal value of the character
-				mode = (mode << 3) + (ord(data[i]) - ord_zero)
-				i += 1
-			# END while reading mode
-			
-			# byte is space now, skip it
-			i += 1
-			
-			# parse name, it is NULL separated
-			
-			ns = i
-			while data[i] != '\0':
-				i += 1
-			# END while not reached NULL
-			name = data[ns:i]
-			
-			# byte is NULL, get next 20
-			i += 1
-			sha = data[i:i+20]
-			i = i + 20
-			
-			yield (sha_to_hex(sha), mode, name)
-		# END for each byte in data stream
 
 	def __div__(self, file):
 		"""
@@ -250,7 +201,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
 		else:
 			for info in self._cache:
 				if info[2] == file:		# [2] == name
-					return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
+					return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
 			# END for each obj
 			raise KeyError( msg % file )
 		# END handle long paths
@@ -304,7 +255,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
 	def __getitem__(self, item):
 		if isinstance(item, int):
 			info = self._cache[item]
-			return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2]))
+			return self._map_id_to_type[info[1] >> 12](self.repo, b2a_hex(info[0]), info[1], join(self.path, info[2]))
 		
 		if isinstance(item, basestring):
 			# compatability
@@ -335,32 +286,16 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializabl
 	def __reversed__(self):
 		return reversed(self._iter_convert_to_object(self._cache))
 		
-	def _serialize(self, stream, presort=False):
+	def _serialize(self, stream):
 		"""Serialize this tree into the stream. Please note that we will assume 
 		our tree data to be in a sorted state. If this is not the case, serialization
 		will not generate a correct tree representation as these are assumed to be sorted
 		by algorithms"""
-		ord_zero = ord('0')
-		bit_mask = 7			# 3 bits set
-		hex_to_bin = binascii.a2b_hex
-		
-		for hexsha, mode, name in self._cache:
-			mode_str = ''
-			for i in xrange(6):
-				mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
-			# END for each 8 octal value
-			
-			# git slices away the first octal if its zero
-			if mode_str[0] == '0':
-				mode_str = mode_str[1:]
-			# END save a byte
-
-			stream.write("%s %s\0%s" % (mode_str, name, hex_to_bin(hexsha))) 
-		# END for each item
+		tree_to_stream(self._cache, stream.write)
 		return self
 		
 	def _deserialize(self, stream):
-		self._cache = self._get_tree_cache(stream.read())
+		self._cache = tree_entries_from_data(stream.read())
 		return self
 		
 		
diff --git a/lib/git/utils.py b/lib/git/utils.py
index 3fb7fbf8..38501292 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -10,12 +10,14 @@ import time
 import tempfile
 
 from gitdb.util import (
-	make_sha, 
-	FDStreamWrapper,
-	LockedFD, 
-	file_contents_ro, 
-	LazyMixin
-	)
+							make_sha, 
+							FDStreamWrapper,
+							LockedFD, 
+							file_contents_ro, 
+							LazyMixin, 
+							to_hex_sha, 
+							to_bin_sha
+						)
 
 
 def stream_copy(source, destination, chunk_size=512*1024):