29 files changed, 3676 insertions, 1863 deletions
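The two 0.2 Beta 2 entries in the CHANGES hunk below amount to the following caller-visible behaviour; a minimal sketch, assuming a repository at the placeholder path /path/to/repo::

    from git import Repo, Commit

    repo = Repo("/path/to/repo")

    # the 'encoding' header of a commit message is now parsed instead of
    # being ignored, and falls back to UTF-8 when absent
    print repo.head.commit.encoding

    # create_from_tree now mimics git-commit-tree in pure python, so no
    # child process is spawned for the commit object itself
    parent = repo.head.commit
    new_commit = Commit.create_from_tree(repo, parent.tree, "a message",
                                         parent_commits=[parent], head=False)
    print new_commit.sha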
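For orientation while reading the large cmd.py hunk below: the Git class maps python calls onto git command lines via transform_kwargs and _call_process. A sketch of that mapping, where the working directory is a placeholder and option order can vary because keyword arguments are unordered::

    from git import Git

    g = Git("/path/to/repo")

    # single-letter kwargs become short options, longer names long options
    print g.transform_kwargs(p=True, max_count=10)
    # ['-p', '--max-count=10']    (order may vary)

    # attribute access is routed through __getattr__ to _call_process,
    # so this runs 'git rev-list --max-count=10 master' and returns stdout
    out = g.rev_list('master', max_count=10)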
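AutoInterrupt, defined in the same hunk, wraps every process handed out with as_process=True; a sketch of its lifecycle, under the same placeholder-repository assumption::

    from git import Git

    g = Git("/path/to/repo")
    proc = g.execute(["git", "rev-list", "HEAD"], as_process=True)
    first_sha = proc.stdout.readline().strip()   # attributes proxy to Popen

    # wait() raises GitCommandError on a non-zero status; merely dropping
    # the wrapper interrupts the child if it is still running, so a
    # half-consumed iterator cannot leave processes piling up
    del proc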
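The new CatFileContentStream backs stream_object_data, which multiplexes all queries over one persistent 'git cat-file --batch' process; a sketch, assuming HEAD resolves in the placeholder repository::

    from git import Git

    g = Git("/path/to/repo")
    hexsha, typename, size, stream = g.stream_object_data("HEAD")

    # every read is clamped to the announced size; once depleted the stream
    # swallows the terminating newline so the shared batch pipe stays usable
    chunk = stream.read(4096)
    while chunk:
        chunk = stream.read(4096)

    # the follow-up query reuses the same long-running process
    print g.get_object_header("HEAD")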
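In the base.py hunk below, Object's lazy size and data attributes stop shelling out to git-cat-file and query the object database instead; a sketch, assuming repo.odb is wired up as the hunk implies (the odb module itself is not part of this diff)::

    from git import Repo

    repo = Repo("/path/to/repo")
    commit = repo.head.commit

    print commit.size         # filled on demand from repo.odb.info(sha)
    print len(commit.data)    # filled on demand via repo.odb.stream(sha)

    # data_stream now hands out the odb stream directly - read it in order
    print commit.data_stream.read(16)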
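commit.py below drops the git-commit-tree round trip in favour of serializing the commit object directly. A sketch of the raw layout _serialize produces, with made-up author and date values::

    from cStringIO import StringIO
    from git import Repo, Commit
    from git.actor import Actor

    repo = Repo("/path/to/repo")
    actor = Actor("A U Thor", "author@example.com")
    commit = Commit(repo, Commit.NULL_HEX_SHA, tree=repo.head.commit.tree,
                    author=actor, authored_date=1273000000,
                    author_tz_offset=-3600,
                    committer=actor, committed_date=1273000000,
                    committer_tz_offset=-3600,
                    message="an example message", parents=(),
                    encoding="UTF-8")

    stream = StringIO()
    commit._serialize(stream)
    print stream.getvalue()
    # tree <hexsha>
    # author A U Thor <author@example.com> 1273000000 +0100
    # committer A U Thor <author@example.com> 1273000000 +0100
    #                                   <- blank line, then the message
    # an example message

Note that the encoding header is only emitted when it differs from the UTF-8 default, which is why it is absent here.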
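create_from_tree also resolves the committer identity itself: environment variables override git configuration values, which in turn override a default derived from the user id. A sketch of the environment taking precedence, again against the placeholder repository::

    import os
    from git import Repo, Commit

    os.environ[Commit.env_author_name] = "Env Author"
    os.environ[Commit.env_author_email] = "env@example.com"

    repo = Repo("/path/to/repo")
    c = Commit.create_from_tree(repo, repo.head.commit.tree, "msg")
    print c.author.name       # 'Env Author' - the environment wins
    print c.committer.name    # falls back to user.name from the config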
@@ -1,7 +1,12 @@ ======= CHANGES ======= - + +0.2 Beta 2 +=========== + * Commit objects now carry the 'encoding' information of their message. It wasn't parsed previously, and defaults to UTF-8 + * Commit.create_from_tree now uses a pure-python implementation, mimicing git-commit-tree + 0.2 ===== General diff --git a/lib/git/__init__.py b/lib/git/__init__.py index aac539eb..2f17c55b 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -22,5 +22,8 @@ from git.remote import * from git.index import * from git.utils import LockFile, BlockingLockFile +# odb is NOT imported intentionally - if you really want it, you should get it +# yourself as its part of the core + __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 82daf551..5cae2998 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,427 +13,515 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) + 'with_exceptions', 'as_process', + 'output_stream' ) def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') class Git(object): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") - - # CONFIGURATION - # The size in bytes read from stdout when copying git's output to another stream - max_chunk_size = 1024*64 - - class AutoInterrupt(object): - """ - Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise. - """ - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: - return - - # can be that nothing really exists anymore ... - if os is None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """ - Wait for the process and return its status code. 
- - Raise - GitCommandError if the return status is not 0 - """ - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - - - - def __init__(self, working_dir=None): - """ - Initialize this instance with: - - ``working_dir`` - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories. - """ - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None - - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def working_dir(self): - """ - Returns - Git directory we are working on - """ - return self._working_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - ``istream`` - Standard input filehandle passed to subprocess.Popen. - - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. - - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. - - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. - - ``as_process`` - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - ``output_stream`` - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe as subprocess.Popen can only accept system - file descriptors, not python objects ( such as StringIO ). - This merely is a workaround as the data will be copied from the - output pipe to the given output stream directly. - See also: Git.max_chunk_size - - ``**subprocess_kwargs`` - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you - specify may not be the same ones. 
- - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - output_stream # extended_output = False - tuple(int(status), output_stream, str(stderr))# extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. - """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - close_fds=(os.name=='posix'),# unsupported on linux - **subprocess_kwargs - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value, stderr_value = proc.communicate() - # strip trailing "\n" - if stdout_value.endswith("\n"): - stdout_value = stdout_value[:-1] - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.returncode - else: - max_chunk_size = self.max_chunk_size - while True: - chunk = proc.stdout.read(max_chunk_size) - output_stream.write(chunk) - if len(chunk) < max_chunk_size: - break - # END reading output stream - stdout_value = output_stream - stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.wait() - # END stdout handling - finally: - proc.stdout.close() - proc.stderr.close() - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist - - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String - - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. - - ``args`` - is the list of arguments. If None is included, it will be pruned. 
- This allows your commands to call git more conveniently as None - is realized as non-existent - - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - Examples:: - git.rev_list('master', max_count=10, header=True) - - Returns - Same as execute() - """ - - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) - - def _parse_object_header(self, header_line): - """ - ``header_line`` - <hex_sha> type_string size_as_int - - Returns - (hex_sha, type_string, size_as_int) - - Raises - ValueError if the header contains indication for an error due to incorrect - input sha - """ - tokens = header_line.split() - if len(tokens) != 3: - raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : subprocess.PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ - Use this method to quickly examine the type and size of the object behind - the given ref. - - NOTE - The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - Return: - (hexsha, type_string, size_as_int) - """ - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ - As get_object_header, but returns object data as well - - Return: - (hexsha, type_string, size_as_int,data_string) - """ - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - data = cmd.stdout.read(size) - cmd.stdout.read(1) # finishing newlines - - return (hexsha, typename, size, data) - - def clear_cache(self): - """ - Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. - - Returns - self - """ - self.cat_file_all = None - self.cat_file_header = None - return self + """ + The Git class manages communication with the Git binary. 
+ + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") + + # CONFIGURATION + # The size in bytes read from stdout when copying git's output to another stream + max_chunk_size = 1024*64 + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. + + The wait method was overridden to perform automatic status code checking + and possibly raise. + """ + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # can be that nothing really exists anymore ... + if os is None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """ + Wait for the process and return its status code. + + Raise + GitCommandError if the return status is not 0 + """ + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + # END auto interrupt + + class CatFileContentStream(object): + """Object representing a sized read-only stream returning the contents of + an object. + It behaves like a stream, but counts the data read and simulates an empty + stream once our sized content region is empty. 
+ If not all data is read to the end of the objects's lifetime, we read the + rest to assure the underlying stream continues to work""" + + __slots__ = ('_stream', '_nbr', '_size') + + def __init__(self, size, stream): + self._stream = stream + self._size = size + self._nbr = 0 # num bytes read + + def read(self, size=-1): + bytes_left = self._size - self._nbr + if bytes_left == 0: + return '' + if size > -1: + # assure we don't try to read past our limit + size = min(bytes_left, size) + else: + # they try to read all, make sure its not more than what remains + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # check for depletion, read our final byte to make the stream usable by others + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + + return data + + def readline(self, size=-1): + if self._nbr == self._size: + return '' + + # clamp size to lowest allowed value + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # handle final byte + # we inline everything, it must be fast ! + if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size=-1): + if self._nbr == self._size: + return list() + + # leave all additional logic to our readline method, we just check the size + out = list() + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def __del__(self): + bytes_left = self._size - self._nbr + if bytes_left: + # seek and discard + self._stream.seek(bytes_left + 1, os.SEEK_CUR) # includes terminating newline + # END handle incomplete read + + + def __init__(self, working_dir=None): + """ + Initialize this instance with: + + ``working_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories. + """ + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None + + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. + """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + @property + def working_dir(self): + """ + Returns + Git directory we are working on + """ + return self._working_dir + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None, + **subprocess_kwargs + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + ``command`` + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. 
+ + ``istream`` + Standard input filehandle passed to subprocess.Popen. + + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. + + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. + + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + ``output_stream`` + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. + This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + ``**subprocess_kwargs`` + Keyword arguments to be passed to subprocess.Popen. Please note that + some of the valid kwargs are already set by this method, the ones you + specify may not be the same ones. + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + output_stream # extended_output = False + tuple(int(status), output_stream, str(stderr))# extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + close_fds=(os.name=='posix'),# unsupported on linux + **subprocess_kwargs + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value, stderr_value = proc.communicate() + # strip trailing "\n" + if stdout_value.endswith("\n"): + stdout_value = stdout_value[:-1] + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.returncode + else: + stream_copy(proc.stdout, output_stream, self.max_chunk_size) + stdout_value = output_stream + stderr_value = proc.stderr.read() + # strip trailing "\n" + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.wait() + # END stdout handling + finally: + proc.stdout.close() + proc.stderr.close() + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! 
'%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. + """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist + + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String + + ``method`` + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + ``args`` + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent + + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + Examples:: + git.rev_list('master', max_count=10, header=True) + + Returns + Same as execute() + """ + + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. 
+ _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args + + call = ["git", dashify(method)] + call.extend(args) + + return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ Use this method to quickly examine the type and size of the object behind + the given ref. + + :note: The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + :return: (hexsha, type_string, size_as_int) """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ As get_object_header, but returns object data as well + :return: (hexsha, type_string, size_as_int,data_string) + :note: not threadsafe + """ + hexsha, typename, size, stream = self.stream_object_data(ref) + data = stream.read(size) + del(stream) + return (hexsha, typename, size, data) + + def stream_object_data(self, ref): + """As get_object_header, but returns the data as a stream + :return: (hexsha, type_string, size_as_int, stream) + :note: This method is not threadsafe, you need one independent Command instance + per thread to be safe !""" + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + + def clear_cache(self): + """ + Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + Returns + self + """ + self.cat_file_all = None + self.cat_file_header = None + return self diff --git a/lib/git/errors.py b/lib/git/errors.py index f66fb528..d8a35e02 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -8,19 +8,25 @@ Module containing all exceptions thrown througout the git package, """ class InvalidGitRepositoryError(Exception): - """ - Thrown if the given repository appears to have an invalid format. 
- """ + """ Thrown if the given repository appears to have an invalid format. """ + +class ODBError(Exception): + """All errors thrown by the object database""" + +class InvalidDBRoot(ODBError): + """Thrown if an object database cannot be initialized at the given path""" + +class BadObject(ODBError): + """The object with the given SHA does not exist""" + +class BadObjectType(ODBError): + """The object had an unsupported type""" class NoSuchPathError(OSError): - """ - Thrown if a path could not be access by the system. - """ + """ Thrown if a path could not be access by the system. """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ + """ Thrown if execution of the git command fails with non-zero status code. """ def __init__(self, command, status, stderr=None): self.stderr = stderr self.status = status diff --git a/lib/git/index.py b/lib/git/index.py index 8ccc3fe3..36428315 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -21,7 +21,7 @@ import git.diff as diff from errors import GitCommandError from git.objects import Blob, Tree, Object, Commit -from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native +from git.utils import IndexFileSHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native class CheckoutError( Exception ): @@ -461,7 +461,7 @@ class IndexFile(LazyMixin, diff.Diffable): write_op = ConcurrentWriteOperation(file_path or self._file_path) stream = write_op._begin_writing() - stream = SHA1Writer(stream) + stream = IndexFileSHA1Writer(stream) # header stream.write("DIRC") diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6a51eed3..5a3a15a7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,224 +4,220 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from git.utils import LazyMixin, join_path_native +from git.utils import LazyMixin, join_path_native, stream_copy import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. 
- - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. 
+ """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. + + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + oinfo = self.repo.odb.info(self.sha) + self.size = oinfo.size + assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type) + elif attr == "data": + ostream = self.repo.odb.stream(self.sha) + self.size = ostream.size + self.data = ostream.read() + assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + @property + def data_stream(self): + """ :return: File Object compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.repo.odb.stream(self.sha) - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. 
+ :return: self""" + istream = self.repo.odb.stream(self.sha) + stream_copy(istream, ostream) + return self + class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! 
+ """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 826f684c..9a3c2c95 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -7,372 +7,434 @@ from git.utils import Iterable import git.diff as diff import git.stats as stats +from git.actor import Actor from tree import Tree +from git.odb import IStream +from cStringIO import StringIO import base import utils -import tempfile +import time import os -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents") - _id_attribute_ = "sha" - - def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``sha`` - is the sha id of the commit or a ref - - ``parents`` : tuple( Commit, ... 
) - is a tuple of commit ids or actual Commits - - ``tree`` : Tree - is the corresponding tree id or an actual Tree - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - - ``author_tz_offset``: int_seconds_west_of_utc - is the timezone that the authored_date is in - - ``committer`` : Actor - is the committer string - - ``committed_date`` : int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - - ``committer_tz_offset``: int_seconds_west_of_utc - is the timezone that the authored_date is in - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, sha) - self._set_self_from_args_(locals()) - - if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) - # END for each parent to convert - - if self.sha and tree is not None: - self.tree = Tree(repo, tree, path='') - # END id to tree conversion - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. - """ - if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.author_tz_offset = temp.author_tz_offset - self.committer = temp.committer - self.committed_date = temp.committed_date - self.committer_tz_offset = temp.committer_tz_offset - self.message = temp.message - else: - super(Commit, self)._set_cache_(attr) - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """ - Count the number of commits reachable from this commit - - ``paths`` - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - ``kwargs`` - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - Returns - int - """ - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - Returns - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes - """ - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """ - Find all commits matching the given criteria. 
- - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options - - ``paths`` - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - - ``kwargs`` - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - - Returns - iterator yielding Commit items - """ - options = {'pretty': 'raw', 'as_process' : True } - options.update(kwargs) - - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, **options) - return cls._iter_from_process_or_stream(repo, proc, True) - - def iter_parents(self, paths='', **kwargs): - """ - Iterate _all_ parents of this commit. - - ``paths`` - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - - ``kwargs`` - All arguments allowed by git-rev-list - - Return: - Iterator yielding Commit objects which are parents of self - """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items( self.repo, self, paths, **kwargs ) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) - return stats.Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) - - ``from_rev_list`` - If True, the stream was created by rev-list in which case we parse - the message differently - Returns - iterator returning Commit objects - """ - stream = proc_or_stream - if not hasattr(stream,'next'): - stream = proc_or_stream.stdout - - for line in stream: - commit_tokens = line.split() - id = commit_tokens[1] - assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line) - committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next()) - - # empty line - stream.next() - - message_lines = [] - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - yield Commit(repo, id, parents=tuple(parents), tree=tree, - 
author=author, authored_date=authored_date, author_tz_offset=author_tz_offset, - committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset, - message=message) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """ - Commit the given tree, creating a commit object. - - ``repo`` - is the Repo - - ``tree`` - Sha of a tree or a tree object to become the tree of the new commit - - ``message`` - Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - - ``parent_commits`` - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - - ``head`` - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. - - Returns - Commit object representing the new commit - - Note: - Additional information about hte committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information - """ - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - parent_args = [ ("-p", str(commit)) for commit in parent_commits ] - - # create message stream - tmp_file_path = tempfile.mktemp() - fp = open(tmp_file_path,"wb") - fp.write(str(message)) - fp.close() - fp = open(tmp_file_path,"rb") - fp.seek(0) - - try: - # write the current index as tree - commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) - new_commit = cls(repo, commit_sha) - - if head: - try: - repo.head.commit = new_commit - except ValueError: - # head is not yet set to the ref our HEAD points to. - import git.refs - master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit) - repo.head.reference = master - # END handle empty repositories - # END advance head handling - - return new_commit - finally: - fp.close() - os.remove(tmp_file_path) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.sha - - def __repr__(self): - return '<git.Commit "%s">' % self.sha +class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. 
+ """ + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_name = "GIT_AUTHOR_NAME" + env_author_email = "GIT_AUTHOR_EMAIL" + env_author_date = "GIT_AUTHOR_DATE" + env_committer_name = "GIT_COMMITTER_NAME" + env_committer_email = "GIT_COMMITTER_EMAIL" + env_committer_date = "GIT_COMMITTER_DATE" + env_email = "EMAIL" + + # CONFIGURATION KEYS + conf_email = 'email' + conf_name = 'name' + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = "commit" + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "sha" + + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + :param sha: is the sha id of the commit or a ref + :param parents: tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + :param tree: Tree + is the corresponding tree id or an actual Tree + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone. + """ + super(Commit,self).__init__(repo, sha) + self._set_self_from_args_(locals()) + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + """ Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. """ + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + # Could use self.data, but lets try to get it with less calls + hexsha, typename, size, data = self.repo.git.get_object_data(self) + self._deserialize(StringIO(data)) + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + def count(self, paths='', **kwargs): + """ + Count the number of commits reachable from this commit + + ``paths`` + is an optinal path or a list of paths restricting the return value + to commits actually containing the paths + + ``kwargs`` + Additional options to be passed to git-rev-list. 
They must not alter + the ouput style of the command, or parsing will yield incorrect results + Returns + int + """ + # yes, it makes a difference whether empty paths are given or not in our case + # as the empty paths version will ignore merge commits for some reason. + if paths: + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + else: + return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + + + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. + Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) + + @classmethod + def iter_items(cls, repo, rev, paths='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``rev`` + revision specifier, see git-rev-parse for viable options + + ``paths`` + is an optinal path or list of paths, if set only Commits that include the path + or paths will be considered + + ``kwargs`` + optional keyword arguments to git rev-list where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + ``since`` all commits since i.e. '1970-01-01' + + Returns + iterator yielding Commit items + """ + if 'pretty' in kwargs: + raise ValueError("--pretty cannot be used as parsing expects single sha's only") + # END handle pretty + args = list() + if paths: + args.extend(('--', paths)) + # END if paths + + proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) + return cls._iter_from_process_or_stream(repo, proc) + + def iter_parents(self, paths='', **kwargs): + """ + Iterate _all_ parents of this commit. + + ``paths`` + Optional path or list of paths limiting the Commits to those that + contain at least one of the paths + + ``kwargs`` + All arguments allowed by git-rev-list + + Return: + Iterator yielding Commit objects which are parents of self + """ + # skip ourselves + skip = kwargs.get("skip", 1) + if skip == 0: # skip ourselves + skip = 1 + kwargs['skip'] = skip + + return self.iter_items( self.repo, self, paths, **kwargs ) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. 
+ + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) + return stats.Stats._list_from_string(self.repo, text) + + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + sha = line.strip() + if len(sha) > 40: + # split additional information, as returned by bisect for instance + sha, rest = line.split(None, 1) + # END handle extra info + + assert len(sha) == 40, "Invalid line: %s" % sha + yield Commit(repo, sha) + # END for each line in stream + + + @classmethod + def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): + """Commit the given tree, creating a commit object. + + :param repo: Repo object the commit should be part of + :param tree: Sha of a tree or a tree object to become the tree of the new commit + :param message: Commit message. It may be an empty string if no message is provided. + It will be converted to a string in any case. + :param parent_commits: + Optional Commit objects to use as parents for the new commit. + If empty list, the commit will have no parents at all and become + a root commit. + If None , the current head commit will be the parent of the + new commit object + :param head: + If True, the HEAD will be advanced to the new commit automatically. + Else the HEAD will remain pointing on the previous commit. This could + lead to undesired results when diffing files. 
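+
+        Illustrative usage (a sketch: assumes an existing ``repo`` and simply reuses
+        the tree of its current head commit; the message is an example only)::
+
+            tree = repo.head.commit.tree
+            new_commit = Commit.create_from_tree(repo, tree, "example message")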
+
+        :return: Commit object representing the new commit
+
+        :note:
+            Additional information about the committer and author is taken from the
+            environment or from the git configuration, see git-commit-tree for
+            more information
+        """
+        if parent_commits is None:
+            try:
+                parent_commits = [ repo.head.commit ]
+            except ValueError:
+                # empty repositories have no head commit
+                parent_commits = list()
+            # END handle parent commits
+        # END if parent commits are unset
+
+        # retrieve all additional information, create a commit object, and
+        # serialize it
+        # Generally:
+        # * Environment variables override configuration values
+        # * Sensible defaults are set according to the git documentation
+
+        # COMMITTER AND AUTHOR INFO
+        cr = repo.config_reader()
+        env = os.environ
+        default_email = utils.get_user_id()
+        default_name = default_email.split('@')[0]
+
+        conf_name = cr.get_value('user', cls.conf_name, default_name)
+        conf_email = cr.get_value('user', cls.conf_email, default_email)
+
+        author_name = env.get(cls.env_author_name, conf_name)
+        author_email = env.get(cls.env_author_email, conf_email)
+
+        committer_name = env.get(cls.env_committer_name, conf_name)
+        committer_email = env.get(cls.env_committer_email, conf_email)
+
+        # PARSE THE DATES
+        unix_time = int(time.time())
+        offset = time.altzone
+
+        author_date_str = env.get(cls.env_author_date, '')
+        if author_date_str:
+            author_time, author_offset = utils.parse_date(author_date_str)
+        else:
+            author_time, author_offset = unix_time, offset
+        # END set author time
+
+        committer_date_str = env.get(cls.env_committer_date, '')
+        if committer_date_str:
+            committer_time, committer_offset = utils.parse_date(committer_date_str)
+        else:
+            committer_time, committer_offset = unix_time, offset
+        # END set committer time
+
+        # use the configured commit encoding, defaulting to UTF-8
+        enc_section, enc_option = cls.conf_encoding.split('.')
+        conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
+
+        author = Actor(author_name, author_email)
+        committer = Actor(committer_name, committer_email)
+
+        # CREATE NEW COMMIT
+        new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+                        author, author_time, author_offset,
+                        committer, committer_time, committer_offset,
+                        message, parent_commits, conf_encoding)
+
+        stream = StringIO()
+        new_commit._serialize(stream)
+        streamlen = stream.tell()
+        stream.seek(0)
+
+        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+        new_commit.sha = istream.sha
+
+        if head:
+            try:
+                repo.head.commit = new_commit
+            except ValueError:
+                # head is not yet set to the ref our HEAD points to
+                # Happens on first commit
+                import git.refs
+                master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
+                repo.head.reference = master
+            # END handle empty repositories
+        # END advance head handling
+
+        return new_commit
+
+    def __str__(self):
+        """ Convert commit to string which is SHA1 """
+        return self.sha
+
+    def __repr__(self):
+        return '<git.Commit "%s">' % self.sha
+
+    #{ Serializable Implementation
+
+    def _serialize(self, stream):
+        write = stream.write
+        write("tree %s\n" % self.tree)
+        for p in self.parents:
+            write("parent %s\n" % p)
+
+        a = self.author
+        c = self.committer
+        fmt = "%s %s <%s> %s %s\n"
+        write(fmt % ("author", a.name, a.email,
+                        self.authored_date,
+                        utils.altz_to_utctz_str(self.author_tz_offset)))
+
+        write(fmt % ("committer", c.name, c.email,
+                        self.committed_date,
+                        utils.altz_to_utctz_str(self.committer_tz_offset)))
+
+        if self.encoding != self.default_encoding:
+            write("encoding %s\n" %
self.encoding) + + write("\n") + write(self.message) + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.repo, readline().split()[1], 0, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + return self + + #} END serializable implementation diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a9e60981..285d3b5b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable): visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse - Trees are set to visist_once = False to gain more performance in the traversal""" + Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 4f17b652..c93f2091 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -9,159 +9,302 @@ Module for general utility functions import re from collections import deque as Deque from git.actor import Actor +import platform + +from string import digits +import time +import os + +__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date', + 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', + 'verify_utctz') def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. 
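+
+    Example (illustrative)::
+
+        get_object_type_by_name("commit")    # -> commit.Commit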
+ + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + +def get_user_id(): + """:return: string identifying the currently active system user as name@node + :note: user can be set with the 'USER' environment variable, usually set on windows""" + ukn = 'UNKNOWN' + username = os.environ.get('USER', ukn) + if username == ukn and hasattr(os, 'getlogin'): + username = os.getlogin() + # END get username from login + return "%s@%s" % (username, platform.node()) + + +def utctz_to_altz(utctz): + """we convert utctz to the timezone in seconds, it is the format time.altzone + returns. Git stores it as UTC timezon which has the opposite sign as well, + which explains the -1 * ( that was made explicit here ) + :param utctz: git utc timezone string, i.e. +0200""" + return -1 * int(float(utctz)/100*3600) + +def altz_to_utctz_str(altz): + """As above, but inverses the operation, returning a string that can be used + in commit objects""" + utci = -1 * int((altz / 3600)*100) + utcs = str(abs(utci)) + utcs = "0"*(4-len(utcs)) + utcs + prefix = (utci < 0 and '-') or '+' + return prefix + utcs + + +def verify_utctz(offset): + """:raise ValueError: if offset is incorrect + :return: offset""" + fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) + if len(offset) != 5: + raise fmt_exc + if offset[0] not in "+-": + raise fmt_exc + if offset[1] not in digits or \ + offset[2] not in digits or \ + offset[3] not in digits or \ + offset[4] not in digits: + raise fmt_exc + # END for each char + return offset + +def parse_date(string_date): + """ + Parse the given date as one of the following + * Git internal format: timestamp offset + * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
+ * ISO 8601 2005-04-07T22:13:13 + The T can be a space as well + + :return: Tuple(int(timestamp), int(offset), both in seconds since epoch + :raise ValueError: If the format could not be understood + :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY + """ + # git time + try: + if string_date.count(' ') == 1 and string_date.rfind(':') == -1: + timestamp, offset = string_date.split() + timestamp = int(timestamp) + return timestamp, utctz_to_altz(verify_utctz(offset)) + else: + offset = "+0000" # local time by default + if string_date[-5] in '-+': + offset = verify_utctz(string_date[-5:]) + string_date = string_date[:-6] # skip space as well + # END split timezone info + + # now figure out the date and time portion - split time + date_formats = list() + splitter = -1 + if ',' in string_date: + date_formats.append("%a, %d %b %Y") + splitter = string_date.rfind(' ') + else: + # iso plus additional + date_formats.append("%Y-%m-%d") + date_formats.append("%Y.%m.%d") + date_formats.append("%m/%d/%Y") + date_formats.append("%d.%m.%Y") + + splitter = string_date.rfind('T') + if splitter == -1: + splitter = string_date.rfind(' ') + # END handle 'T' and ' ' + # END handle rfc or iso + + assert splitter > -1 + + # split date and time + time_part = string_date[splitter+1:] # skip space + date_part = string_date[:splitter] + + # parse time + tstruct = time.strptime(time_part, "%H:%M:%S") + + for fmt in date_formats: + try: + dtstruct = time.strptime(date_part, fmt) + fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, + tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, + dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) + return int(time.mktime(fstruct)), utctz_to_altz(offset) + except ValueError: + continue + # END exception handling + # END for each fmt + + # still here ? fail + raise ValueError("no format matched") + # END handle format + except Exception: + raise ValueError("Unsupported date format: %s" % string_date) + # END handle exceptions + + # precompiled regex _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') def parse_actor_and_date(line): - """ - Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - Returns - [Actor, int_seconds_since_epoch, int_timezone_offset] - """ - m = _re_actor_epoch.search(line) - actor, epoch, offset = m.groups() - return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600)) - - - + """ + Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + Returns + [Actor, int_seconds_since_epoch, int_timezone_offset] + """ + m = _re_actor_epoch.search(line) + actor, epoch, offset = m.groups() + return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) + + + class ProcessStreamAdapter(object): - """ - Class wireing all calls to the contained Process instance. - - Use this type to hide the underlying process to provide access only to a specified - stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope. - """ - __slots__ = ("_proc", "_stream") - def __init__(self, process, stream_name): - self._proc = process - self._stream = getattr(process, stream_name) - - def __getattr__(self, attr): - return getattr(self._stream, attr) - - + """ + Class wireing all calls to the contained Process instance. 
+ + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. + """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + class Traversable(object): - """Simple interface to perforam depth-first or breadth-first traversals - into one direction. - Subclasses only need to implement one function. - Instances of the Subclass must be hashable""" - __slots__ = tuple() - - @classmethod - def _get_intermediate_items(cls, item): - """ - Returns: - List of items connected to the given item. - Must be implemented in subclass - """ - raise NotImplementedError("To be implemented in subclass") - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = True, ignore_self=1, as_edge = False ): - """ - ``Returns`` - iterator yieling of items found when traversing self - - ``predicate`` - f(i,d) returns False if item i at depth d should not be included in the result - - ``prune`` - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. - - ``depth`` - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predessessors/successors - - ``branch_first`` - if True, items will be returned branch first, otherwise depth first - - ``visit_once`` - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. - - ``ignore_self`` - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None - - ``as_edge`` - if True, return a pair of items, first being the source, second the - destinatination, i.e. tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item == self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. 
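+
+    Illustrative use on a Commit, one such subclass (a sketch: assumes an
+    existing ``repo``)::
+
+        for item in repo.head.commit.traverse(depth=2):
+            print item.sha
+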
+ Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. + Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """ + ``Returns`` + iterator yieling of items found when traversing self + + ``predicate`` + f(i,d) returns False if item i at depth d should not be included in the result + + ``prune`` + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + ``depth`` + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + ``branch_first`` + if True, items will be returned branch first, otherwise depth first + + ``visit_once`` + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + ``ignore_self`` + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + ``as_edge`` + if True, return a pair of items, first being the source, second the + destinatination, i.e. tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item == self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! 
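+            # (d is the depth of 'item', starting at 0 for self, so nd is the
+            # depth at which the children of 'item' will be visited)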
+ nd = d + 1 + if depth > -1 and nd > depth: + continue + + addToStack( stack, item, branch_first, nd ) + # END for each item on work stack + + +class Serializable(object): + """Defines methods to serialize and deserialize objects from and into a data stream""" + + def _serialize(self, stream): + """Serialize the data of this object into the given data stream + :note: a serialized object would ``_deserialize`` into the same objet + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") + + def _deserialize(self, stream): + """Deserialize all information regarding this object from the stream + :param stream: a file-like object + :return: self""" + raise NotImplementedError("To be implemented in subclass") diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py new file mode 100644 index 00000000..5789d7eb --- /dev/null +++ b/lib/git/odb/__init__.py @@ -0,0 +1,6 @@ +"""Initialize the object database module""" + +# default imports +from db import * +from stream import * + diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py new file mode 100644 index 00000000..a8de28ec --- /dev/null +++ b/lib/git/odb/db.py @@ -0,0 +1,337 @@ +"""Contains implementations of database retrieveing objects""" +from git.utils import IndexFileSHA1Writer +from git.errors import ( + InvalidDBRoot, + BadObject, + BadObjectType + ) + +from stream import ( + DecompressMemMapReader, + FDCompressedSha1Writer, + Sha1Writer, + OStream, + OInfo + ) + +from utils import ( + ENOENT, + to_hex_sha, + exists, + hex_to_bin, + isdir, + mkdir, + rename, + dirname, + join + ) + +from fun import ( + chunk_size, + loose_object_header_info, + write_object + ) + +import tempfile +import mmap +import os + + +__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB', + 'CompoundDB', 'ReferenceDB', 'GitObjectDB' ) + +class ObjectDBR(object): + """Defines an interface for object database lookup. 
+    Objects are identified either by hex-sha (40 bytes) or
+    by binary sha (20 bytes)"""
+
+    def __contains__(self, sha):
+        return self.has_object(sha)
+
+    #{ Query Interface
+    def has_object(self, sha):
+        """
+        :return: True if the object identified by the given 40 byte hexsha or 20 bytes
+            binary sha is contained in the database
+        :raise BadObject:"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def info(self, sha):
+        """ :return: OInfo instance
+        :param sha: 40 bytes hexsha or 20 bytes binary sha
+        :raise BadObject:"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def info_async(self, input_channel):
+        """Retrieve information of a multitude of objects asynchronously
+        :param input_channel: Channel yielding the sha's of the objects of interest
+        :return: Channel yielding OInfo|InvalidOInfo, in any order"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def stream(self, sha):
+        """:return: OStream instance
+        :param sha: 40 bytes hexsha or 20 bytes binary sha
+        :raise BadObject:"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def stream_async(self, input_channel):
+        """Retrieve the OStream of multiple objects
+        :param input_channel: see ``info``
+        :param max_threads: see ``ObjectDBW.store``
+        :return: Channel yielding OStream|InvalidOStream instances in any order"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    #} END query interface
+
+class ObjectDBW(object):
+    """Defines an interface to create objects in the database"""
+
+    def __init__(self, *args, **kwargs):
+        self._ostream = None
+
+    #{ Edit Interface
+    def set_ostream(self, stream):
+        """Adjusts the stream to which all data should be sent when storing new objects
+        :param stream: if not None, the stream to use, if None the default stream
+            will be used.
+        :return: previously installed stream, or None if there was no override
+        :raise TypeError: if the stream doesn't have the supported functionality"""
+        cstream = self._ostream
+        self._ostream = stream
+        return cstream
+
+    def ostream(self):
+        """:return: overridden output stream this instance will write to, or None
+            if it will write to the default stream"""
+        return self._ostream
+
+    def store(self, istream):
+        """Create a new object in the database
+        :return: the input istream object with its sha set to its corresponding value
+        :param istream: IStream compatible instance. If its sha is already set
+            to a value, the object will just be stored in our database format,
+            in which case the input stream is expected to be in object format ( header + contents ).
+        :raise IOError: if data could not be written"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def store_async(self, input_channel):
+        """Create multiple new objects in the database asynchronously. The method will
+        return right away, returning an output channel which receives the results as
+        they are computed.
+
+        :return: Channel yielding your IStream which served as input, in any order.
+            The IStream's sha will be set to the sha it received during the process,
+            or its error attribute will be set to the exception informing about the error.
+        :param input_channel: Channel yielding IStream instances.
+            As the same instances will be used in the output channel, you can create a map
+            between the id(istream) -> istream
+        :note: As some ODB implementations implement this operation atomically, they might
+            abort the whole operation if one item could not be processed.
Hence check how + many items have actually been produced.""" + raise NotImplementedError("To be implemented in subclass") + + #} END edit interface + + +class FileDBBase(object): + """Provides basic facilities to retrieve files of interest, including + caching facilities to help mapping hexsha's to objects""" + + def __init__(self, root_path): + """Initialize this instance to look for its files at the given root path + All subsequent operations will be relative to this path + :raise InvalidDBRoot: + :note: The base will perform basic checking for accessability, but the subclass + is required to verify that the root_path contains the database structure it needs""" + super(FileDBBase, self).__init__() + if not os.path.isdir(root_path): + raise InvalidDBRoot(root_path) + self._root_path = root_path + + + #{ Interface + def root_path(self): + """:return: path at which this db operates""" + return self._root_path + + def db_path(self, rela_path): + """ + :return: the given relative path relative to our database root, allowing + to pontentially access datafiles""" + return join(self._root_path, rela_path) + #} END interface + + + +class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + + def __init__(self, root_path): + super(LooseObjectDB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(to_hex_sha(sha))) + try: + fd = os.open(db_path, os.O_RDONLY|self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + fd = os.open(db_path, os.O_RDONLY) + except OSError: + raise BadObject(to_hex_sha(sha)) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(to_hex_sha(sha)) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(LooseObjectDB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = 
loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(to_hex_sha(sha)) + return True + except BadObject: + return False + # END check existance + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + assert istream.sha is None, "Direct istream writing not yet implemented" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + writer = FDCompressedSha1Writer(fd) + # END handle custom writer + + try: + try: + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + finally: + if tmp_path: + writer.close() + # END assure target stream is closed + + sha = writer.sha(as_hex=True) + + if tmp_path: + obj_path = self.db_path(self.object_path(sha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + rename(tmp_path, obj_path) + # END handle dry_run + + istream.sha = sha + return istream + + +class PackedDB(FileDBBase, ObjectDBR): + """A database operating on a set of object packs""" + + +class CompoundDB(ObjectDBR): + """A database which delegates calls to sub-databases""" + + +class ReferenceDB(CompoundDB): + """A database consisting of database referred to in a file""" + + +#class GitObjectDB(CompoundDB, ObjectDBW): +class GitObjectDB(LooseObjectDB): + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database. + :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + def __init__(self, root_path, git): + """Initialize this instance with the root and a git command""" + super(GitObjectDB, self).__init__(root_path) + self._git = git + + def info(self, sha): + t = self._git.get_object_header(sha) + return OInfo(t[0], t[1], t[2]) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + t = self._git.stream_object_data(sha) + return OStream(t[0], t[1], t[2], t[3]) + diff --git a/lib/git/odb/fun.py b/lib/git/odb/fun.py new file mode 100644 index 00000000..870a6f02 --- /dev/null +++ b/lib/git/odb/fun.py @@ -0,0 +1,108 @@ +"""Contains basic c-functions which usually contain performance critical code +Keeping this code separate from the beginning makes it easier to out-source +it into c later, if required""" + +from git.errors import ( + BadObjectType + ) + +import zlib +decompressobj = zlib.decompressobj + + +# INVARIANTS +type_id_to_type_map = { + 1 : "commit", + 2 : "tree", + 3 : "blob", + 4 : "tag" + } + +# used when dealing with larger streams +chunk_size = 1000*1000 + +__all__ = ('is_loose_object', 'loose_object_header_info', 'object_header_info', + 'write_object' ) + +#{ Routines + +def is_loose_object(m): + """:return: True the file contained in memory map m appears to be a loose object. 
+    Only the first two bytes are needed"""
+    b0, b1 = map(ord, m[:2])
+    word = (b0 << 8) + b1
+    return b0 == 0x78 and (word % 31) == 0
+
+def loose_object_header_info(m):
+    """:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
+        object as well as its uncompressed size in bytes.
+    :param m: memory map from which to read the compressed object data"""
+    decompress_size = 8192        # is used in cgit as well
+    hdr = decompressobj().decompress(m, decompress_size)
+    type_name, size = hdr[:hdr.find("\0")].split(" ")
+    return type_name, int(size)
+
+def object_header_info(m):
+    """:return: tuple(type_string, uncompressed_size_in_bytes)
+    :param m: mapped memory map. It will be
+        seeked to the actual start of the object contents, which can be used
+        to initialize a zlib decompress object.
+    :note: This routine can only handle new-style objects which are assumably contained
+        in packs
+    """
+    assert not is_loose_object(m), "Use loose_object_header_info instead"
+
+    c = ord(m[0])               # first byte
+    i = 1                       # next char to read
+    type_id = (c >> 4) & 7      # numeric type
+    size = c & 15               # starting size
+    s = 4                       # starting bit-shift size
+    while c & 0x80:
+        c = ord(m[i])
+        i += 1
+        size += (c & 0x7f) << s
+        s += 7
+    # END character loop
+
+    # finally seek the map to the start of the data stream
+    m.seek(i)
+    try:
+        return (type_id_to_type_map[type_id], size)
+    except KeyError:
+        # invalid object type - we could try to be smart now and decode part
+        # of the stream to get the info, problem is that we had trouble finding
+        # the exact start of the content stream
+        raise BadObjectType(type_id)
+    # END handle exceptions
+
+def write_object(type, size, read, write, chunk_size=chunk_size):
+    """Write the object as identified by type, size and source_stream into the
+    target_stream
+
+    :param type: type string of the object
+    :param size: amount of bytes to write from source_stream
+    :param read: read method of a stream providing the content data
+    :param write: write method of the output stream
+    :return: The actual amount of bytes written to the stream, including the header"""
+    tbw = 0                                     # total num bytes written
+    dbw = 0                                     # num data bytes written
+
+    # WRITE HEADER: type SP size NULL
+    tbw += write("%s %i\0" % (type, size))
+
+    # WRITE ALL DATA UP TO SIZE
+    while True:
+        cs = min(chunk_size, size-dbw)
+        data_len = write(read(cs))
+        dbw += data_len
+        if data_len < cs or dbw == size:
+            tbw += dbw
+            break
+        # END check for stream end
+    # END duplicate data
+    return tbw
+
+
+#} END routines
diff --git a/lib/git/odb/stream.py b/lib/git/odb/stream.py
new file mode 100644
index 00000000..d1181382
--- /dev/null
+++ b/lib/git/odb/stream.py
@@ -0,0 +1,446 @@
+import zlib
+from cStringIO import StringIO
+from git.utils import make_sha
+import errno
+
+from utils import (
+        to_hex_sha,
+        to_bin_sha,
+        write,
+        close
+    )
+
+__all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream',
+            'DecompressMemMapReader', 'FDCompressedSha1Writer')
+
+
+# ZLIB configuration
+# compression level used when compressing objects: 1 ( fastest ) to 9 ( slowest )
+Z_BEST_SPEED = 1
+
+
+#{ ODB Bases
+
+class OInfo(tuple):
+    """Carries information about an object in an ODB, providing information
+    about the sha of the object, the type_string as well as the uncompressed size
+    in bytes.
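+
+    Instances are handed out by the object databases, e.g. (a sketch: assumes a
+    LooseObjectDB instance ``ldb`` and a valid ``hexsha``)::
+
+        info = ldb.info(hexsha)
+        info.type, info.size    # e.g. ('blob', 42)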
+
+    It can be accessed using tuple notation and using attribute access notation::
+
+        assert dbi[0] == dbi.sha
+        assert dbi[1] == dbi.type
+        assert dbi[2] == dbi.size
+
+    The type is designed to be as lightweight as possible."""
+    __slots__ = tuple()
+
+    def __new__(cls, sha, type, size):
+        return tuple.__new__(cls, (sha, type, size))
+
+    def __init__(self, *args):
+        tuple.__init__(self)
+
+    #{ Interface
+    @property
+    def sha(self):
+        return self[0]
+
+    @property
+    def type(self):
+        return self[1]
+
+    @property
+    def size(self):
+        return self[2]
+    #} END interface
+
+
+class OStream(OInfo):
+    """Base for object streams retrieved from the database, providing additional
+    information about the stream.
+    Generally, ODB streams are read-only as objects are immutable"""
+    __slots__ = tuple()
+
+    def __new__(cls, sha, type, size, stream, *args, **kwargs):
+        """Helps with the initialization of subclasses"""
+        return tuple.__new__(cls, (sha, type, size, stream))
+
+    def __init__(self, *args, **kwargs):
+        tuple.__init__(self)
+
+    #{ Interface
+
+    def is_compressed(self):
+        """:return: True if reads of this stream yield zlib compressed data. Default False
+        :note: this does not imply anything about the actual internal storage.
+            Hence the data could be uncompressed, but read compressed, or vice versa"""
+        return False
+
+    #} END interface
+
+    #{ Stream Reader Interface
+
+    def read(self, size=-1):
+        return self[3].read(size)
+
+    #} END stream reader interface
+
+
+class IStream(list):
+    """Represents an input content stream to be fed into the ODB. It is mutable to allow
+    the ODB to record information about the operation's outcome right in this instance.
+
+    It provides interfaces for the OStream and a StreamReader to allow the instance
+    to blend in without prior conversion.
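+
+    Illustrative store round-trip (a sketch: assumes an existing ``repo`` carrying
+    the new ``odb`` attribute and a content string ``data``)::
+
+        istream = IStream('blob', len(data), StringIO(data))
+        repo.odb.store(istream)
+        istream.sha        # now set by the database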
+ + The only method your content stream must support is 'read'""" + __slots__ = tuple() + + def __new__(cls, type, size, stream, sha=None, compressed=False): + return list.__new__(cls, (sha, type, size, stream, compressed, None)) + + def __init__(self, type, size, stream, sha=None, compressed=None): + list.__init__(self, (sha, type, size, stream, compressed, None)) + + #{ Interface + + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return to_hex_sha(self[0]) + + def binsha(self): + """:return: our sha as binary, 20 bytes""" + return to_bin_sha(self[0]) + + def _error(self): + """:return: the error that occurred when processing the stream, or None""" + return self[5] + + def _set_error(self, exc): + """Set this input stream to the given exc, may be None to reset the error""" + self[5] = exc + + error = property(_error, _set_error) + + #} END interface + + #{ Stream Reader Interface + + def read(self, size=-1): + """Implements a simple stream reader interface, passing the read call on + to our internal stream""" + return self[3].read(size) + + #} END stream reader interface + + #{ interface + + def _set_sha(self, sha): + self[0] = sha + + def _sha(self): + return self[0] + + sha = property(_sha, _set_sha) + + + def _type(self): + return self[1] + + def _set_type(self, type): + self[1] = type + + type = property(_type, _set_type) + + def _size(self): + return self[2] + + def _set_size(self, size): + self[2] = size + + size = property(_size, _set_size) + + def _stream(self): + return self[3] + + def _set_stream(self, stream): + self[3] = stream + + stream = property(_stream, _set_stream) + + #} END odb info interface + + #{ OStream interface + + def is_compressed(self): + return self[4] + + #} END OStream interface + + +class InvalidOInfo(tuple): + """Carries information about a sha identifying an object which is invalid in + the queried database. The exception attribute provides more information about + the cause of the issue""" + __slots__ = tuple() + + def __new__(cls, sha, exc): + return tuple.__new__(cls, (sha, exc)) + + def __init__(self, sha, exc): + tuple.__init__(self, (sha, exc)) + + @property + def sha(self): + return self[0] + + @property + def error(self): + """:return: exception instance explaining the failure""" + return self[1] + + +class InvalidOStream(InvalidOInfo): + """Carries information about an invalid ODB stream""" + __slots__ = tuple() + +#} END ODB Bases + + +#{ RO Streams + +class DecompressMemMapReader(object): + """Reads data in chunks from a memory map and decompresses it. The client sees + only the uncompressed data, respective file-like read calls are handling on-demand + buffered decompression accordingly + + A constraint on the total size of bytes is activated, simulating + a logical file within a possibly larger physical memory area + + To read efficiently, you clearly don't want to read individual bytes, instead, + read a few kilobytes at least. + + :note: The chunk-size should be carefully selected as it will involve quite a bit + of string copying due to the way the zlib is implemented. Its very wasteful, + hence we try to find a good tradeoff between allocation time and number of + times we actually allocate. An own zlib implementation would be good here + to better support streamed reading - it would only need to keep the mmap + and decompress it into chunks, thats all ... 
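+
+    Instances are usually created through ``new`` (a sketch: ``m`` is an assumed
+    memory map of a loose object file)::
+
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
+        data = stream.read(size)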
""" + __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close') + + max_read_size = 512*1024 + + def __init__(self, m, close_on_deletion, size): + """Initialize with mmap for stream reading""" + self._m = m + self._zip = zlib.decompressobj() + self._buf = None # buffer of decompressed bytes + self._buflen = 0 # length of bytes in buffer + self._s = size # size of uncompressed data to read in total + self._br = 0 # num uncompressed bytes read + self._cws = 0 # start byte of compression window + self._cwe = 0 # end byte of compression window + self._close = close_on_deletion # close the memmap on deletion ? + + def __del__(self): + if self._close: + self._m.close() + # END handle resource freeing + + @classmethod + def new(self, m, close_on_deletion=False): + """Create a new DecompressMemMapReader instance for acting as a read-only stream + This method parses the object header from m and returns the parsed + type and size, as well as the created stream instance. + :param m: memory map on which to oparate + :param close_on_deletion: if True, the memory map will be closed once we are + being deleted""" + inst = DecompressMemMapReader(m, close_on_deletion, 0) + + # read header + maxb = 512 # should really be enough, cgit uses 8192 I believe + inst._s = maxb + hdr = inst.read(maxb) + hdrend = hdr.find("\0") + type, size = hdr[:hdrend].split(" ") + size = int(size) + inst._s = size + + # adjust internal state to match actual header length that we ignore + # The buffer will be depleted first on future reads + inst._br = 0 + hdrend += 1 # count terminating \0 + inst._buf = StringIO(hdr[hdrend:]) + inst._buflen = len(hdr) - hdrend + + return type, size, inst + + def read(self, size=-1): + if size < 1: + size = self._s - self._br + else: + size = min(size, self._s - self._br) + # END clamp size + + if size == 0: + return str() + # END handle depletion + + # protect from memory peaks + # If he tries to read large chunks, our memory patterns get really bad + # as we end up copying a possibly huge chunk from our memory map right into + # memory. This might not even be possible. Nonetheless, try to dampen the + # effect a bit by reading in chunks, returning a huge string in the end. + # Our performance now depends on StringIO. This way we don't need two large + # buffers in peak times, but only one large one in the end which is + # the return buffer + # NO: We don't do it - if the user thinks its best, he is right. If he + # has trouble, he will start reading in chunks. According to our tests + # its still faster if we read 10 Mb at once instead of chunking it. + + # if size > self.max_read_size: + # sio = StringIO() + # while size: + # read_size = min(self.max_read_size, size) + # data = self.read(read_size) + # sio.write(data) + # size -= len(data) + # if len(data) < read_size: + # break + # # END data loop + # sio.seek(0) + # return sio.getvalue() + # # END handle maxread + # + # deplete the buffer, then just continue using the decompress object + # which has an own buffer. 
We just need this to transparently parse the + # header from the zlib stream + dat = str() + if self._buf: + if self._buflen >= size: + # have enough data + dat = self._buf.read(size) + self._buflen -= size + self._br += size + return dat + else: + dat = self._buf.read() # ouch, duplicates data + size -= self._buflen + self._br += self._buflen + + self._buflen = 0 + self._buf = None + # END handle buffer len + # END handle buffer + + # decompress some data + # Abstract: zlib needs to operate on chunks of our memory map ( which may + # be large ), as it will otherwise and always fill in the 'unconsumed_tail' + # attribute which possible reads our whole map to the end, forcing + # everything to be read from disk even though just a portion was requested. + # As this would be a nogo, we workaround it by passing only chunks of data, + # moving the window into the memory map along as we decompress, which keeps + # the tail smaller than our chunk-size. This causes 'only' the chunk to be + # copied once, and another copy of a part of it when it creates the unconsumed + # tail. We have to use it to hand in the appropriate amount of bytes durin g + # the next read. + tail = self._zip.unconsumed_tail + if tail: + # move the window, make it as large as size demands. For code-clarity, + # we just take the chunk from our map again instead of reusing the unconsumed + # tail. The latter one would safe some memory copying, but we could end up + # with not getting enough data uncompressed, so we had to sort that out as well. + # Now we just assume the worst case, hence the data is uncompressed and the window + # needs to be as large as the uncompressed bytes we want to read. + self._cws = self._cwe - len(tail) + self._cwe = self._cws + size + + + indata = self._m[self._cws:self._cwe] # another copy ... :( + # get the actual window end to be sure we don't use it for computations + self._cwe = self._cws + len(indata) + else: + cws = self._cws + self._cws = self._cwe + self._cwe = cws + size + indata = self._m[self._cws:self._cwe] # ... 
copy it again :( + # END handle tail + + dcompdat = self._zip.decompress(indata, size) + + self._br += len(dcompdat) + if dat: + dcompdat = dat + dcompdat + + return dcompdat + +#} END RO streams + + +#{ W Streams + +class Sha1Writer(object): + """Simple stream writer which produces a sha whenever you like as it degests + everything it is supposed to write""" + + def __init__(self): + self.sha1 = make_sha("") + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + return len(data) + + # END stream interface + + #{ Interface + + def sha(self, as_hex = False): + """:return: sha so far + :param as_hex: if True, sha will be hex-encoded, binary otherwise""" + if as_hex: + return self.sha1.hexdigest() + return self.sha1.digest() + + #} END interface + +class FDCompressedSha1Writer(Sha1Writer): + """Digests data written to it, making the sha available, then compress the + data and write it to the file descriptor + :note: operates on raw file descriptors + :note: for this to work, you have to use the close-method of this instance""" + __slots__ = ("fd", "sha1", "zip") + + # default exception + exc = IOError("Failed to write all bytes to filedescriptor") + + def __init__(self, fd): + super(FDCompressedSha1Writer, self).__init__() + self.fd = fd + self.zip = zlib.compressobj(Z_BEST_SPEED) + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + cdata = self.zip.compress(data) + bytes_written = write(self.fd, cdata) + if bytes_written != len(cdata): + raise self.exc + return len(data) + + def close(self): + remainder = self.zip.flush() + if write(self.fd, remainder) != len(remainder): + raise self.exc + return close(self.fd) + + #} END stream interface + +#} END W streams diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py new file mode 100644 index 00000000..6863e97b --- /dev/null +++ b/lib/git/odb/utils.py @@ -0,0 +1,38 @@ +import binascii +import os +import errno + +#{ Routines + +hex_to_bin = binascii.a2b_hex +bin_to_hex = binascii.b2a_hex + +def to_hex_sha(sha): + """:return: hexified version of sha""" + if len(sha) == 40: + return sha + return bin_to_hex(sha) + +def to_bin_sha(sha): + if len(sha) == 20: + return sha + return hex_to_bin(sha) + +# errors +ENOENT = errno.ENOENT + +# os shortcuts +exists = os.path.exists +mkdir = os.mkdir +isdir = os.path.isdir +rename = os.rename +dirname = os.path.dirname +join = os.path.join +read = os.read +write = os.write +close = os.close + + +#} END Routines + + diff --git a/lib/git/repo.py b/lib/git/repo.py index f4caa3fb..78e5f526 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -4,12 +4,6 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import os -import sys -import re -import gzip -import StringIO - from errors import InvalidGitRepositoryError, NoSuchPathError from cmd import Git from actor import Actor @@ -19,6 +13,15 @@ from objects import * from config import GitConfigParser from remote import Remote +from odb import GitObjectDB + +import os +import sys +import re +import gzip +import StringIO + + def touch(filename): fp = open(filename, "a") fp.close() @@ -53,7 +56,7 @@ class Repo(object): 'git_dir' is the .git repository directoy, which is always set. 
""" DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" ) + __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" ) # precompiled regex re_whitespace = re.compile(r'\s+') @@ -65,27 +68,22 @@ class Repo(object): # represents the configuration level of a configuration file config_level = ("system", "global", "repository") - def __init__(self, path=None): - """ - Create a new Repo instance + def __init__(self, path=None, odbt = GitObjectDB): + """ Create a new Repo instance - ``path`` - is the path to either the root git directory or the bare git repo - - Examples:: + :param path: is the path to either the root git directory or the bare git repo:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") repo = Repo("~/Development/git-python.git") repo = Repo("$REPOSITORIES/Development/git-python.git") - - Raises - InvalidGitRepositoryError or NoSuchPathError - - Returns - ``git.Repo`` - """ - + + :param odbt: Object DataBase type - a type which is constructed by providing + the directory containing the database objects, i.e. .git/objects. It will + be used to access all object data + :raise InvalidGitRepositoryError: + :raise NoSuchPathError: + :return: git.Repo """ epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) if not os.path.exists(epath): @@ -130,6 +128,12 @@ class Repo(object): self.working_dir = self._working_tree_dir or self.git_dir self.git = Git(self.working_dir) + + # special handling, in special times + args = [os.path.join(self.git_dir, 'objects')] + if issubclass(odbt, GitObjectDB): + args.append(self.git) + self.odb = odbt(*args) def __eq__(self, rhs): if isinstance(rhs, Repo): diff --git a/lib/git/utils.py b/lib/git/utils.py index c21528b1..60a7de48 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -27,6 +27,21 @@ def make_sha(source=''): sha1 = sha.sha(source) return sha1 +def stream_copy(source, destination, chunk_size=512*1024): + """Copy all data from the source stream into the destination stream in chunks + of size chunk_size + :return: amount of bytes written""" + br = 0 + while True: + chunk = source.read(chunk_size) + destination.write(chunk) + br += len(chunk) + if len(chunk) < chunk_size: + break + # END reading output stream + return br + + def join_path(a, *p): """Join path tokens together similar to os.path.join, but always use '/' instead of possibly '\' on windows.""" @@ -61,12 +76,14 @@ def join_path_native(a, *p): return to_native_path(join_path(a, *p)) -class SHA1Writer(object): +class IndexFileSHA1Writer(object): """ Wrapper around a file-like object that remembers the SHA1 of the data written to it. It will write a sha when the stream is closed or if the asked for explicitly usign write_sha. 
+ Only useful to the indexfile + Note: Based on the dulwich project """ @@ -78,7 +95,7 @@ class SHA1Writer(object): def write(self, data): self.sha1.update(data) - self.f.write(data) + return self.f.write(data) def write_sha(self): sha = self.sha1.digest() diff --git a/test/fixtures/rev_list b/test/fixtures/rev_list index 95a1ebff..1a576118 100644 --- a/test/fixtures/rev_list +++ b/test/fixtures/rev_list @@ -1,24 +1,3 @@ -commit 4c8124ffcf4039d292442eeccabdeca5af5c5017 -tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4 -parent 634396b2f541a9f2d58b00be1a07f0c358b999b3 -author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - implement Grit#heads - -commit 634396b2f541a9f2d58b00be1a07f0c358b999b3 -tree b35b4bf642d667fdd613eebcfe4e17efd420fb8a -author Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700 - - initial grit setup - -commit ab25fd8483882c3bda8a458ad2965d2248654335 -tree c20b5ec543bde1e43a931449b196052c06ed8acc -parent 6e64c55896aabb9a7d8e9f8f296f426d21a78c2c -parent 7f874954efb9ba35210445be456c74e037ba6af2 -author Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700 -committer Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700 - - Merge branch 'site' - Some other stuff +4c8124ffcf4039d292442eeccabdeca5af5c5017 +634396b2f541a9f2d58b00be1a07f0c358b999b3 +ab25fd8483882c3bda8a458ad2965d2248654335 diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py new file mode 100644 index 00000000..7d2a9f4a --- /dev/null +++ b/test/git/performance/lib.py @@ -0,0 +1,65 @@ +"""Contains library functions""" +import os +from test.testlib import * +import shutil +import tempfile + +from git import ( + Repo + ) + +#{ Invvariants +k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" +#} END invariants + + +#{ Utilities +def resolve_or_fail(env_var): + """:return: resolved environment variable or raise EnvironmentError""" + try: + return os.environ[env_var] + except KeyError: + raise EnvironmentError("Please set the %r envrionment variable and retry" % env_var) + # END exception handling + +#} END utilities + + +#{ Base Classes + +class TestBigRepoR(TestBase): + """TestCase providing access to readonly 'big' repositories using the following + member variables: + + * gitrepo + + * Read-Only git repository - actually the repo of git itself""" + + #{ Invariants + head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca' + head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5' + #} END invariants + + @classmethod + def setUpAll(cls): + super(TestBigRepoR, cls).setUpAll() + cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo)) + + +class TestBigRepoRW(TestBigRepoR): + """As above, but provides a big repository that we can write to. 
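+
+    The repository location is resolved from the GIT_PYTHON_TEST_GIT_REPO_BASE
+    environment variable (illustrative path)::
+
+        os.environ['GIT_PYTHON_TEST_GIT_REPO_BASE'] = '/path/to/git.git'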
+ + Provides ``self.gitrwrepo``""" + + @classmethod + def setUpAll(cls): + super(TestBigRepoRW, cls).setUpAll() + dirname = tempfile.mktemp() + os.mkdir(dirname) + cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True) + + @classmethod + def tearDownAll(cls): + shutil.rmtree(cls.gitrwrepo.working_dir) + +#} END base classes diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py new file mode 100644 index 00000000..0571d0d9 --- /dev/null +++ b/test/git/performance/test_commit.py @@ -0,0 +1,98 @@ +# test_performance.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from lib import * +from git import * +from test.git.test_commit import assert_commit_serialization +from cStringIO import StringIO +from time import time +import sys + +class TestPerformance(TestBigRepoRW): + + # ref with about 100 commits in its history + ref_100 = '0.1.6' + + def _query_commit_info(self, c): + c.author + c.authored_date + c.author_tz_offset + c.committer + c.committed_date + c.committer_tz_offset + c.message + c.parents + + def test_iteration(self): + no = 0 + nc = 0 + + # find the first commit containing the given path - always do a full + # iteration ( restricted to the path in question ), but in fact it should + # return quite a lot of commits, we just take one and hence abort the operation + + st = time() + for c in self.rorepo.iter_commits(self.ref_100): + nc += 1 + self._query_commit_info(c) + for obj in c.tree.traverse(): + obj.size + no += 1 + # END for each object + # END for each commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time) + + def test_commit_traversal(self): + # bound to cat-file parsing performance + nc = 0 + st = time() + for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_iteration(self): + # bound to stream parsing performance + nc = 0 + st = time() + for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k): + nc += 1 + self._query_commit_info(c) + # END for each traversed commit + elapsed_time = time() - st + print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time) + + def test_commit_serialization(self): + assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True) + + rwrepo = self.gitrwrepo + make_object = rwrepo.odb.store + # direct serialization - deserialization can be tested afterwards + # serialization is probably limited on IO + hc = rwrepo.commit(self.head_sha_2k) + + commits = list() + nc = 5000 + st = time() + for i in xrange(nc): + cm = Commit( rwrepo, Commit.NULL_HEX_SHA, hc.tree, + hc.author, hc.authored_date, hc.author_tz_offset, + hc.committer, hc.committed_date, hc.committer_tz_offset, + str(i), parents=hc.parents, encoding=hc.encoding) + + stream = StringIO() + cm._serialize(stream) + slen = stream.tell() + stream.seek(0) + + cm.sha = make_object(IStream(Commit.type, slen, stream)).sha + # END commit creation + elapsed = time() - st + + print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits 
/ s )" % (nc, elapsed, nc / elapsed) diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py new file mode 100644 index 00000000..7b1ee838 --- /dev/null +++ b/test/git/performance/test_odb.py @@ -0,0 +1,61 @@ +"""Performance tests for object store""" + +from time import time +import sys +import stat + +from lib import ( + TestBigRepoR + ) + + +class TestObjDBPerformance(TestBigRepoR): + + def test_random_access(self): + + # GET COMMITS + # TODO: use the actual db for this + st = time() + root_commit = self.gitrorepo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + for blob_list in blobs_per_commit: + for blob in blob_list: + blob.data + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed) diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py new file mode 100644 index 00000000..01ec9fc4 --- /dev/null +++ b/test/git/performance/test_streams.py @@ -0,0 +1,144 @@ +"""Performance data streaming performance""" + +from test.testlib import * +from git.odb import * + +from array import array +from cStringIO import StringIO +from time import time +import os +import sys +import stat +import random +import subprocess + + +from lib import ( + TestBigRepoR + ) + + + +def make_memory_file(size_in_bytes, randomize=False): + """:return: tuple(size_of_stream, stream) + :param randomize: try to produce a very random stream""" + actual_size = size_in_bytes / 4 + producer = xrange(actual_size) + if randomize: + producer = list(producer) + random.shuffle(producer) + # END randomize + a = array('i', producer) + return actual_size*4, StringIO(a.tostring()) + + +class TestObjDBPerformance(TestBigRepoR): + + large_data_size_bytes = 1000*1000*10 # some MiB should do it + moderate_data_size_bytes = 1000*1000*1 # just 1 MiB + + @with_bare_rw_repo + def test_large_data_streaming(self, rwrepo): + ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) + + for randomize in range(2): + desc = (randomize and 'random ') or '' + print >> sys.stderr, "Creating %s data ..." 
diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
new file mode 100644
index 00000000..01ec9fc4
--- /dev/null
+++ b/test/git/performance/test_streams.py
@@ -0,0 +1,144 @@
+"""Performance tests for data streaming"""
+
+from test.testlib import *
+from git.odb import *
+
+from array import array
+from cStringIO import StringIO
+from time import time
+import os
+import sys
+import stat
+import random
+import subprocess
+
+
+from lib import (
+    TestBigRepoR
+    )
+
+
+
+def make_memory_file(size_in_bytes, randomize=False):
+    """:return: tuple(size_of_stream, stream)
+    :param randomize: try to produce a very random stream"""
+    actual_size = size_in_bytes / 4
+    producer = xrange(actual_size)
+    if randomize:
+        producer = list(producer)
+        random.shuffle(producer)
+    # END randomize
+    a = array('i', producer)
+    return actual_size*4, StringIO(a.tostring())
+
+
+class TestObjDBPerformance(TestBigRepoR):
+
+    large_data_size_bytes = 1000*1000*10    # some MiB should do it
+    moderate_data_size_bytes = 1000*1000*1  # just 1 MiB
+
+    @with_bare_rw_repo
+    def test_large_data_streaming(self, rwrepo):
+        ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+
+        for randomize in range(2):
+            desc = (randomize and 'random ') or ''
+            print >> sys.stderr, "Creating %s data ..." % desc
+            st = time()
+            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+            elapsed = time() - st
+            print >> sys.stderr, "Done (in %f s)" % elapsed
+
+            # writing - due to the compression it will seem faster than it is
+            st = time()
+            sha = ldb.store(IStream('blob', size, stream)).sha
+            elapsed_add = time() - st
+            assert ldb.has_object(sha)
+            db_file = ldb.readable_db_object_path(sha)
+            fsize_kib = os.path.getsize(db_file) / 1000
+
+
+            size_kib = size / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+
+            # reading all at once
+            st = time()
+            ostream = ldb.stream(sha)
+            shadata = ostream.read()
+            elapsed_readall = time() - st
+
+            stream.seek(0)
+            assert shadata == stream.getvalue()
+            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+
+
+            # reading in chunks of 512 KiB
+            cs = 512*1000
+            chunks = list()
+            st = time()
+            ostream = ldb.stream(sha)
+            while True:
+                data = ostream.read(cs)
+                chunks.append(data)
+                if len(data) < cs:
+                    break
+            # END read in chunks
+            elapsed_readchunks = time() - st
+
+            stream.seek(0)
+            assert ''.join(chunks) == stream.getvalue()
+
+            cs_kib = cs / 1000
+            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+            # delete the db file so git has something to do
+            os.remove(db_file)
+
+            # VS. CGIT
+            ##########
+            # CGIT ! Can using the cgit programs be faster ?
+            proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
+
+            # write file - pump everything in at once to be as fast as possible
+            data = stream.getvalue()    # cache it
+            st = time()
+            proc.stdin.write(data)
+            proc.stdin.close()
+            gitsha = proc.stdout.read().strip()
+            proc.wait()
+            gelapsed_add = time() - st
+            del(data)
+            assert gitsha == sha    # we do it the same way, right ?
+
+            # as it's the same sha, we reuse our path
+            fsize_kib = os.path.getsize(db_file) / 1000
+            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
+
+            # compare ...
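+            # note added for clarity: the percentages printed below are computed
+            # as 100.0 - (git_python_time / cgit_time) * 100, so a positive value
+            # means git-python was faster, a negative value means cgit won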
+ print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc) + + + # read all + st = time() + s, t, size, data = rwrepo.git.get_object_data(gitsha) + gelapsed_readall = time() - st + print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall) + + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc) + + + # read chunks + st = time() + s, t, size, stream = rwrepo.git.stream_object_data(gitsha) + while True: + data = stream.read(cs) + if len(data) < cs: + break + # END read stream + gelapsed_readchunks = time() - st + print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks) + + # compare + print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc) + # END for each randomization factor diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py new file mode 100644 index 00000000..76adffec --- /dev/null +++ b/test/git/performance/test_utils.py @@ -0,0 +1,59 @@ +"""Performance of utilities""" +from time import time +import sys +import stat + +from lib import ( + TestBigRepoR + ) + + +class TestUtilPerformance(TestBigRepoR): + + def test_access(self): + # compare dict vs. slot access + class Slotty(object): + __slots__ = "attr" + def __init__(self): + self.attr = 1 + + class Dicty(object): + def __init__(self): + self.attr = 1 + + class BigSlotty(object): + __slots__ = ('attr', ) + tuple('abcdefghijk') + def __init__(self): + for attr in self.__slots__: + setattr(self, attr, 1) + + class BigDicty(object): + def __init__(self): + for attr in BigSlotty.__slots__: + setattr(self, attr, 1) + + ni = 1000000 + for cls in (Slotty, Dicty, BigSlotty, BigDicty): + cli = cls() + st = time() + for i in xrange(ni): + cli.attr + # END for each access + elapsed = time() - st + print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed) + # END for each class type + + # check num of sequence-acceses + for cls in (list, tuple): + x = 10 + st = time() + s = cls(range(x)) + for i in xrange(ni): + s[0] + s[1] + s[2] + # END for + elapsed = time() - st + na = ni * 3 + print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed) + # END for each sequence diff --git a/test/git/test_commit.py b/test/git/test_commit.py index 48937c93..e65e2e59 100644 --- a/test/git/test_commit.py +++ b/test/git/test_commit.py @@ -6,172 +6,229 @@ from test.testlib import * from git import * +from git.odb import IStream + +from cStringIO import StringIO +import time +import sys -class TestCommit(TestBase): - def test_bake(self): +def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False): + """traverse all commits in the history of commit identified by commit_id and check + if the serialization works. 
+ :param print_performance_info: if True, we will show how fast we are""" + ns = 0 # num serializations + nds = 0 # num deserializations + + st = time.time() + for cm in rwrepo.commit(commit_id).traverse(): + nds += 1 + + # assert that we deserialize commits correctly, hence we get the same + # sha on serialization + stream = StringIO() + cm._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream)) + assert istream.sha == cm.sha + + nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha, + cm.author, cm.authored_date, cm.author_tz_offset, + cm.committer, cm.committed_date, cm.committer_tz_offset, + cm.message, cm.parents, cm.encoding) + + assert nc.parents == cm.parents + stream = StringIO() + nc._serialize(stream) + ns += 1 + streamlen = stream.tell() + stream.seek(0) + + # reuse istream + istream.size = streamlen + istream.stream = stream + istream.sha = None + nc.sha = rwrepo.odb.store(istream).sha + + # if it worked, we have exactly the same contents ! + assert nc.sha == cm.sha + # END check commits + elapsed = time.time() - st + + if print_performance_info: + print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s" % (ns, nds, elapsed, ns/elapsed, nds/elapsed) + # END handle performance info + - commit = Commit(self.rorepo, **{'sha': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'}) - commit.author # bake +class TestCommit(TestBase): - assert_equal("Sebastian Thiel", commit.author.name) - assert_equal("byronimo@gmail.com", commit.author.email) - assert commit.author == commit.committer - assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) - assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) - assert commit.message == "Added missing information to docstrings of commit and stats module" + def test_bake(self): + commit = Commit(self.rorepo, '2454ae89983a4496a445ce347d7a41c0bb0ea7ae') + commit.author # bake - def test_stats(self): - commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') - stats = commit.stats - - def check_entries(d): - assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): - assert key in d - # END assertion helper - assert stats.files - assert stats.total - - check_entries(stats.total) - assert "files" in stats.total - - for filepath, d in stats.files.items(): - check_entries(d) - # END for each stated file - - # assure data is parsed properly - michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") - assert commit.author == michael - assert commit.committer == michael - assert commit.authored_date == 1210193388 - assert commit.committed_date == 1210193388 - assert commit.author_tz_offset == 14400, commit.author_tz_offset - assert commit.committer_tz_offset == 14400, commit.committer_tz_offset - assert commit.message == "initial project" - - def test_traversal(self): - start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") - first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") - p0 = start.parents[0] - p1 = start.parents[1] - p00 = p0.parents[0] - p10 = p1.parents[0] - - # basic branch first, depth first - dfirst = start.traverse(branch_first=False) - bfirst = start.traverse(branch_first=True) - assert dfirst.next() == p0 - assert dfirst.next() == p00 - - assert bfirst.next() == p0 - assert bfirst.next() == p1 - assert bfirst.next() == p00 - assert bfirst.next() == p10 - - # at some point, 
both iterations should stop - assert list(bfirst)[-1] == first - stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) - l = list(stoptraverse) - assert len(l[0]) == 2 - - # ignore self - assert start.traverse(ignore_self=False).next() == start - - # depth - assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 - - # prune - assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 - - # predicate - assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 - - # traversal should stop when the beginning is reached - self.failUnlessRaises(StopIteration, first.traverse().next) - - # parents of the first commit should be empty ( as the only parent has a null - # sha ) - assert len(first.parents) == 0 - - def test_iteration(self): - # we can iterate commits - all_commits = Commit.list_items(self.rorepo, self.rorepo.head) - assert all_commits - assert all_commits == list(self.rorepo.iter_commits()) - - # this includes merge commits - mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') - assert mcomit in all_commits - - # we can limit the result to paths - ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) - assert ltd_commits and len(ltd_commits) < len(all_commits) - - # show commits of multiple paths, resulting in a union of commits - less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) - assert len(ltd_commits) < len(less_ltd_commits) - - - @patch_object(Git, '_call_process') - def test_rev_list_bisect_all(self, git): - """ - 'git rev-list --bisect-all' returns additional information - in the commit header. This test ensures that we properly parse it. - """ + assert_equal("Sebastian Thiel", commit.author.name) + assert_equal("byronimo@gmail.com", commit.author.email) + assert commit.author == commit.committer + assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int) + assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int) + assert commit.message == "Added missing information to docstrings of commit and stats module\n" - git.return_value = fixture('rev_list_bisect_all') - revs = self.rorepo.git.rev_list('HEAD', - pretty='raw', - first_parent=True, - bisect_all=True) - assert_true(git.called) + def test_stats(self): + commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781') + stats = commit.stats + + def check_entries(d): + assert isinstance(d, dict) + for key in ("insertions", "deletions", "lines"): + assert key in d + # END assertion helper + assert stats.files + assert stats.total + + check_entries(stats.total) + assert "files" in stats.total + + for filepath, d in stats.files.items(): + check_entries(d) + # END for each stated file + + # assure data is parsed properly + michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") + assert commit.author == michael + assert commit.committer == michael + assert commit.authored_date == 1210193388 + assert commit.committed_date == 1210193388 + assert commit.author_tz_offset == 14400, commit.author_tz_offset + assert commit.committer_tz_offset == 14400, commit.committer_tz_offset + assert commit.message == "initial project\n" + + def test_traversal(self): + start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff") + first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") + p0 = start.parents[0] + p1 = start.parents[1] + p00 = p0.parents[0] + p10 = 
p1.parents[0] + + # basic branch first, depth first + dfirst = start.traverse(branch_first=False) + bfirst = start.traverse(branch_first=True) + assert dfirst.next() == p0 + assert dfirst.next() == p00 + + assert bfirst.next() == p0 + assert bfirst.next() == p1 + assert bfirst.next() == p00 + assert bfirst.next() == p10 + + # at some point, both iterations should stop + assert list(bfirst)[-1] == first + stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) + l = list(stoptraverse) + assert len(l[0]) == 2 + + # ignore self + assert start.traverse(ignore_self=False).next() == start + + # depth + assert len(list(start.traverse(ignore_self=False, depth=0))) == 1 + + # prune + assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1 + + # predicate + assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1 + + # traversal should stop when the beginning is reached + self.failUnlessRaises(StopIteration, first.traverse().next) + + # parents of the first commit should be empty ( as the only parent has a null + # sha ) + assert len(first.parents) == 0 + + def test_iteration(self): + # we can iterate commits + all_commits = Commit.list_items(self.rorepo, self.rorepo.head) + assert all_commits + assert all_commits == list(self.rorepo.iter_commits()) + + # this includes merge commits + mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d') + assert mcomit in all_commits + + # we can limit the result to paths + ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES')) + assert ltd_commits and len(ltd_commits) < len(all_commits) + + # show commits of multiple paths, resulting in a union of commits + less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS'))) + assert len(ltd_commits) < len(less_ltd_commits) + + def test_iter_items(self): + # pretty not allowed + self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw") + + def test_rev_list_bisect_all(self): + """ + 'git rev-list --bisect-all' returns additional information + in the commit header. This test ensures that we properly parse it. 
+ """ + revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474', + first_parent=True, + bisect_all=True) - commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True) - expected_ids = ( - 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b', - '33ebe7acec14b25c5f84f35a664803fcab2f7781', - 'a6604a00a652e754cb8b6b0b9f194f839fc38d7c', - '8df638c22c75ddc9a43ecdde90c0c9939f5009e7', - 'c231551328faa864848bde6ff8127f59c9566e90', - ) - for sha1, commit in zip(expected_ids, commits): - assert_equal(sha1, commit.sha) + commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs)) + expected_ids = ( + '7156cece3c49544abb6bf7a0c218eb36646fad6d', + '1f66cfbbce58b4b552b041707a12d437cc5f400a', + '33ebe7acec14b25c5f84f35a664803fcab2f7781', + '933d23bf95a5bd1624fbcdf328d904e1fa173474' + ) + for sha1, commit in zip(expected_ids, commits): + assert_equal(sha1, commit.sha) - def test_count(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 - - def test_list(self): - assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) + def test_count(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143 + + def test_list(self): + assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit) - def test_str(self): - commit = Commit(self.rorepo, 'abc') - assert_equal ("abc", str(commit)) + def test_str(self): + commit = Commit(self.rorepo, 'abc') + assert_equal ("abc", str(commit)) - def test_repr(self): - commit = Commit(self.rorepo, 'abc') - assert_equal('<git.Commit "abc">', repr(commit)) + def test_repr(self): + commit = Commit(self.rorepo, 'abc') + assert_equal('<git.Commit "abc">', repr(commit)) - def test_equality(self): - commit1 = Commit(self.rorepo, 'abc') - commit2 = Commit(self.rorepo, 'abc') - commit3 = Commit(self.rorepo, 'zyx') - assert_equal(commit1, commit2) - assert_not_equal(commit2, commit3) - - def test_iter_parents(self): - # should return all but ourselves, even if skip is defined - c = self.rorepo.commit('0.1.5') - for skip in (0, 1): - piter = c.iter_parents(skip=skip) - first_parent = piter.next() - assert first_parent != c - assert first_parent == c.parents[0] - # END for each - - def test_base(self): - name_rev = self.rorepo.head.commit.name_rev - assert isinstance(name_rev, basestring) - + def test_equality(self): + commit1 = Commit(self.rorepo, 'abc') + commit2 = Commit(self.rorepo, 'abc') + commit3 = Commit(self.rorepo, 'zyx') + assert_equal(commit1, commit2) + assert_not_equal(commit2, commit3) + + def test_iter_parents(self): + # should return all but ourselves, even if skip is defined + c = self.rorepo.commit('0.1.5') + for skip in (0, 1): + piter = c.iter_parents(skip=skip) + first_parent = piter.next() + assert first_parent != c + assert first_parent == c.parents[0] + # END for each + + def test_base(self): + name_rev = self.rorepo.head.commit.name_rev + assert isinstance(name_rev, basestring) + + @with_bare_rw_repo + def test_serialization(self, rwrepo): + # create all commits of our repo + assert_commit_serialization(rwrepo, '0.1.6') + diff --git a/test/git/test_diff.py b/test/git/test_diff.py index 2f6a19bd..a113b992 100644 --- a/test/git/test_diff.py +++ b/test/git/test_diff.py @@ -20,7 +20,7 @@ class TestDiff(TestBase): return diffs def test_list_from_string_new_mode(self): - output = ListProcessAdapter(fixture('diff_new_mode')) + output = 
StringProcessAdapter(fixture('diff_new_mode')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -28,7 +28,7 @@ class TestDiff(TestBase): assert_equal(10, len(diffs[0].diff.splitlines())) def test_diff_with_rename(self): - output = ListProcessAdapter(fixture('diff_rename')) + output = StringProcessAdapter(fixture('diff_rename')) diffs = Diff._index_from_patch_format(self.rorepo, output.stdout) self._assert_diff_format(diffs) @@ -47,7 +47,7 @@ class TestDiff(TestBase): "diff_tree_numstat_root" ) for fixture_name in fixtures: - diff_proc = ListProcessAdapter(fixture(fixture_name)) + diff_proc = StringProcessAdapter(fixture(fixture_name)) diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) # END for each fixture diff --git a/test/git/test_odb.py b/test/git/test_odb.py new file mode 100644 index 00000000..c3a03714 --- /dev/null +++ b/test/git/test_odb.py @@ -0,0 +1,66 @@ +"""Test for object db""" + +from test.testlib import * +from git.odb import * +from git.odb.stream import Sha1Writer +from git import Blob +from git.errors import BadObject + +from cStringIO import StringIO +import os + + +class TestDB(TestBase): + """Test the different db class implementations""" + + # data + two_lines = "1234\nhello world" + + all_data = (two_lines, ) + + def _assert_object_writing(self, db): + """General tests to verify object writing, compatible to ObjectDBW + :note: requires write access to the database""" + # start in 'dry-run' mode, using a simple sha1 writer + ostreams = (Sha1Writer, None) + for ostreamcls in ostreams: + for data in self.all_data: + dry_run = ostreamcls is not None + ostream = None + if ostreamcls is not None: + ostream = ostreamcls() + # END create ostream + + prev_ostream = db.set_ostream(ostream) + assert type(prev_ostream) in ostreams or prev_ostream in ostreams + + istream = IStream(Blob.type, len(data), StringIO(data)) + my_istream = db.store(istream) + sha = istream.sha + assert my_istream is istream + assert db.has_object(sha) != dry_run + assert len(sha) == 40 # for now we require 40 byte shas as default + + # verify data - the slow way, we want to run code + if not dry_run: + info = db.info(sha) + assert Blob.type == info.type + assert info.size == len(data) + + ostream = db.stream(sha) + assert ostream.read() == data + assert ostream.type == Blob.type + assert ostream.size == len(data) + else: + self.failUnlessRaises(BadObject, db.info, sha) + self.failUnlessRaises(BadObject, db.stream, sha) + # END for each data set + # END for each dry_run mode + + @with_bare_rw_repo + def test_writing(self, rwrepo): + ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects')) + + # write data + self._assert_object_writing(ldb) + diff --git a/test/git/test_performance.py b/test/git/test_performance.py deleted file mode 100644 index 72acfcac..00000000 --- a/test/git/test_performance.py +++ /dev/null @@ -1,52 +0,0 @@ -# test_performance.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from test.testlib import * -from git import * -from time import time - -class TestPerformance(TestBase): - - def _query_commit_info(self, c): - c.author - c.authored_date - c.author_tz_offset - c.committer - c.committed_date - c.committer_tz_offset - c.message - c.parents - - def test_iteration(self): - num_objs = 0 - num_commits = 0 - - # find the first commit containing the 
given path - always do a full - # iteration ( restricted to the path in question ), but in fact it should - # return quite a lot of commits, we just take one and hence abort the operation - - st = time() - for c in self.rorepo.iter_commits('0.1.6'): - num_commits += 1 - self._query_commit_info(c) - for obj in c.tree.traverse(): - obj.size - num_objs += 1 - # END for each object - # END for each commit - elapsed_time = time() - st - print "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (num_commits, num_objs, elapsed_time, num_objs/elapsed_time) - - def test_commit_traversal(self): - num_commits = 0 - - st = time() - for c in self.rorepo.commit('0.1.6').traverse(branch_first=False): - num_commits += 1 - self._query_commit_info(c) - # END for each traversed commit - elapsed_time = time() - st - print "Traversed %i Commits in %s [s] ( %f commits/s )" % (num_commits, elapsed_time, num_commits/elapsed_time) diff --git a/test/git/test_repo.py b/test/git/test_repo.py index ce79402a..d2c7c742 100644 --- a/test/git/test_repo.py +++ b/test/git/test_repo.py @@ -10,325 +10,396 @@ from git import * from git.utils import join_path_native import tempfile import shutil +from cStringIO import StringIO class TestRepo(TestBase): - - @raises(InvalidGitRepositoryError) - def test_new_should_raise_on_invalid_repo_location(self): - Repo(tempfile.gettempdir()) - - @raises(NoSuchPathError) - def test_new_should_raise_on_non_existant_path(self): - Repo("repos/foobar") - - def test_repo_creation_from_different_paths(self): - r_from_gitdir = Repo(self.rorepo.git_dir) - assert r_from_gitdir.git_dir == self.rorepo.git_dir - assert r_from_gitdir.git_dir.endswith('.git') - assert not self.rorepo.git.working_dir.endswith('.git') - assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir - - def test_description(self): - txt = "Test repository" - self.rorepo.description = txt - assert_equal(self.rorepo.description, txt) - - def test_heads_should_return_array_of_head_objects(self): - for head in self.rorepo.heads: - assert_equal(Head, head.__class__) - - def test_heads_should_populate_head_data(self): - for head in self.rorepo.heads: - assert head.name - assert isinstance(head.commit,Commit) - # END for each head - - assert isinstance(self.rorepo.heads.master, Head) - assert isinstance(self.rorepo.heads['master'], Head) - - def test_tree_from_revision(self): - tree = self.rorepo.tree('0.1.6') - assert tree.type == "tree" - assert self.rorepo.tree(tree) == tree - - # try from invalid revision that does not exist - self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') - - @patch_object(Git, '_call_process') - def test_commits(self, git): - git.return_value = ListProcessAdapter(fixture('rev_list')) - - commits = list( self.rorepo.iter_commits('master', max_count=10) ) - - c = commits[0] - assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha) - assert_equal(["634396b2f541a9f2d58b00be1a07f0c358b999b3"], [p.sha for p in c.parents]) - assert_equal("672eca9b7f9e09c22dcb128c283e8c3c8d7697a4", c.tree.sha) - assert_equal("Tom Preston-Werner", c.author.name) - assert_equal("tom@mojombo.com", c.author.email) - assert_equal(1191999972, c.authored_date) - assert_equal("Tom Preston-Werner", c.committer.name) - assert_equal("tom@mojombo.com", c.committer.email) - assert_equal(1191999972, c.committed_date) - assert_equal("implement Grit#heads", c.message) - - c = commits[1] - assert_equal(tuple(), c.parents) - - c = commits[2] - 
assert_equal(["6e64c55896aabb9a7d8e9f8f296f426d21a78c2c", "7f874954efb9ba35210445be456c74e037ba6af2"], map(lambda p: p.sha, c.parents)) - assert_equal("Merge branch 'site'", c.summary) - - assert_true(git.called) - - def test_trees(self): - mc = 30 - num_trees = 0 - for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): - num_trees += 1 - assert isinstance(tree, Tree) - # END for each tree - assert num_trees == mc - - - def _test_empty_repo(self, repo): - # test all kinds of things with an empty, freshly initialized repo. - # It should throw good errors - - # entries should be empty - assert len(repo.index.entries) == 0 - - # head is accessible - assert repo.head - assert repo.head.ref - assert not repo.head.is_valid() - - # we can change the head to some other ref - head_ref = Head.from_path(repo, Head.to_full_path('some_head')) - assert not head_ref.is_valid() - repo.head.ref = head_ref - - # is_dirty can handle all kwargs - for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): - assert not repo.is_dirty(*args) - # END for each arg - - # we can add a file to the index ( if we are not bare ) - if not repo.bare: - pass - # END test repos with working tree - - - def test_init(self): - prev_cwd = os.getcwd() - os.chdir(tempfile.gettempdir()) - git_dir_rela = "repos/foo/bar.git" - del_dir_abs = os.path.abspath("repos") - git_dir_abs = os.path.abspath(git_dir_rela) - try: - # with specific path - for path in (git_dir_rela, git_dir_abs): - r = Repo.init(path=path, bare=True) - assert isinstance(r, Repo) - assert r.bare == True - assert os.path.isdir(r.git_dir) - - self._test_empty_repo(r) - shutil.rmtree(git_dir_abs) - # END for each path - - os.makedirs(git_dir_rela) - os.chdir(git_dir_rela) - r = Repo.init(bare=False) - r.bare == False - - self._test_empty_repo(r) - finally: - try: - shutil.rmtree(del_dir_abs) - except OSError: - pass - os.chdir(prev_cwd) - # END restore previous state - - def test_bare_property(self): - self.rorepo.bare - - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_init_with_options(self, git, repo): - git.return_value = True - repo.return_value = None - - r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"}) - assert isinstance(r, Repo) - - assert_true(git.called) - assert_true(repo.called) - - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_clone(self, git, repo): - git.return_value = None - repo.return_value = None - - self.rorepo.clone("repos/foo/bar.git") - - assert_true(git.called) - path = os.path.join(absolute_project_path(), '.git') - assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {})) - assert_true(repo.called) - - @patch_object(Repo, '__init__') - @patch_object(Git, '_call_process') - def test_clone_with_options(self, git, repo): - git.return_value = None - repo.return_value = None - - self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'}) - - assert_true(git.called) - path = os.path.join(absolute_project_path(), '.git') - assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), - { 'template': '/awesome'})) - assert_true(repo.called) - - - def test_daemon_export(self): - orig_val = self.rorepo.daemon_export - self.rorepo.daemon_export = not orig_val - assert self.rorepo.daemon_export == ( not orig_val ) - self.rorepo.daemon_export = orig_val - assert self.rorepo.daemon_export == orig_val + + @raises(InvalidGitRepositoryError) + def test_new_should_raise_on_invalid_repo_location(self): + Repo(tempfile.gettempdir()) + + 
@raises(NoSuchPathError) + def test_new_should_raise_on_non_existant_path(self): + Repo("repos/foobar") + + def test_repo_creation_from_different_paths(self): + r_from_gitdir = Repo(self.rorepo.git_dir) + assert r_from_gitdir.git_dir == self.rorepo.git_dir + assert r_from_gitdir.git_dir.endswith('.git') + assert not self.rorepo.git.working_dir.endswith('.git') + assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir + + def test_description(self): + txt = "Test repository" + self.rorepo.description = txt + assert_equal(self.rorepo.description, txt) + + def test_heads_should_return_array_of_head_objects(self): + for head in self.rorepo.heads: + assert_equal(Head, head.__class__) + + def test_heads_should_populate_head_data(self): + for head in self.rorepo.heads: + assert head.name + assert isinstance(head.commit,Commit) + # END for each head + + assert isinstance(self.rorepo.heads.master, Head) + assert isinstance(self.rorepo.heads['master'], Head) + + def test_tree_from_revision(self): + tree = self.rorepo.tree('0.1.6') + assert len(tree.sha) == 40 + assert tree.type == "tree" + assert self.rorepo.tree(tree) == tree + + # try from invalid revision that does not exist + self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world') + + def test_commits(self): + mc = 10 + commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc)) + assert len(commits) == mc + + c = commits[0] + assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha) + assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents]) + assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha) + assert_equal("Michael Trier", c.author.name) + assert_equal("mtrier@gmail.com", c.author.email) + assert_equal(1232829715, c.authored_date) + assert_equal(5*3600, c.author_tz_offset) + assert_equal("Michael Trier", c.committer.name) + assert_equal("mtrier@gmail.com", c.committer.email) + assert_equal(1232829715, c.committed_date) + assert_equal(5*3600, c.committer_tz_offset) + assert_equal("Bumped version 0.1.6\n", c.message) + + c = commits[1] + assert isinstance(c.parents, tuple) + + def test_trees(self): + mc = 30 + num_trees = 0 + for tree in self.rorepo.iter_trees('0.1.5', max_count=mc): + num_trees += 1 + assert isinstance(tree, Tree) + # END for each tree + assert num_trees == mc + + + def _test_empty_repo(self, repo): + # test all kinds of things with an empty, freshly initialized repo. 
+ # It should throw good errors + + # entries should be empty + assert len(repo.index.entries) == 0 + + # head is accessible + assert repo.head + assert repo.head.ref + assert not repo.head.is_valid() + + # we can change the head to some other ref + head_ref = Head.from_path(repo, Head.to_full_path('some_head')) + assert not head_ref.is_valid() + repo.head.ref = head_ref + + # is_dirty can handle all kwargs + for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): + assert not repo.is_dirty(*args) + # END for each arg + + # we can add a file to the index ( if we are not bare ) + if not repo.bare: + pass + # END test repos with working tree + + + def test_init(self): + prev_cwd = os.getcwd() + os.chdir(tempfile.gettempdir()) + git_dir_rela = "repos/foo/bar.git" + del_dir_abs = os.path.abspath("repos") + git_dir_abs = os.path.abspath(git_dir_rela) + try: + # with specific path + for path in (git_dir_rela, git_dir_abs): + r = Repo.init(path=path, bare=True) + assert isinstance(r, Repo) + assert r.bare == True + assert os.path.isdir(r.git_dir) + + self._test_empty_repo(r) + shutil.rmtree(git_dir_abs) + # END for each path + + os.makedirs(git_dir_rela) + os.chdir(git_dir_rela) + r = Repo.init(bare=False) + r.bare == False + + self._test_empty_repo(r) + finally: + try: + shutil.rmtree(del_dir_abs) + except OSError: + pass + os.chdir(prev_cwd) + # END restore previous state + + def test_bare_property(self): + self.rorepo.bare + + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_init_with_options(self, git, repo): + git.return_value = True + repo.return_value = None + + r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"}) + assert isinstance(r, Repo) + + assert_true(git.called) + assert_true(repo.called) + + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_clone(self, git, repo): + git.return_value = None + repo.return_value = None + + self.rorepo.clone("repos/foo/bar.git") + + assert_true(git.called) + path = os.path.join(absolute_project_path(), '.git') + assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {})) + assert_true(repo.called) + + @patch_object(Repo, '__init__') + @patch_object(Git, '_call_process') + def test_clone_with_options(self, git, repo): + git.return_value = None + repo.return_value = None + + self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'}) + + assert_true(git.called) + path = os.path.join(absolute_project_path(), '.git') + assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), + { 'template': '/awesome'})) + assert_true(repo.called) + + + def test_daemon_export(self): + orig_val = self.rorepo.daemon_export + self.rorepo.daemon_export = not orig_val + assert self.rorepo.daemon_export == ( not orig_val ) + self.rorepo.daemon_export = orig_val + assert self.rorepo.daemon_export == orig_val - def test_alternates(self): - cur_alternates = self.rorepo.alternates - # empty alternates - self.rorepo.alternates = [] - assert self.rorepo.alternates == [] - alts = [ "other/location", "this/location" ] - self.rorepo.alternates = alts - assert alts == self.rorepo.alternates - self.rorepo.alternates = cur_alternates - - def test_repr(self): - path = os.path.join(os.path.abspath(GIT_REPO), '.git') - assert_equal('<git.Repo "%s">' % path, repr(self.rorepo)) - - def test_is_dirty_with_bare_repository(self): - self.rorepo._bare = True - assert_false(self.rorepo.is_dirty()) - - def test_is_dirty(self): - self.rorepo._bare = False - for index in (0,1): - for working_tree 
in (0,1): - for untracked_files in (0,1): - assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) - # END untracked files - # END working tree - # END index - self.rorepo._bare = True - assert self.rorepo.is_dirty() == False - - def test_head(self): - assert self.rorepo.head.reference.object == self.rorepo.active_branch.object - - def test_index(self): - index = self.rorepo.index - assert isinstance(index, IndexFile) - - def test_tag(self): - assert self.rorepo.tag('refs/tags/0.1.5').commit - - def test_archive(self): - tmpfile = os.tmpfile() - self.rorepo.archive(tmpfile, '0.1.5') - assert tmpfile.tell() - - @patch_object(Git, '_call_process') - def test_should_display_blame_information(self, git): - git.return_value = fixture('blame') - b = self.rorepo.blame( 'master', 'lib/git.py') - assert_equal(13, len(b)) - assert_equal( 2, len(b[0]) ) - # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) - assert_equal(hash(b[0][0]), hash(b[9][0])) - c = b[0][0] - assert_true(git.called) - assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) - - assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha) - assert_equal('Tom Preston-Werner', c.author.name) - assert_equal('tom@mojombo.com', c.author.email) - assert_equal(1191997100, c.authored_date) - assert_equal('Tom Preston-Werner', c.committer.name) - assert_equal('tom@mojombo.com', c.committer.email) - assert_equal(1191997100, c.committed_date) - assert_equal('initial grit setup', c.message) - - # test the 'lines per commit' entries - tlist = b[0][1] - assert_true( tlist ) - assert_true( isinstance( tlist[0], basestring ) ) - assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug - - def test_untracked_files(self): - base = self.rorepo.working_tree_dir - files = ( join_path_native(base, "__test_myfile"), - join_path_native(base, "__test_other_file") ) - num_recently_untracked = 0 - try: - for fpath in files: - fd = open(fpath,"wb") - fd.close() - # END for each filename - untracked_files = self.rorepo.untracked_files - num_recently_untracked = len(untracked_files) - - # assure we have all names - they are relative to the git-dir - num_test_untracked = 0 - for utfile in untracked_files: - num_test_untracked += join_path_native(base, utfile) in files - assert len(files) == num_test_untracked - finally: - for fpath in files: - if os.path.isfile(fpath): - os.remove(fpath) - # END handle files - - assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) - - def test_config_reader(self): - reader = self.rorepo.config_reader() # all config files - assert reader.read_only - reader = self.rorepo.config_reader("repository") # single config file - assert reader.read_only - - def test_config_writer(self): - for config_level in self.rorepo.config_level: - try: - writer = self.rorepo.config_writer(config_level) - assert not writer.read_only - except IOError: - # its okay not to get a writer for some configuration files if we - # have no permissions - pass - # END for each config level - - def test_creation_deletion(self): - # just a very quick test to assure it generally works. 
There are - # specialized cases in the test_refs module - head = self.rorepo.create_head("new_head", "HEAD~1") - self.rorepo.delete_head(head) - - tag = self.rorepo.create_tag("new_tag", "HEAD~2") - self.rorepo.delete_tag(tag) - - remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") - self.rorepo.delete_remote(remote) - - def test_comparison_and_hash(self): - # this is only a preliminary test, more testing done in test_index - assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) - assert len(set((self.rorepo, self.rorepo))) == 1 + def test_alternates(self): + cur_alternates = self.rorepo.alternates + # empty alternates + self.rorepo.alternates = [] + assert self.rorepo.alternates == [] + alts = [ "other/location", "this/location" ] + self.rorepo.alternates = alts + assert alts == self.rorepo.alternates + self.rorepo.alternates = cur_alternates + + def test_repr(self): + path = os.path.join(os.path.abspath(GIT_REPO), '.git') + assert_equal('<git.Repo "%s">' % path, repr(self.rorepo)) + + def test_is_dirty_with_bare_repository(self): + self.rorepo._bare = True + assert_false(self.rorepo.is_dirty()) + + def test_is_dirty(self): + self.rorepo._bare = False + for index in (0,1): + for working_tree in (0,1): + for untracked_files in (0,1): + assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False) + # END untracked files + # END working tree + # END index + self.rorepo._bare = True + assert self.rorepo.is_dirty() == False + + def test_head(self): + assert self.rorepo.head.reference.object == self.rorepo.active_branch.object + + def test_index(self): + index = self.rorepo.index + assert isinstance(index, IndexFile) + + def test_tag(self): + assert self.rorepo.tag('refs/tags/0.1.5').commit + + def test_archive(self): + tmpfile = os.tmpfile() + self.rorepo.archive(tmpfile, '0.1.5') + assert tmpfile.tell() + + @patch_object(Git, '_call_process') + def test_should_display_blame_information(self, git): + git.return_value = fixture('blame') + b = self.rorepo.blame( 'master', 'lib/git.py') + assert_equal(13, len(b)) + assert_equal( 2, len(b[0]) ) + # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b)) + assert_equal(hash(b[0][0]), hash(b[9][0])) + c = b[0][0] + assert_true(git.called) + assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True})) + + assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha) + assert_equal('Tom Preston-Werner', c.author.name) + assert_equal('tom@mojombo.com', c.author.email) + assert_equal(1191997100, c.authored_date) + assert_equal('Tom Preston-Werner', c.committer.name) + assert_equal('tom@mojombo.com', c.committer.email) + assert_equal(1191997100, c.committed_date) + assert_equal('initial grit setup', c.message) + + # test the 'lines per commit' entries + tlist = b[0][1] + assert_true( tlist ) + assert_true( isinstance( tlist[0], basestring ) ) + assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug + + def test_untracked_files(self): + base = self.rorepo.working_tree_dir + files = ( join_path_native(base, "__test_myfile"), + join_path_native(base, "__test_other_file") ) + num_recently_untracked = 0 + try: + for fpath in files: + fd = open(fpath,"wb") + fd.close() + # END for each filename + untracked_files = self.rorepo.untracked_files + num_recently_untracked = len(untracked_files) + + # assure we have all names - they are relative to the git-dir + num_test_untracked = 0 + for utfile in untracked_files: + num_test_untracked += 
join_path_native(base, utfile) in files + assert len(files) == num_test_untracked + finally: + for fpath in files: + if os.path.isfile(fpath): + os.remove(fpath) + # END handle files + + assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files)) + + def test_config_reader(self): + reader = self.rorepo.config_reader() # all config files + assert reader.read_only + reader = self.rorepo.config_reader("repository") # single config file + assert reader.read_only + + def test_config_writer(self): + for config_level in self.rorepo.config_level: + try: + writer = self.rorepo.config_writer(config_level) + assert not writer.read_only + except IOError: + # its okay not to get a writer for some configuration files if we + # have no permissions + pass + # END for each config level + + def test_creation_deletion(self): + # just a very quick test to assure it generally works. There are + # specialized cases in the test_refs module + head = self.rorepo.create_head("new_head", "HEAD~1") + self.rorepo.delete_head(head) + + tag = self.rorepo.create_tag("new_tag", "HEAD~2") + self.rorepo.delete_tag(tag) + + remote = self.rorepo.create_remote("new_remote", "git@server:repo.git") + self.rorepo.delete_remote(remote) + + def test_comparison_and_hash(self): + # this is only a preliminary test, more testing done in test_index + assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo) + assert len(set((self.rorepo, self.rorepo))) == 1 + + def test_git_cmd(self): + # test CatFileContentStream, just to be very sure we have no fencepost errors + # last \n is the terminating newline that it expects + l1 = "0123456789\n" + l2 = "abcdefghijklmnopqrstxy\n" + l3 = "z\n" + d = "%s%s%s\n" % (l1, l2, l3) + + l1p = l1[:5] + + # full size + # size is without terminating newline + def mkfull(): + return Git.CatFileContentStream(len(d)-1, StringIO(d)) + + ts = 5 + def mktiny(): + return Git.CatFileContentStream(ts, StringIO(d)) + + # readlines no limit + s = mkfull() + lines = s.readlines() + assert len(lines) == 3 and lines[-1].endswith('\n') + assert s._stream.tell() == len(d) # must have scrubbed to the end + + # realines line limit + s = mkfull() + lines = s.readlines(5) + assert len(lines) == 1 + + # readlines on tiny sections + s = mktiny() + lines = s.readlines() + assert len(lines) == 1 and lines[0] == l1p + assert s._stream.tell() == ts+1 + + # readline no limit + s = mkfull() + assert s.readline() == l1 + assert s.readline() == l2 + assert s.readline() == l3 + assert s.readline() == '' + assert s._stream.tell() == len(d) + + # readline limit + s = mkfull() + assert s.readline(5) == l1p + assert s.readline() == l1[5:] + + # readline on tiny section + s = mktiny() + assert s.readline() == l1p + assert s.readline() == '' + assert s._stream.tell() == ts+1 + + # read no limit + s = mkfull() + assert s.read() == d[:-1] + assert s.read() == '' + assert s._stream.tell() == len(d) + + # read limit + s = mkfull() + assert s.read(5) == l1p + assert s.read(6) == l1[5:] + assert s._stream.tell() == 5 + 6 # its not yet done + + # read tiny + s = mktiny() + assert s.read(2) == l1[:2] + assert s._stream.tell() == 2 + assert s.read() == l1[2:ts] + assert s._stream.tell() == ts+1 diff --git a/test/git/test_utils.py b/test/git/test_utils.py index f843c12e..83ef7e4b 100644 --- a/test/git/test_utils.py +++ b/test/git/test_utils.py @@ -9,112 +9,144 @@ import tempfile from test.testlib import * from git.utils import * +from git.objects.utils import * from git import * from git.cmd import dashify import time 
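+
+# For orientation: the LockFile protocol exercised by the tests below boils down
+# to this sketch (these are internal methods, shown for illustration only):
+#
+#     lock = LockFile(tempfile.mktemp())
+#     lock._obtain_lock_or_raise()    # a second holder would raise IOError
+#     assert lock._has_lock()
+#     lock._release_lock()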
class TestUtils(TestCase): - def setup(self): - self.testdict = { - "string": "42", - "int": 42, - "array": [ 42 ], - } + def setup(self): + self.testdict = { + "string": "42", + "int": 42, + "array": [ 42 ], + } - def test_it_should_dashify(self): - assert_equal('this-is-my-argument', dashify('this_is_my_argument')) - assert_equal('foo', dashify('foo')) - - - def test_lock_file(self): - my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # release lock we don't have - fine - lock_file._release_lock() - - # get lock - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() - - # concurrent access - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) - - lock_file._release_lock() - assert not lock_file._has_lock() - - other_lock_file._obtain_lock_or_raise() - self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise) - - # auto-release on destruction - del(other_lock_file) - lock_file._obtain_lock_or_raise() - lock_file._release_lock() - - def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # next one waits for the lock - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.failUnlessRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start - assert elapsed <= wait_time + 0.02 # some extra time it may cost - - def _cmp_contents(self, file_path, data): - # raise if data from file at file_path - # does not match data string - fp = open(file_path, "rb") - try: - assert fp.read() == data - finally: - fp.close() - - def test_safe_operation(self): - my_file = tempfile.mktemp() - orig_data = "hello" - new_data = "world" - my_file_fp = open(my_file, "wb") - my_file_fp.write(orig_data) - my_file_fp.close() - - try: - cwrite = ConcurrentWriteOperation(my_file) - - # didn't start writing, doesnt matter - cwrite._end_writing(False) - cwrite._end_writing(True) - assert not cwrite._is_writing() - - # write data and fail - stream = cwrite._begin_writing() - assert cwrite._is_writing() - stream.write(new_data) - cwrite._end_writing(successful=False) - self._cmp_contents(my_file, orig_data) - assert not os.path.exists(stream.name) - - # write data - concurrently - ocwrite = ConcurrentWriteOperation(my_file) - stream = cwrite._begin_writing() - self.failUnlessRaises(IOError, ocwrite._begin_writing) - - stream.write("world") - cwrite._end_writing(successful=True) - self._cmp_contents(my_file, new_data) - assert not os.path.exists(stream.name) - - # could test automatic _end_writing on destruction - finally: - os.remove(my_file) - # END final cleanup - - - - + def test_it_should_dashify(self): + assert_equal('this-is-my-argument', dashify('this_is_my_argument')) + assert_equal('foo', dashify('foo')) + + + def test_lock_file(self): + my_file = tempfile.mktemp() + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # release lock we don't have - fine + lock_file._release_lock() + + # get lock + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() + + # concurrent access + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise) + + lock_file._release_lock() + assert not lock_file._has_lock() + + other_lock_file._obtain_lock_or_raise() + self.failUnlessRaises(IOError, 
lock_file._obtain_lock_or_raise) + + # auto-release on destruction + del(other_lock_file) + lock_file._obtain_lock_or_raise() + lock_file._release_lock() + + def test_blocking_lock_file(self): + my_file = tempfile.mktemp() + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # next one waits for the lock + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.failUnlessRaises(IOError, wait_lock._obtain_lock) + elapsed = time.time() - start + assert elapsed <= wait_time + 0.02 # some extra time it may cost + + def _cmp_contents(self, file_path, data): + # raise if data from file at file_path + # does not match data string + fp = open(file_path, "rb") + try: + assert fp.read() == data + finally: + fp.close() + + def test_safe_operation(self): + my_file = tempfile.mktemp() + orig_data = "hello" + new_data = "world" + my_file_fp = open(my_file, "wb") + my_file_fp.write(orig_data) + my_file_fp.close() + + try: + cwrite = ConcurrentWriteOperation(my_file) + + # didn't start writing, doesnt matter + cwrite._end_writing(False) + cwrite._end_writing(True) + assert not cwrite._is_writing() + + # write data and fail + stream = cwrite._begin_writing() + assert cwrite._is_writing() + stream.write(new_data) + cwrite._end_writing(successful=False) + self._cmp_contents(my_file, orig_data) + assert not os.path.exists(stream.name) + + # write data - concurrently + ocwrite = ConcurrentWriteOperation(my_file) + stream = cwrite._begin_writing() + self.failUnlessRaises(IOError, ocwrite._begin_writing) + + stream.write("world") + cwrite._end_writing(successful=True) + self._cmp_contents(my_file, new_data) + assert not os.path.exists(stream.name) + + # could test automatic _end_writing on destruction + finally: + os.remove(my_file) + # END final cleanup + + def test_user_id(self): + assert '@' in get_user_id() + + def test_parse_date(self): + # test all supported formats + def assert_rval(rval, veri_time, offset=0): + assert len(rval) == 2 + assert isinstance(rval[0], int) and isinstance(rval[1], int) + assert rval[0] == veri_time + assert rval[1] == offset + + # now that we are here, test our conversion functions as well + utctz = altz_to_utctz_str(offset) + assert isinstance(utctz, basestring) + assert utctz_to_altz(verify_utctz(utctz)) == offset + # END assert rval utility + + rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0) + iso = ("2005-04-07T22:13:11 -0200", 7200) + iso2 = ("2005-04-07 22:13:11 +0400", -14400) + iso3 = ("2005.04.07 22:13:11 -0000", 0) + alt = ("04/07/2005 22:13:11", 0) + alt2 = ("07.04.2005 22:13:11", 0) + veri_time = 1112904791 # the time this represents + for date, offset in (rfc, iso, iso2, iso3, alt, alt2): + assert_rval(parse_date(date), veri_time, offset) + # END for each date type + + # and failure + self.failUnlessRaises(ValueError, parse_date, 'invalid format') + self.failUnlessRaises(ValueError, parse_date, '123456789 -02000') + self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200') + + diff --git a/test/testlib/helper.py b/test/testlib/helper.py index 9c38ffd5..c9b4c2ac 100644 --- a/test/testlib/helper.py +++ b/test/testlib/helper.py @@ -9,6 +9,7 @@ from git import Repo, Remote, GitCommandError from unittest import TestCase import tempfile import shutil +import cStringIO GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) @@ -23,40 +24,13 @@ def absolute_project_path(): return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) -class ListProcessAdapter(object): - 
"""Allows to use lists as Process object as returned by SubProcess.Popen. +class StringProcessAdapter(object): + """Allows to use strings as Process object as returned by SubProcess.Popen. Its tailored to work with the test system only""" - class Stream(object): - """Simple stream emulater meant to work only with tests""" - def __init__(self, data): - self.data = data - self.cur_iter = None - - def __iter__(self): - dat = self.data - if isinstance(dat, basestring): - dat = dat.splitlines() - if self.cur_iter is None: - self.cur_iter = iter(dat) - return self.cur_iter - - def read(self): - dat = self.data - if isinstance(dat, (tuple,list)): - dat = "\n".join(dat) - return dat - - def next(self): - if self.cur_iter is None: - self.cur_iter = iter(self) - return self.cur_iter.next() - - # END stream - - def __init__(self, input_list_or_string): - self.stdout = self.Stream(input_list_or_string) - self.stderr = self.Stream('') + def __init__(self, input_string): + self.stdout = cStringIO.StringIO(input_string) + self.stderr = cStringIO.StringIO() def wait(self): return 0 |