Diffstat (limited to 'lib/git')
-rw-r--r--  lib/git/__init__.py       |   3
-rw-r--r--  lib/git/cmd.py            | 924
-rw-r--r--  lib/git/errors.py         |  24
-rw-r--r--  lib/git/index.py          |   4
-rw-r--r--  lib/git/objects/base.py   | 414
-rw-r--r--  lib/git/objects/commit.py | 788
-rw-r--r--  lib/git/objects/tree.py   |   2
-rw-r--r--  lib/git/objects/utils.py  | 439
-rw-r--r--  lib/git/odb/__init__.py   |   6
-rw-r--r--  lib/git/odb/db.py         | 337
-rw-r--r--  lib/git/odb/fun.py        | 108
-rw-r--r--  lib/git/odb/stream.py     | 446
-rw-r--r--  lib/git/odb/utils.py      |  38
-rw-r--r--  lib/git/repo.py           |  48
-rw-r--r--  lib/git/utils.py          |  21
15 files changed, 2428 insertions(+), 1174 deletions(-)
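
The headline change in this commit is the streamed read path: Git.stream_object_data returns a size-limited CatFileContentStream wrapped around the persistent `git cat-file --batch` pipe, and Object.data_stream / Object.stream_data now go through repo.odb instead of spawning a process per read. A minimal usage sketch, assuming a GitPython checkout at this revision (the repository path is illustrative)::

    from git import Repo

    repo = Repo("/path/to/repo")  # hypothetical path

    # One persistent `git cat-file --batch` process serves all requests;
    # the returned stream is clamped to this object's record.
    hexsha, typename, size, stream = repo.git.stream_object_data("HEAD")

    chunk = stream.read(4096)  # reads never cross the record boundary
    rest = stream.read()       # drains the record; the trailing newline is
                               # consumed so the pipe stays parseable
    assert len(chunk) + len(rest) == size

The size clamp is the design point: `cat-file --batch` multiplexes many object records over a single pipe, so the wrapper must stop readers at the record boundary (and discard any unread remainder on destruction), otherwise the next request would read leftover bytes from the previous object.
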
diff --git a/lib/git/__init__.py b/lib/git/__init__.py index aac539eb..2f17c55b 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -22,5 +22,8 @@ from git.remote import * from git.index import * from git.utils import LockFile, BlockingLockFile +# odb is NOT imported intentionally - if you really want it, you should get it +# yourself as its part of the core + __all__ = [ name for name, obj in locals().items() if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/cmd.py b/lib/git/cmd.py index 82daf551..5cae2998 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,427 +13,515 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) + 'with_exceptions', 'as_process', + 'output_stream' ) def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') class Git(object): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") - - # CONFIGURATION - # The size in bytes read from stdout when copying git's output to another stream - max_chunk_size = 1024*64 - - class AutoInterrupt(object): - """ - Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise. - """ - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: - return - - # can be that nothing really exists anymore ... - if os is None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """ - Wait for the process and return its status code. - - Raise - GitCommandError if the return status is not 0 - """ - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - - - - def __init__(self, working_dir=None): - """ - Initialize this instance with: - - ``working_dir`` - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories. 
- """ - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None - - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def working_dir(self): - """ - Returns - Git directory we are working on - """ - return self._working_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - ``istream`` - Standard input filehandle passed to subprocess.Popen. - - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. - - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. - - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. - - ``as_process`` - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - ``output_stream`` - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe as subprocess.Popen can only accept system - file descriptors, not python objects ( such as StringIO ). - This merely is a workaround as the data will be copied from the - output pipe to the given output stream directly. - See also: Git.max_chunk_size - - ``**subprocess_kwargs`` - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you - specify may not be the same ones. - - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - output_stream # extended_output = False - tuple(int(status), output_stream, str(stderr))# extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. - """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. 
- if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - close_fds=(os.name=='posix'),# unsupported on linux - **subprocess_kwargs - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value, stderr_value = proc.communicate() - # strip trailing "\n" - if stdout_value.endswith("\n"): - stdout_value = stdout_value[:-1] - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.returncode - else: - max_chunk_size = self.max_chunk_size - while True: - chunk = proc.stdout.read(max_chunk_size) - output_stream.write(chunk) - if len(chunk) < max_chunk_size: - break - # END reading output stream - stdout_value = output_stream - stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.wait() - # END stdout handling - finally: - proc.stdout.close() - proc.stderr.close() - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist - - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String - - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. - - ``args`` - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent - - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - Examples:: - git.rev_list('master', max_count=10, header=True) - - Returns - Same as execute() - """ - - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. 
- _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) - - def _parse_object_header(self, header_line): - """ - ``header_line`` - <hex_sha> type_string size_as_int - - Returns - (hex_sha, type_string, size_as_int) - - Raises - ValueError if the header contains indication for an error due to incorrect - input sha - """ - tokens = header_line.split() - if len(tokens) != 3: - raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : subprocess.PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ - Use this method to quickly examine the type and size of the object behind - the given ref. - - NOTE - The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - Return: - (hexsha, type_string, size_as_int) - """ - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ - As get_object_header, but returns object data as well - - Return: - (hexsha, type_string, size_as_int,data_string) - """ - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - data = cmd.stdout.read(size) - cmd.stdout.read(1) # finishing newlines - - return (hexsha, typename, size, data) - - def clear_cache(self): - """ - Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. - - Returns - self - """ - self.cat_file_all = None - self.cat_file_header = None - return self + """ + The Git class manages communication with the Git binary. + + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") + + # CONFIGURATION + # The size in bytes read from stdout when copying git's output to another stream + max_chunk_size = 1024*64 + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. 
It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. + + The wait method was overridden to perform automatic status code checking + and possibly raise. + """ + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # can be that nothing really exists anymore ... + if os is None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """ + Wait for the process and return its status code. + + Raise + GitCommandError if the return status is not 0 + """ + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + # END auto interrupt + + class CatFileContentStream(object): + """Object representing a sized read-only stream returning the contents of + an object. + It behaves like a stream, but counts the data read and simulates an empty + stream once our sized content region is empty. + If not all data is read to the end of the objects's lifetime, we read the + rest to assure the underlying stream continues to work""" + + __slots__ = ('_stream', '_nbr', '_size') + + def __init__(self, size, stream): + self._stream = stream + self._size = size + self._nbr = 0 # num bytes read + + def read(self, size=-1): + bytes_left = self._size - self._nbr + if bytes_left == 0: + return '' + if size > -1: + # assure we don't try to read past our limit + size = min(bytes_left, size) + else: + # they try to read all, make sure its not more than what remains + size = bytes_left + # END check early depletion + data = self._stream.read(size) + self._nbr += len(data) + + # check for depletion, read our final byte to make the stream usable by others + if self._size - self._nbr == 0: + self._stream.read(1) # final newline + # END finish reading + + return data + + def readline(self, size=-1): + if self._nbr == self._size: + return '' + + # clamp size to lowest allowed value + bytes_left = self._size - self._nbr + if size > -1: + size = min(bytes_left, size) + else: + size = bytes_left + # END handle size + + data = self._stream.readline(size) + self._nbr += len(data) + + # handle final byte + # we inline everything, it must be fast ! 
+ if self._size - self._nbr == 0: + self._stream.read(1) + # END finish reading + + return data + + def readlines(self, size=-1): + if self._nbr == self._size: + return list() + + # leave all additional logic to our readline method, we just check the size + out = list() + nbr = 0 + while True: + line = self.readline() + if not line: + break + out.append(line) + if size > -1: + nbr += len(line) + if nbr > size: + break + # END handle size constraint + # END readline loop + return out + + def __iter__(self): + return self + + def next(self): + line = self.readline() + if not line: + raise StopIteration + return line + + def __del__(self): + bytes_left = self._size - self._nbr + if bytes_left: + # seek and discard + self._stream.seek(bytes_left + 1, os.SEEK_CUR) # includes terminating newline + # END handle incomplete read + + + def __init__(self, working_dir=None): + """ + Initialize this instance with: + + ``working_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories. + """ + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None + + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. + """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + + @property + def working_dir(self): + """ + Returns + Git directory we are working on + """ + return self._working_dir + + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None, + **subprocess_kwargs + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) + + ``command`` + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. + + ``istream`` + Standard input filehandle passed to subprocess.Popen. + + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. + + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. + + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. + + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + ``output_stream`` + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. 
+ This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + ``**subprocess_kwargs`` + Keyword arguments to be passed to subprocess.Popen. Please note that + some of the valid kwargs are already set by this method, the ones you + specify may not be the same ones. + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + output_stream # extended_output = False + tuple(int(status), output_stream, str(stderr))# extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) + + # Allow the user to have the command executed in their working dir. + if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + close_fds=(os.name=='posix'),# unsupported on linux + **subprocess_kwargs + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value, stderr_value = proc.communicate() + # strip trailing "\n" + if stdout_value.endswith("\n"): + stdout_value = stdout_value[:-1] + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.returncode + else: + stream_copy(proc.stdout, output_stream, self.max_chunk_size) + stdout_value = output_stream + stderr_value = proc.stderr.read() + # strip trailing "\n" + if stderr_value.endswith("\n"): + stderr_value = stderr_value[:-1] + status = proc.wait() + # END stdout handling + finally: + proc.stdout.close() + proc.stderr.close() + + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) + + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) + + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value + + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. + """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args + + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist + + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String + + ``method`` + is the command. 
Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. + + ``args`` + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent + + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). + + Examples:: + git.rev_list('master', max_count=10, header=True) + + Returns + Same as execute() + """ + + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. + _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass + + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args + + call = ["git", dashify(method)] + call.extend(args) + + return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ Use this method to quickly examine the type and size of the object behind + the given ref. + + :note: The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. 
+ + :return: (hexsha, type_string, size_as_int) """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ As get_object_header, but returns object data as well + :return: (hexsha, type_string, size_as_int,data_string) + :note: not threadsafe + """ + hexsha, typename, size, stream = self.stream_object_data(ref) + data = stream.read(size) + del(stream) + return (hexsha, typename, size, data) + + def stream_object_data(self, ref): + """As get_object_header, but returns the data as a stream + :return: (hexsha, type_string, size_as_int, stream) + :note: This method is not threadsafe, you need one independent Command instance + per thread to be safe !""" + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) + + def clear_cache(self): + """ + Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + Returns + self + """ + self.cat_file_all = None + self.cat_file_header = None + return self diff --git a/lib/git/errors.py b/lib/git/errors.py index f66fb528..d8a35e02 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -8,19 +8,25 @@ Module containing all exceptions thrown througout the git package, """ class InvalidGitRepositoryError(Exception): - """ - Thrown if the given repository appears to have an invalid format. - """ + """ Thrown if the given repository appears to have an invalid format. """ + +class ODBError(Exception): + """All errors thrown by the object database""" + +class InvalidDBRoot(ODBError): + """Thrown if an object database cannot be initialized at the given path""" + +class BadObject(ODBError): + """The object with the given SHA does not exist""" + +class BadObjectType(ODBError): + """The object had an unsupported type""" class NoSuchPathError(OSError): - """ - Thrown if a path could not be access by the system. - """ + """ Thrown if a path could not be access by the system. """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ + """ Thrown if execution of the git command fails with non-zero status code. 
""" def __init__(self, command, status, stderr=None): self.stderr = stderr self.status = status diff --git a/lib/git/index.py b/lib/git/index.py index 8ccc3fe3..36428315 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -21,7 +21,7 @@ import git.diff as diff from errors import GitCommandError from git.objects import Blob, Tree, Object, Commit -from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native +from git.utils import IndexFileSHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native class CheckoutError( Exception ): @@ -461,7 +461,7 @@ class IndexFile(LazyMixin, diff.Diffable): write_op = ConcurrentWriteOperation(file_path or self._file_path) stream = write_op._begin_writing() - stream = SHA1Writer(stream) + stream = IndexFileSHA1Writer(stream) # header stream.write("DIRC") diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6a51eed3..5a3a15a7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,224 +4,220 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from git.utils import LazyMixin, join_path_native +from git.utils import LazyMixin, join_path_native, stream_copy import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. 
- """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + oinfo = self.repo.odb.info(self.sha) + self.size = oinfo.size + assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type) + elif attr == "data": + ostream = self.repo.odb.stream(self.sha) + self.size = ostream.size + self.data = ostream.read() + assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + @property + def data_stream(self): + """ :return: File Object compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + return self.repo.odb.stream(self.sha) - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + def stream_data(self, ostream): + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + istream = self.repo.odb.stream(self.sha) + stream_copy(istream, ostream) + return self + class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. 
- file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. 
+ """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 826f684c..9a3c2c95 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -7,372 +7,434 @@ from git.utils import Iterable import git.diff as diff import git.stats as stats +from git.actor import Actor from tree import Tree +from git.odb import IStream +from cStringIO import StringIO import base import utils -import tempfile +import time import os -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents") - _id_attribute_ = "sha" - - def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``sha`` - is the sha id of the commit or a ref - - ``parents`` : tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - - ``tree`` : Tree - is the corresponding tree id or an actual Tree - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - - ``author_tz_offset``: int_seconds_west_of_utc - is the timezone that the authored_date is in - - ``committer`` : Actor - is the committer string - - ``committed_date`` : int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - - ``committer_tz_offset``: int_seconds_west_of_utc - is the timezone that the authored_date is in - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, sha) - self._set_self_from_args_(locals()) - - if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) - # END for each parent to convert - - if self.sha and tree is not None: - self.tree = Tree(repo, tree, path='') - # END id to tree conversion - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. 
- """ - if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.author_tz_offset = temp.author_tz_offset - self.committer = temp.committer - self.committed_date = temp.committed_date - self.committer_tz_offset = temp.committer_tz_offset - self.message = temp.message - else: - super(Commit, self)._set_cache_(attr) - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """ - Count the number of commits reachable from this commit - - ``paths`` - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - ``kwargs`` - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - Returns - int - """ - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - Returns - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes - """ - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options - - ``paths`` - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - - ``kwargs`` - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - - Returns - iterator yielding Commit items - """ - options = {'pretty': 'raw', 'as_process' : True } - options.update(kwargs) - - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, **options) - return cls._iter_from_process_or_stream(repo, proc, True) - - def iter_parents(self, paths='', **kwargs): - """ - Iterate _all_ parents of this commit. - - ``paths`` - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - - ``kwargs`` - All arguments allowed by git-rev-list - - Return: - Iterator yielding Commit objects which are parents of self - """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items( self.repo, self, paths, **kwargs ) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. 
- - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) - return stats.Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) - - ``from_rev_list`` - If True, the stream was created by rev-list in which case we parse - the message differently - Returns - iterator returning Commit objects - """ - stream = proc_or_stream - if not hasattr(stream,'next'): - stream = proc_or_stream.stdout - - for line in stream: - commit_tokens = line.split() - id = commit_tokens[1] - assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line) - committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next()) - - # empty line - stream.next() - - message_lines = [] - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - yield Commit(repo, id, parents=tuple(parents), tree=tree, - author=author, authored_date=authored_date, author_tz_offset=author_tz_offset, - committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset, - message=message) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """ - Commit the given tree, creating a commit object. - - ``repo`` - is the Repo - - ``tree`` - Sha of a tree or a tree object to become the tree of the new commit - - ``message`` - Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - - ``parent_commits`` - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - - ``head`` - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. 
- - Returns - Commit object representing the new commit - - Note: - Additional information about hte committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information - """ - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - parent_args = [ ("-p", str(commit)) for commit in parent_commits ] - - # create message stream - tmp_file_path = tempfile.mktemp() - fp = open(tmp_file_path,"wb") - fp.write(str(message)) - fp.close() - fp = open(tmp_file_path,"rb") - fp.seek(0) - - try: - # write the current index as tree - commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) - new_commit = cls(repo, commit_sha) - - if head: - try: - repo.head.commit = new_commit - except ValueError: - # head is not yet set to the ref our HEAD points to. - import git.refs - master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit) - repo.head.reference = master - # END handle empty repositories - # END advance head handling - - return new_commit - finally: - fp.close() - os.remove(tmp_file_path) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.sha - - def __repr__(self): - return '<git.Commit "%s">' % self.sha +class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + + # ENVIRONMENT VARIABLES + # read when creating new commits + env_author_name = "GIT_AUTHOR_NAME" + env_author_email = "GIT_AUTHOR_EMAIL" + env_author_date = "GIT_AUTHOR_DATE" + env_committer_name = "GIT_COMMITTER_NAME" + env_committer_email = "GIT_COMMITTER_EMAIL" + env_committer_date = "GIT_COMMITTER_DATE" + env_email = "EMAIL" + + # CONFIGURATION KEYS + conf_email = 'email' + conf_name = 'name' + conf_encoding = 'i18n.commitencoding' + + # INVARIANTS + default_encoding = "UTF-8" + + + # object configuration + type = "commit" + __slots__ = ("tree", + "author", "authored_date", "author_tz_offset", + "committer", "committed_date", "committer_tz_offset", + "message", "parents", "encoding") + _id_attribute_ = "sha" + + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None, + committer=None, committed_date=None, committer_tz_offset=None, + message=None, parents=None, encoding=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + :param sha: is the sha id of the commit or a ref + :param parents: tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + :param tree: Tree + is the corresponding tree id or an actual Tree + :param author: Actor + is the author string ( will be implicitly converted into an Actor object ) + :param authored_date: int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + :param author_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param committer: Actor + is the committer string + :param committed_date: int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + :param committer_tz_offset: int_seconds_west_of_utc + is the timezone that the authored_date is in + :param message: string + is the commit message + :param encoding: string + encoding of the message, defaults to UTF-8 + :return: git.Commit + + :note: Timezone information is in the same format and in the same sign + as what time.altzone returns. The sign is inverted compared to git's + UTC timezone. + """ + super(Commit,self).__init__(repo, sha) + self._set_self_from_args_(locals()) + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + """ Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. """ + if attr in Commit.__slots__: + # read the data in a chunk, its faster - then provide a file wrapper + # Could use self.data, but lets try to get it with less calls + hexsha, typename, size, data = self.repo.git.get_object_data(self) + self._deserialize(StringIO(data)) + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + def count(self, paths='', **kwargs): + """ + Count the number of commits reachable from this commit + + ``paths`` + is an optinal path or a list of paths restricting the return value + to commits actually containing the paths + + ``kwargs`` + Additional options to be passed to git-rev-list. They must not alter + the ouput style of the command, or parsing will yield incorrect results + Returns + int + """ + # yes, it makes a difference whether empty paths are given or not in our case + # as the empty paths version will ignore merge commits for some reason. + if paths: + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + else: + return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + + + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. + Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) + + @classmethod + def iter_items(cls, repo, rev, paths='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``rev`` + revision specifier, see git-rev-parse for viable options + + ``paths`` + is an optinal path or list of paths, if set only Commits that include the path + or paths will be considered + + ``kwargs`` + optional keyword arguments to git rev-list where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + ``since`` all commits since i.e. 
'1970-01-01' + + Returns + iterator yielding Commit items + """ + if 'pretty' in kwargs: + raise ValueError("--pretty cannot be used as parsing expects single sha's only") + # END handle pretty + args = list() + if paths: + args.extend(('--', paths)) + # END if paths + + proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) + return cls._iter_from_process_or_stream(repo, proc) + + def iter_parents(self, paths='', **kwargs): + """ + Iterate _all_ parents of this commit. + + ``paths`` + Optional path or list of paths limiting the Commits to those that + contain at least one of the paths + + ``kwargs`` + All arguments allowed by git-rev-list + + Return: + Iterator yielding Commit objects which are parents of self + """ + # skip ourselves + skip = kwargs.get("skip", 1) + if skip == 0: # skip ourselves + skip = 1 + kwargs['skip'] = skip + + return self.iter_items( self.repo, self, paths, **kwargs ) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) + return stats.Stats._list_from_string(self.repo, text) + + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream): + """Parse out commit information into a list of Commit objects + We expect one-line per commit, and parse the actual commit information directly + from our lighting fast object database + + :param proc: git-rev-list process instance - one sha per line + :return: iterator returning Commit objects""" + stream = proc_or_stream + if not hasattr(stream,'readline'): + stream = proc_or_stream.stdout + + readline = stream.readline + while True: + line = readline() + if not line: + break + sha = line.strip() + if len(sha) > 40: + # split additional information, as returned by bisect for instance + sha, rest = line.split(None, 1) + # END handle extra info + + assert len(sha) == 40, "Invalid line: %s" % sha + yield Commit(repo, sha) + # END for each line in stream + + + @classmethod + def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): + """Commit the given tree, creating a commit object. + + :param repo: Repo object the commit should be part of + :param tree: Sha of a tree or a tree object to become the tree of the new commit + :param message: Commit message. It may be an empty string if no message is provided. + It will be converted to a string in any case. + :param parent_commits: + Optional Commit objects to use as parents for the new commit. + If empty list, the commit will have no parents at all and become + a root commit. + If None , the current head commit will be the parent of the + new commit object + :param head: + If True, the HEAD will be advanced to the new commit automatically. + Else the HEAD will remain pointing on the previous commit. This could + lead to undesired results when diffing files. 
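
For illustration, a minimal sketch of calling create_from_tree; the repository path is hypothetical, and the tree of the current head commit is simply reused::

    from git import Repo
    from git.objects.commit import Commit

    repo = Repo("path/to/repo")            # hypothetical path with at least one commit
    tree = repo.head.commit.tree           # reuse the current head's tree
    new_commit = Commit.create_from_tree(repo, tree, "rewritten message", head=True)
    assert repo.head.commit == new_commit  # head=True advanced HEAD for us
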
+ + :return: Commit object representing the new commit + + :note: + Additional information about the committer and Author are taken from the + environment or from the git configuration, see git-commit-tree for + more information + """ + parents = parent_commits + if parent_commits is None: + try: + parent_commits = [ repo.head.commit ] + except ValueError: + # empty repositories have no head commit + parent_commits = list() + # END handle parent commits + # END if parent commits are unset + + # retrieve all additional information, create a commit object, and + # serialize it + # Generally: + # * Environment variables override configuration values + # * Sensible defaults are set according to the git documentation + + # COMMITER AND AUTHOR INFO + cr = repo.config_reader() + env = os.environ + default_email = utils.get_user_id() + default_name = default_email.split('@')[0] + + conf_name = cr.get_value('user', cls.conf_name, default_name) + conf_email = cr.get_value('user', cls.conf_email, default_email) + + author_name = env.get(cls.env_author_name, conf_name) + author_email = env.get(cls.env_author_email, default_email) + + committer_name = env.get(cls.env_committer_name, conf_name) + committer_email = env.get(cls.env_committer_email, conf_email) + + # PARSE THE DATES + unix_time = int(time.time()) + offset = time.altzone + + author_date_str = env.get(cls.env_author_date, '') + if author_date_str: + author_time, author_offset = utils.parse_date(author_date_str) + else: + author_time, author_offset = unix_time, offset + # END set author time + + committer_date_str = env.get(cls.env_committer_date, '') + if committer_date_str: + committer_time, committer_offset = utils.parse_date(committer_date_str) + else: + committer_time, committer_offset = unix_time, offset + # END set committer time + + # assume utf8 encoding + enc_section, enc_option = cls.conf_encoding.split('.') + conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) + + author = Actor(author_name, author_email) + committer = Actor(committer_name, committer_email) + + + # CREATE NEW COMMIT + new_commit = cls(repo, cls.NULL_HEX_SHA, tree, + author, author_time, author_offset, + committer, committer_time, committer_offset, + message, parent_commits, conf_encoding) + + stream = StringIO() + new_commit._serialize(stream) + streamlen = stream.tell() + stream.seek(0) + + istream = repo.odb.store(IStream(cls.type, streamlen, stream)) + new_commit.sha = istream.sha + + if head: + try: + repo.head.commit = new_commit + except ValueError: + # head is not yet set to the ref our HEAD points to + # Happens on first commit + import git.refs + master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit) + repo.head.reference = master + # END handle empty repositories + # END advance head handling + + return new_commit + + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.sha + + def __repr__(self): + return '<git.Commit "%s">' % self.sha + + #{ Serializable Implementation + + def _serialize(self, stream): + write = stream.write + write("tree %s\n" % self.tree) + for p in self.parents: + write("parent %s\n" % p) + + a = self.author + c = self.committer + fmt = "%s %s <%s> %s %s\n" + write(fmt % ("author", a.name, a.email, + self.authored_date, + utils.altz_to_utctz_str(self.author_tz_offset))) + + write(fmt % ("committer", c.name, c.email, + self.committed_date, + utils.altz_to_utctz_str(self.committer_tz_offset))) + + if self.encoding != self.default_encoding: + write("encoding %s\n" % 
self.encoding) + + write("\n") + write(self.message) + return self + + def _deserialize(self, stream): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + readline = stream.readline + self.tree = Tree(self.repo, readline().split()[1], 0, '') + + self.parents = list() + next_line = None + while True: + parent_line = readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline()) + + + # now we can have the encoding line, or an empty line followed by the optional + # message. + self.encoding = self.default_encoding + # read encoding or empty line to separate message + enc = readline() + enc = enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # now comes the message separator + readline() + # END handle encoding + + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read() + return self + + #} END serializable implementation diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a9e60981..285d3b5b 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable): visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse - Trees are set to visist_once = False to gain more performance in the traversal""" + Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index 4f17b652..c93f2091 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -9,159 +9,302 @@ Module for general utility functions import re from collections import deque as Deque from git.actor import Actor +import platform + +from string import digits +import time +import os + +__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date', + 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', + 'verify_utctz') def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. 
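
A quick sketch of the lookup above: known type names map to the matching object classes, and unknown names raise::

    from git.objects.utils import get_object_type_by_name

    cls = get_object_type_by_name("commit")    # -> git.objects.commit.Commit
    try:
        get_object_type_by_name("bogus")
    except ValueError:
        pass                                   # unknown type names are rejected
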
+ + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + +def get_user_id(): + """:return: string identifying the currently active system user as name@node + :note: user can be set with the 'USER' environment variable, usually set on windows""" + ukn = 'UNKNOWN' + username = os.environ.get('USER', ukn) + if username == ukn and hasattr(os, 'getlogin'): + username = os.getlogin() + # END get username from login + return "%s@%s" % (username, platform.node()) + + +def utctz_to_altz(utctz): + """we convert utctz to the timezone in seconds, it is the format time.altzone + returns. Git stores it as UTC timezon which has the opposite sign as well, + which explains the -1 * ( that was made explicit here ) + :param utctz: git utc timezone string, i.e. +0200""" + return -1 * int(float(utctz)/100*3600) + +def altz_to_utctz_str(altz): + """As above, but inverses the operation, returning a string that can be used + in commit objects""" + utci = -1 * int((altz / 3600)*100) + utcs = str(abs(utci)) + utcs = "0"*(4-len(utcs)) + utcs + prefix = (utci < 0 and '-') or '+' + return prefix + utcs + + +def verify_utctz(offset): + """:raise ValueError: if offset is incorrect + :return: offset""" + fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) + if len(offset) != 5: + raise fmt_exc + if offset[0] not in "+-": + raise fmt_exc + if offset[1] not in digits or \ + offset[2] not in digits or \ + offset[3] not in digits or \ + offset[4] not in digits: + raise fmt_exc + # END for each char + return offset + +def parse_date(string_date): + """ + Parse the given date as one of the following + * Git internal format: timestamp offset + * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
+ * ISO 8601 2005-04-07T22:13:13 + The T can be a space as well + + :return: Tuple(int(timestamp), int(offset), both in seconds since epoch + :raise ValueError: If the format could not be understood + :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY + """ + # git time + try: + if string_date.count(' ') == 1 and string_date.rfind(':') == -1: + timestamp, offset = string_date.split() + timestamp = int(timestamp) + return timestamp, utctz_to_altz(verify_utctz(offset)) + else: + offset = "+0000" # local time by default + if string_date[-5] in '-+': + offset = verify_utctz(string_date[-5:]) + string_date = string_date[:-6] # skip space as well + # END split timezone info + + # now figure out the date and time portion - split time + date_formats = list() + splitter = -1 + if ',' in string_date: + date_formats.append("%a, %d %b %Y") + splitter = string_date.rfind(' ') + else: + # iso plus additional + date_formats.append("%Y-%m-%d") + date_formats.append("%Y.%m.%d") + date_formats.append("%m/%d/%Y") + date_formats.append("%d.%m.%Y") + + splitter = string_date.rfind('T') + if splitter == -1: + splitter = string_date.rfind(' ') + # END handle 'T' and ' ' + # END handle rfc or iso + + assert splitter > -1 + + # split date and time + time_part = string_date[splitter+1:] # skip space + date_part = string_date[:splitter] + + # parse time + tstruct = time.strptime(time_part, "%H:%M:%S") + + for fmt in date_formats: + try: + dtstruct = time.strptime(date_part, fmt) + fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, + tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, + dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) + return int(time.mktime(fstruct)), utctz_to_altz(offset) + except ValueError: + continue + # END exception handling + # END for each fmt + + # still here ? fail + raise ValueError("no format matched") + # END handle format + except Exception: + raise ValueError("Unsupported date format: %s" % string_date) + # END handle exceptions + + # precompiled regex _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') def parse_actor_and_date(line): - """ - Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - Returns - [Actor, int_seconds_since_epoch, int_timezone_offset] - """ - m = _re_actor_epoch.search(line) - actor, epoch, offset = m.groups() - return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600)) - - - + """ + Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + Returns + [Actor, int_seconds_since_epoch, int_timezone_offset] + """ + m = _re_actor_epoch.search(line) + actor, epoch, offset = m.groups() + return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) + + + class ProcessStreamAdapter(object): - """ - Class wireing all calls to the contained Process instance. - - Use this type to hide the underlying process to provide access only to a specified - stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope. - """ - __slots__ = ("_proc", "_stream") - def __init__(self, process, stream_name): - self._proc = process - self._stream = getattr(process, stream_name) - - def __getattr__(self, attr): - return getattr(self._stream, attr) - - + """ + Class wireing all calls to the contained Process instance. 
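
A few examples of the date and timezone helpers above, assuming the literal values shown (the git-internal form is a timestamp followed by a UTC offset)::

    from git.objects.utils import parse_date, utctz_to_altz, altz_to_utctz_str

    assert utctz_to_altz("+0200") == -7200         # git offset -> time.altzone convention
    assert altz_to_utctz_str(-7200) == "+0200"     # and back again
    assert parse_date("1275026943 +0200") == (1275026943, -7200)
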
+ + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. + """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + class Traversable(object): - """Simple interface to perforam depth-first or breadth-first traversals - into one direction. - Subclasses only need to implement one function. - Instances of the Subclass must be hashable""" - __slots__ = tuple() - - @classmethod - def _get_intermediate_items(cls, item): - """ - Returns: - List of items connected to the given item. - Must be implemented in subclass - """ - raise NotImplementedError("To be implemented in subclass") - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = True, ignore_self=1, as_edge = False ): - """ - ``Returns`` - iterator yieling of items found when traversing self - - ``predicate`` - f(i,d) returns False if item i at depth d should not be included in the result - - ``prune`` - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. - - ``depth`` - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predessessors/successors - - ``branch_first`` - if True, items will be returned branch first, otherwise depth first - - ``visit_once`` - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. - - ``ignore_self`` - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None - - ``as_edge`` - if True, return a pair of items, first being the source, second the - destinatination, i.e. tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item == self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. 
+ Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. + Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """ + ``Returns`` + iterator yieling of items found when traversing self + + ``predicate`` + f(i,d) returns False if item i at depth d should not be included in the result + + ``prune`` + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + ``depth`` + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + ``branch_first`` + if True, items will be returned branch first, otherwise depth first + + ``visit_once`` + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + ``ignore_self`` + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + ``as_edge`` + if True, return a pair of items, first being the source, second the + destinatination, i.e. tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item == self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! 
+ nd = d + 1 
+ if depth > -1 and nd > depth: 
+ continue 
+ 
+ addToStack( stack, item, branch_first, nd ) 
+ # END for each item on work stack 
+ 
+ 
+class Serializable(object): 
+ """Defines methods to serialize and deserialize objects from and into a data stream""" 
+ 
+ def _serialize(self, stream): 
+ """Serialize the data of this object into the given data stream 
+ :note: a serialized object would ``_deserialize`` into the same object 
+ :param stream: a file-like object 
+ :return: self""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def _deserialize(self, stream): 
+ """Deserialize all information regarding this object from the stream 
+ :param stream: a file-like object 
+ :return: self""" 
+ raise NotImplementedError("To be implemented in subclass") 
diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py 
new file mode 100644 
index 00000000..5789d7eb 
--- /dev/null 
+++ b/lib/git/odb/__init__.py 
@@ -0,0 +1,6 @@ 
+"""Initialize the object database module""" 
+ 
+# default imports 
+from db import * 
+from stream import * 
+ 
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py 
new file mode 100644 
index 00000000..a8de28ec 
--- /dev/null 
+++ b/lib/git/odb/db.py 
@@ -0,0 +1,337 @@ 
+"""Contains implementations of databases for retrieving objects""" 
+from git.utils import IndexFileSHA1Writer 
+from git.errors import ( 
+ InvalidDBRoot, 
+ BadObject, 
+ BadObjectType 
+ ) 
+ 
+from stream import ( 
+ DecompressMemMapReader, 
+ FDCompressedSha1Writer, 
+ Sha1Writer, 
+ OStream, 
+ OInfo 
+ ) 
+ 
+from utils import ( 
+ ENOENT, 
+ to_hex_sha, 
+ exists, 
+ hex_to_bin, 
+ isdir, 
+ mkdir, 
+ rename, 
+ dirname, 
+ join 
+ ) 
+ 
+from fun import ( 
+ chunk_size, 
+ loose_object_header_info, 
+ write_object 
+ ) 
+ 
+import tempfile 
+import mmap 
+import os 
+ 
+ 
+__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB', 
+ 'CompoundDB', 'ReferenceDB', 'GitObjectDB' ) 
+ 
+class ObjectDBR(object): 
+ """Defines an interface for object database lookup. 
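
As a quick, hypothetical sketch of the Serializable contract defined earlier in objects/utils.py (Commit provides the real implementation): a toy type whose whole state is one text attribute::

    from git.objects.utils import Serializable

    class Note(Serializable):              # hypothetical example type
        def _serialize(self, stream):
            stream.write(self.text)        # write our state to the stream
            return self
        def _deserialize(self, stream):
            self.text = stream.read()      # restore state from the stream
            return self
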
+ Objects are identified either by hex-sha (40 bytes) or 
+ by sha (20 bytes)""" 
+ 
+ def __contains__(self, sha): 
+ return self.has_object(sha) 
+ 
+ #{ Query Interface 
+ def has_object(self, sha): 
+ """ 
+ :return: True if the object identified by the given 40 byte hexsha or 20 bytes 
+ binary sha is contained in the database 
+ :raise BadObject:""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def info(self, sha): 
+ """ :return: OInfo instance 
+ :param sha: 40 bytes hexsha or 20 bytes binary sha 
+ :raise BadObject:""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def info_async(self, input_channel): 
+ """Retrieve information of a multitude of objects asynchronously 
+ :param input_channel: Channel yielding the sha's of the objects of interest 
+ :return: Channel yielding OInfo|InvalidOInfo, in any order""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def stream(self, sha): 
+ """:return: OStream instance 
+ :param sha: 40 bytes hexsha or 20 bytes binary sha 
+ :raise BadObject:""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def stream_async(self, input_channel): 
+ """Retrieve the OStream of multiple objects 
+ :param input_channel: see ``info`` 
+ :param max_threads: see ``ObjectDBW.store`` 
+ :return: Channel yielding OStream|InvalidOStream instances in any order""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ #} END query interface 
+ 
+class ObjectDBW(object): 
+ """Defines an interface to create objects in the database""" 
+ 
+ def __init__(self, *args, **kwargs): 
+ self._ostream = None 
+ 
+ #{ Edit Interface 
+ def set_ostream(self, stream): 
+ """Adjusts the stream to which all data should be sent when storing new objects 
+ :param stream: if not None, the stream to use, if None the default stream 
+ will be used. 
+ :return: previously installed stream, or None if there was no override 
+ :raise TypeError: if the stream doesn't have the supported functionality""" 
+ cstream = self._ostream 
+ self._ostream = stream 
+ return cstream 
+ 
+ def ostream(self): 
+ """:return: overridden output stream this instance will write to, or None 
+ if it will write to the default stream""" 
+ return self._ostream 
+ 
+ def store(self, istream): 
+ """Create a new object in the database 
+ :return: the input istream object with its sha set to its corresponding value 
+ :param istream: IStream compatible instance. If its sha is already set 
+ to a value, the object will just be stored in our database format, 
+ in which case the input stream is expected to be in object format ( header + contents ). 
+ :raise IOError: if data could not be written""" 
+ raise NotImplementedError("To be implemented in subclass") 
+ 
+ def store_async(self, input_channel): 
+ """Create multiple new objects in the database asynchronously. The method will 
+ return right away, returning an output channel which receives the results as 
+ they are computed. 
+ 
+ :return: Channel yielding your IStream which served as input, in any order. 
+ The IStream's sha will be set to the sha it received during the process, 
+ or its error attribute will be set to the exception informing about the error. 
+ :param input_channel: Channel yielding IStream instances. 
+ As the same instances will be used in the output channel, you can create a map 
+ between the id(istream) -> istream 
+ :note: As some ODB implementations implement this operation as atomic, they might 
+ abort the whole operation if one item could not be processed. 
Hence check how + many items have actually been produced.""" + raise NotImplementedError("To be implemented in subclass") + + #} END edit interface + + +class FileDBBase(object): + """Provides basic facilities to retrieve files of interest, including + caching facilities to help mapping hexsha's to objects""" + + def __init__(self, root_path): + """Initialize this instance to look for its files at the given root path + All subsequent operations will be relative to this path + :raise InvalidDBRoot: + :note: The base will perform basic checking for accessability, but the subclass + is required to verify that the root_path contains the database structure it needs""" + super(FileDBBase, self).__init__() + if not os.path.isdir(root_path): + raise InvalidDBRoot(root_path) + self._root_path = root_path + + + #{ Interface + def root_path(self): + """:return: path at which this db operates""" + return self._root_path + + def db_path(self, rela_path): + """ + :return: the given relative path relative to our database root, allowing + to pontentially access datafiles""" + return join(self._root_path, rela_path) + #} END interface + + + +class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW): + """A database which operates on loose object files""" + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = chunk_size + + + def __init__(self, root_path): + super(LooseObjectDB, self).__init__(root_path) + self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = getattr(os, 'O_NOATIME', 0) + + #{ Interface + def object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + def readable_db_object_path(self, hexsha): + """ + :return: readable object path to the object identified by hexsha + :raise BadObject: If the object file does not exist""" + try: + return self._hexsha_to_file[hexsha] + except KeyError: + pass + # END ignore cache misses + + # try filesystem + path = self.db_path(self.object_path(hexsha)) + if exists(path): + self._hexsha_to_file[hexsha] = path + return path + # END handle cache + raise BadObject(hexsha) + + #} END interface + + def _map_loose_object(self, sha): + """ + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(to_hex_sha(sha))) + try: + fd = os.open(db_path, os.O_RDONLY|self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + fd = os.open(db_path, os.O_RDONLY) + except OSError: + raise BadObject(to_hex_sha(sha)) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(to_hex_sha(sha)) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed + + def set_ostream(self, stream): + """:raise TypeError: if the stream does not support the Sha1Writer interface""" + if stream is not None and not isinstance(stream, Sha1Writer): + raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__) + return super(LooseObjectDB, self).set_ostream(stream) + + def info(self, sha): + m = self._map_loose_object(sha) + try: + type, size = 
loose_object_header_info(m) + return OInfo(sha, type, size) + finally: + m.close() + # END assure release of system resources + + def stream(self, sha): + m = self._map_loose_object(sha) + type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True) + return OStream(sha, type, size, stream) + + def has_object(self, sha): + try: + self.readable_db_object_path(to_hex_sha(sha)) + return True + except BadObject: + return False + # END check existance + + def store(self, istream): + """note: The sha we produce will be hex by nature""" + assert istream.sha is None, "Direct istream writing not yet implemented" + tmp_path = None + writer = self.ostream() + if writer is None: + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + writer = FDCompressedSha1Writer(fd) + # END handle custom writer + + try: + try: + write_object(istream.type, istream.size, istream.read, writer.write, + chunk_size=self.stream_chunk_size) + except: + if tmp_path: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + finally: + if tmp_path: + writer.close() + # END assure target stream is closed + + sha = writer.sha(as_hex=True) + + if tmp_path: + obj_path = self.db_path(self.object_path(sha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + rename(tmp_path, obj_path) + # END handle dry_run + + istream.sha = sha + return istream + + +class PackedDB(FileDBBase, ObjectDBR): + """A database operating on a set of object packs""" + + +class CompoundDB(ObjectDBR): + """A database which delegates calls to sub-databases""" + + +class ReferenceDB(CompoundDB): + """A database consisting of database referred to in a file""" + + +#class GitObjectDB(CompoundDB, ObjectDBW): +class GitObjectDB(LooseObjectDB): + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database. + :note: for now, we use the git command to do all the lookup, just until he + have packs and the other implementations + """ + def __init__(self, root_path, git): + """Initialize this instance with the root and a git command""" + super(GitObjectDB, self).__init__(root_path) + self._git = git + + def info(self, sha): + t = self._git.get_object_header(sha) + return OInfo(t[0], t[1], t[2]) + + def stream(self, sha): + """For now, all lookup is done by git itself""" + t = self._git.stream_object_data(sha) + return OStream(t[0], t[1], t[2], t[3]) + diff --git a/lib/git/odb/fun.py b/lib/git/odb/fun.py new file mode 100644 index 00000000..870a6f02 --- /dev/null +++ b/lib/git/odb/fun.py @@ -0,0 +1,108 @@ +"""Contains basic c-functions which usually contain performance critical code +Keeping this code separate from the beginning makes it easier to out-source +it into c later, if required""" + +from git.errors import ( + BadObjectType + ) + +import zlib +decompressobj = zlib.decompressobj + + +# INVARIANTS +type_id_to_type_map = { + 1 : "commit", + 2 : "tree", + 3 : "blob", + 4 : "tag" + } + +# used when dealing with larger streams +chunk_size = 1000*1000 + +__all__ = ('is_loose_object', 'loose_object_header_info', 'object_header_info', + 'write_object' ) + +#{ Routines + +def is_loose_object(m): + """:return: True the file contained in memory map m appears to be a loose object. 
+ Only the first two bytes are needed""" 
+ b0, b1 = map(ord, m[:2]) 
+ word = (b0 << 8) + b1 
+ return b0 == 0x78 and (word % 31) == 0 
+ 
+def loose_object_header_info(m): 
+ """:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the 
+ object as well as its uncompressed size in bytes. 
+ :param m: memory map from which to read the compressed object data""" 
+ decompress_size = 8192 # is used in cgit as well 
+ hdr = decompressobj().decompress(m, decompress_size) 
+ type_name, size = hdr[:hdr.find("\0")].split(" ") 
+ return type_name, int(size) 
+ 
+def object_header_info(m): 
+ """:return: tuple(type_string, uncompressed_size_in_bytes) 
+ :param mmap: mapped memory map. It will be 
+ seeked to the actual start of the object contents, which can be used 
+ to initialize a zlib decompress object. 
+ :note: This routine can only handle new-style objects which are presumably contained 
+ in packs 
+ """ 
+ assert not is_loose_object(m), "Use loose_object_header_info instead" 
+ 
+ c = ord(m[0]) # first byte 
+ i = 1 # next char to read 
+ type_id = (c >> 4) & 7 # numeric type 
+ size = c & 15 # starting size 
+ s = 4 # starting bit-shift size 
+ while c & 0x80: 
+ c = ord(m[i]) 
+ i += 1 
+ size += (c & 0x7f) << s 
+ s += 7 
+ # END character loop 
+ 
+ # finally seek the map to the start of the data stream 
+ m.seek(i) 
+ try: 
+ return (type_id_to_type_map[type_id], size) 
+ except KeyError: 
+ # invalid object type - we could try to be smart now and decode part 
+ # of the stream to get the info, problem is that we had trouble finding 
+ # the exact start of the content stream 
+ raise BadObjectType(type_id) 
+ # END handle exceptions 
+ 
+def write_object(type, size, read, write, chunk_size=chunk_size): 
+ """Write the object as identified by type, size and source_stream into the 
+ target_stream 
+ 
+ :param type: type string of the object 
+ :param size: amount of bytes to write from source_stream 
+ :param read: read method of a stream providing the content data 
+ :param write: write method of the output stream 
+ :return: The actual amount of bytes written to stream, which includes the header""" 
+ tbw = 0 # total num bytes written 
+ dbw = 0 # num data bytes written 
+ 
+ # WRITE HEADER: type SP size NULL 
+ tbw += write("%s %i\0" % (type, size)) 
+ 
+ # WRITE ALL DATA UP TO SIZE 
+ while True: 
+ cs = min(chunk_size, size-dbw) 
+ data_len = write(read(cs)) 
+ dbw += data_len 
+ if data_len < cs or dbw == size: 
+ tbw += dbw 
+ break 
+ # END check for stream end 
+ # END duplicate data 
+ return tbw 
+ 
+ 
+#} END routines 
diff --git a/lib/git/odb/stream.py b/lib/git/odb/stream.py 
new file mode 100644 
index 00000000..d1181382 
--- /dev/null 
+++ b/lib/git/odb/stream.py 
@@ -0,0 +1,446 @@ 
+import zlib 
+from cStringIO import StringIO 
+from git.utils import make_sha 
+import errno 
+ 
+from utils import ( 
+ to_hex_sha, 
+ to_bin_sha, 
+ write, 
+ close 
+ ) 
+ 
+__all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream', 
+ 'DecompressMemMapReader', 'FDCompressedSha1Writer') 
+ 
+ 
+# ZLIB configuration 
+# used when compressing objects - levels range from 1 ( fastest ) to 9 ( slowest ) 
+Z_BEST_SPEED = 1 
+ 
+ 
+#{ ODB Bases 
+ 
+class OInfo(tuple): 
+ """Carries information about an object in an ODB, providing information 
+ about the sha of the object, the type_string as well as the uncompressed size 
+ in bytes. 
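
A minimal sketch of write_object from fun.py above, using in-memory streams. Note that write_object sums the return values of the write callable, so the writer must return the number of bytes written, as os.write and the Sha1Writer types do; the small wrapper below is an assumption made only for this example::

    from cStringIO import StringIO
    from git.odb.fun import write_object

    out = StringIO()
    def write(data):
        out.write(data)
        return len(data)          # write_object adds up the returned byte counts

    data = "hello world"
    write_object("blob", len(data), StringIO(data).read, write)
    assert out.getvalue() == "blob %i\0%s" % (len(data), data)
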
+ 
+ It can be accessed using tuple notation and using attribute access notation:: 
+ 
+ assert dbi[0] == dbi.sha 
+ assert dbi[1] == dbi.type 
+ assert dbi[2] == dbi.size 
+ 
+ The type is designed to be as lightweight as possible.""" 
+ __slots__ = tuple() 
+ 
+ def __new__(cls, sha, type, size): 
+ return tuple.__new__(cls, (sha, type, size)) 
+ 
+ def __init__(self, *args): 
+ tuple.__init__(self) 
+ 
+ #{ Interface 
+ @property 
+ def sha(self): 
+ return self[0] 
+ 
+ @property 
+ def type(self): 
+ return self[1] 
+ 
+ @property 
+ def size(self): 
+ return self[2] 
+ #} END interface 
+ 
+ 
+class OStream(OInfo): 
+ """Base for object streams retrieved from the database, providing additional 
+ information about the stream. 
+ Generally, ODB streams are read-only as objects are immutable""" 
+ __slots__ = tuple() 
+ 
+ def __new__(cls, sha, type, size, stream, *args, **kwargs): 
+ """Helps with the initialization of subclasses""" 
+ return tuple.__new__(cls, (sha, type, size, stream)) 
+ 
+ 
+ def __init__(self, *args, **kwargs): 
+ tuple.__init__(self) 
+ #{ Interface 
+ 
+ def is_compressed(self): 
+ """:return: True if reads of this stream yield zlib compressed data. Default False 
+ :note: this does not imply anything about the actual internal storage. 
+ Hence the data could be uncompressed, but read compressed, or vice versa""" 
+ return False 
+ 
+ #} END interface 
+ 
+ #{ Stream Reader Interface 
+ 
+ def read(self, size=-1): 
+ return self[3].read(size) 
+ 
+ #} END stream reader interface 
+ 
+ 
+class IStream(list): 
+ """Represents an input content stream to be fed into the ODB. It is mutable to allow 
+ the ODB to record information about the operation's outcome right in this instance. 
+ 
+ It provides interfaces for the OStream and a StreamReader to allow the instance 
+ to blend in without prior conversion. 
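
To see how these types cooperate, a minimal sketch, assuming a hypothetical repository path, that stores a new blob through the object database and reads it back::

    from cStringIO import StringIO
    from git import Repo
    from git.odb.stream import IStream

    repo = Repo("path/to/repo")                    # hypothetical path
    data = "my data"
    istream = repo.odb.store(IStream("blob", len(data), StringIO(data)))
    assert istream.sha is not None                 # the sha was filled in by the database
    assert repo.odb.stream(istream.sha).read() == data
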
+ + The only method your content stream must support is 'read'""" + __slots__ = tuple() + + def __new__(cls, type, size, stream, sha=None, compressed=False): + return list.__new__(cls, (sha, type, size, stream, compressed, None)) + + def __init__(self, type, size, stream, sha=None, compressed=None): + list.__init__(self, (sha, type, size, stream, compressed, None)) + + #{ Interface + + def hexsha(self): + """:return: our sha, hex encoded, 40 bytes""" + return to_hex_sha(self[0]) + + def binsha(self): + """:return: our sha as binary, 20 bytes""" + return to_bin_sha(self[0]) + + def _error(self): + """:return: the error that occurred when processing the stream, or None""" + return self[5] + + def _set_error(self, exc): + """Set this input stream to the given exc, may be None to reset the error""" + self[5] = exc + + error = property(_error, _set_error) + + #} END interface + + #{ Stream Reader Interface + + def read(self, size=-1): + """Implements a simple stream reader interface, passing the read call on + to our internal stream""" + return self[3].read(size) + + #} END stream reader interface + + #{ interface + + def _set_sha(self, sha): + self[0] = sha + + def _sha(self): + return self[0] + + sha = property(_sha, _set_sha) + + + def _type(self): + return self[1] + + def _set_type(self, type): + self[1] = type + + type = property(_type, _set_type) + + def _size(self): + return self[2] + + def _set_size(self, size): + self[2] = size + + size = property(_size, _set_size) + + def _stream(self): + return self[3] + + def _set_stream(self, stream): + self[3] = stream + + stream = property(_stream, _set_stream) + + #} END odb info interface + + #{ OStream interface + + def is_compressed(self): + return self[4] + + #} END OStream interface + + +class InvalidOInfo(tuple): + """Carries information about a sha identifying an object which is invalid in + the queried database. The exception attribute provides more information about + the cause of the issue""" + __slots__ = tuple() + + def __new__(cls, sha, exc): + return tuple.__new__(cls, (sha, exc)) + + def __init__(self, sha, exc): + tuple.__init__(self, (sha, exc)) + + @property + def sha(self): + return self[0] + + @property + def error(self): + """:return: exception instance explaining the failure""" + return self[1] + + +class InvalidOStream(InvalidOInfo): + """Carries information about an invalid ODB stream""" + __slots__ = tuple() + +#} END ODB Bases + + +#{ RO Streams + +class DecompressMemMapReader(object): + """Reads data in chunks from a memory map and decompresses it. The client sees + only the uncompressed data, respective file-like read calls are handling on-demand + buffered decompression accordingly + + A constraint on the total size of bytes is activated, simulating + a logical file within a possibly larger physical memory area + + To read efficiently, you clearly don't want to read individual bytes, instead, + read a few kilobytes at least. + + :note: The chunk-size should be carefully selected as it will involve quite a bit + of string copying due to the way the zlib is implemented. Its very wasteful, + hence we try to find a good tradeoff between allocation time and number of + times we actually allocate. An own zlib implementation would be good here + to better support streamed reading - it would only need to keep the mmap + and decompress it into chunks, thats all ... 
""" + __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close') + + max_read_size = 512*1024 + + def __init__(self, m, close_on_deletion, size): + """Initialize with mmap for stream reading""" + self._m = m + self._zip = zlib.decompressobj() + self._buf = None # buffer of decompressed bytes + self._buflen = 0 # length of bytes in buffer + self._s = size # size of uncompressed data to read in total + self._br = 0 # num uncompressed bytes read + self._cws = 0 # start byte of compression window + self._cwe = 0 # end byte of compression window + self._close = close_on_deletion # close the memmap on deletion ? + + def __del__(self): + if self._close: + self._m.close() + # END handle resource freeing + + @classmethod + def new(self, m, close_on_deletion=False): + """Create a new DecompressMemMapReader instance for acting as a read-only stream + This method parses the object header from m and returns the parsed + type and size, as well as the created stream instance. + :param m: memory map on which to oparate + :param close_on_deletion: if True, the memory map will be closed once we are + being deleted""" + inst = DecompressMemMapReader(m, close_on_deletion, 0) + + # read header + maxb = 512 # should really be enough, cgit uses 8192 I believe + inst._s = maxb + hdr = inst.read(maxb) + hdrend = hdr.find("\0") + type, size = hdr[:hdrend].split(" ") + size = int(size) + inst._s = size + + # adjust internal state to match actual header length that we ignore + # The buffer will be depleted first on future reads + inst._br = 0 + hdrend += 1 # count terminating \0 + inst._buf = StringIO(hdr[hdrend:]) + inst._buflen = len(hdr) - hdrend + + return type, size, inst + + def read(self, size=-1): + if size < 1: + size = self._s - self._br + else: + size = min(size, self._s - self._br) + # END clamp size + + if size == 0: + return str() + # END handle depletion + + # protect from memory peaks + # If he tries to read large chunks, our memory patterns get really bad + # as we end up copying a possibly huge chunk from our memory map right into + # memory. This might not even be possible. Nonetheless, try to dampen the + # effect a bit by reading in chunks, returning a huge string in the end. + # Our performance now depends on StringIO. This way we don't need two large + # buffers in peak times, but only one large one in the end which is + # the return buffer + # NO: We don't do it - if the user thinks its best, he is right. If he + # has trouble, he will start reading in chunks. According to our tests + # its still faster if we read 10 Mb at once instead of chunking it. + + # if size > self.max_read_size: + # sio = StringIO() + # while size: + # read_size = min(self.max_read_size, size) + # data = self.read(read_size) + # sio.write(data) + # size -= len(data) + # if len(data) < read_size: + # break + # # END data loop + # sio.seek(0) + # return sio.getvalue() + # # END handle maxread + # + # deplete the buffer, then just continue using the decompress object + # which has an own buffer. 
We just need this to transparently parse the + # header from the zlib stream + dat = str() + if self._buf: + if self._buflen >= size: + # have enough data + dat = self._buf.read(size) + self._buflen -= size + self._br += size + return dat + else: + dat = self._buf.read() # ouch, duplicates data + size -= self._buflen + self._br += self._buflen + + self._buflen = 0 + self._buf = None + # END handle buffer len + # END handle buffer + + # decompress some data + # Abstract: zlib needs to operate on chunks of our memory map ( which may + # be large ), as it will otherwise and always fill in the 'unconsumed_tail' + # attribute which possible reads our whole map to the end, forcing + # everything to be read from disk even though just a portion was requested. + # As this would be a nogo, we workaround it by passing only chunks of data, + # moving the window into the memory map along as we decompress, which keeps + # the tail smaller than our chunk-size. This causes 'only' the chunk to be + # copied once, and another copy of a part of it when it creates the unconsumed + # tail. We have to use it to hand in the appropriate amount of bytes durin g + # the next read. + tail = self._zip.unconsumed_tail + if tail: + # move the window, make it as large as size demands. For code-clarity, + # we just take the chunk from our map again instead of reusing the unconsumed + # tail. The latter one would safe some memory copying, but we could end up + # with not getting enough data uncompressed, so we had to sort that out as well. + # Now we just assume the worst case, hence the data is uncompressed and the window + # needs to be as large as the uncompressed bytes we want to read. + self._cws = self._cwe - len(tail) + self._cwe = self._cws + size + + + indata = self._m[self._cws:self._cwe] # another copy ... :( + # get the actual window end to be sure we don't use it for computations + self._cwe = self._cws + len(indata) + else: + cws = self._cws + self._cws = self._cwe + self._cwe = cws + size + indata = self._m[self._cws:self._cwe] # ... 
copy it again :( + # END handle tail + + dcompdat = self._zip.decompress(indata, size) + + self._br += len(dcompdat) + if dat: + dcompdat = dat + dcompdat + + return dcompdat + +#} END RO streams + + +#{ W Streams + +class Sha1Writer(object): + """Simple stream writer which produces a sha whenever you like as it degests + everything it is supposed to write""" + + def __init__(self): + self.sha1 = make_sha("") + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + return len(data) + + # END stream interface + + #{ Interface + + def sha(self, as_hex = False): + """:return: sha so far + :param as_hex: if True, sha will be hex-encoded, binary otherwise""" + if as_hex: + return self.sha1.hexdigest() + return self.sha1.digest() + + #} END interface + +class FDCompressedSha1Writer(Sha1Writer): + """Digests data written to it, making the sha available, then compress the + data and write it to the file descriptor + :note: operates on raw file descriptors + :note: for this to work, you have to use the close-method of this instance""" + __slots__ = ("fd", "sha1", "zip") + + # default exception + exc = IOError("Failed to write all bytes to filedescriptor") + + def __init__(self, fd): + super(FDCompressedSha1Writer, self).__init__() + self.fd = fd + self.zip = zlib.compressobj(Z_BEST_SPEED) + + #{ Stream Interface + + def write(self, data): + """:raise IOError: If not all bytes could be written + :return: lenght of incoming data""" + self.sha1.update(data) + cdata = self.zip.compress(data) + bytes_written = write(self.fd, cdata) + if bytes_written != len(cdata): + raise self.exc + return len(data) + + def close(self): + remainder = self.zip.flush() + if write(self.fd, remainder) != len(remainder): + raise self.exc + return close(self.fd) + + #} END stream interface + +#} END W streams diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py new file mode 100644 index 00000000..6863e97b --- /dev/null +++ b/lib/git/odb/utils.py @@ -0,0 +1,38 @@ +import binascii +import os +import errno + +#{ Routines + +hex_to_bin = binascii.a2b_hex +bin_to_hex = binascii.b2a_hex + +def to_hex_sha(sha): + """:return: hexified version of sha""" + if len(sha) == 40: + return sha + return bin_to_hex(sha) + +def to_bin_sha(sha): + if len(sha) == 20: + return sha + return hex_to_bin(sha) + +# errors +ENOENT = errno.ENOENT + +# os shortcuts +exists = os.path.exists +mkdir = os.mkdir +isdir = os.path.isdir +rename = os.rename +dirname = os.path.dirname +join = os.path.join +read = os.read +write = os.write +close = os.close + + +#} END Routines + + diff --git a/lib/git/repo.py b/lib/git/repo.py index f4caa3fb..78e5f526 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -4,12 +4,6 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php -import os -import sys -import re -import gzip -import StringIO - from errors import InvalidGitRepositoryError, NoSuchPathError from cmd import Git from actor import Actor @@ -19,6 +13,15 @@ from objects import * from config import GitConfigParser from remote import Remote +from odb import GitObjectDB + +import os +import sys +import re +import gzip +import StringIO + + def touch(filename): fp = open(filename, "a") fp.close() @@ -53,7 +56,7 @@ class Repo(object): 'git_dir' is the .git repository directoy, which is always set. 
""" DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" ) + __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" ) # precompiled regex re_whitespace = re.compile(r'\s+') @@ -65,27 +68,22 @@ class Repo(object): # represents the configuration level of a configuration file config_level = ("system", "global", "repository") - def __init__(self, path=None): - """ - Create a new Repo instance + def __init__(self, path=None, odbt = GitObjectDB): + """ Create a new Repo instance - ``path`` - is the path to either the root git directory or the bare git repo - - Examples:: + :param path: is the path to either the root git directory or the bare git repo:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") repo = Repo("~/Development/git-python.git") repo = Repo("$REPOSITORIES/Development/git-python.git") - - Raises - InvalidGitRepositoryError or NoSuchPathError - - Returns - ``git.Repo`` - """ - + + :param odbt: Object DataBase type - a type which is constructed by providing + the directory containing the database objects, i.e. .git/objects. It will + be used to access all object data + :raise InvalidGitRepositoryError: + :raise NoSuchPathError: + :return: git.Repo """ epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) if not os.path.exists(epath): @@ -130,6 +128,12 @@ class Repo(object): self.working_dir = self._working_tree_dir or self.git_dir self.git = Git(self.working_dir) + + # special handling, in special times + args = [os.path.join(self.git_dir, 'objects')] + if issubclass(odbt, GitObjectDB): + args.append(self.git) + self.odb = odbt(*args) def __eq__(self, rhs): if isinstance(rhs, Repo): diff --git a/lib/git/utils.py b/lib/git/utils.py index c21528b1..60a7de48 100644 --- a/lib/git/utils.py +++ b/lib/git/utils.py @@ -27,6 +27,21 @@ def make_sha(source=''): sha1 = sha.sha(source) return sha1 +def stream_copy(source, destination, chunk_size=512*1024): + """Copy all data from the source stream into the destination stream in chunks + of size chunk_size + :return: amount of bytes written""" + br = 0 + while True: + chunk = source.read(chunk_size) + destination.write(chunk) + br += len(chunk) + if len(chunk) < chunk_size: + break + # END reading output stream + return br + + def join_path(a, *p): """Join path tokens together similar to os.path.join, but always use '/' instead of possibly '\' on windows.""" @@ -61,12 +76,14 @@ def join_path_native(a, *p): return to_native_path(join_path(a, *p)) -class SHA1Writer(object): +class IndexFileSHA1Writer(object): """ Wrapper around a file-like object that remembers the SHA1 of the data written to it. It will write a sha when the stream is closed or if the asked for explicitly usign write_sha. + Only useful to the indexfile + Note: Based on the dulwich project """ @@ -78,7 +95,7 @@ class SHA1Writer(object): def write(self, data): self.sha1.update(data) - self.f.write(data) + return self.f.write(data) def write_sha(self): sha = self.sha1.digest() |