Diffstat:
 -rw-r--r--  CHANGES                               |    7
 -rw-r--r--  lib/git/__init__.py                   |    3
 -rw-r--r--  lib/git/cmd.py                        |  924
 -rw-r--r--  lib/git/errors.py                     |   24
 -rw-r--r--  lib/git/index.py                      |    4
 -rw-r--r--  lib/git/objects/base.py               |  414
 -rw-r--r--  lib/git/objects/commit.py             |  788
 -rw-r--r--  lib/git/objects/tree.py               |    2
 -rw-r--r--  lib/git/objects/utils.py              |  439
 -rw-r--r--  lib/git/odb/__init__.py               |    6
 -rw-r--r--  lib/git/odb/db.py                     |  337
 -rw-r--r--  lib/git/odb/fun.py                    |  108
 -rw-r--r--  lib/git/odb/stream.py                 |  446
 -rw-r--r--  lib/git/odb/utils.py                  |   38
 -rw-r--r--  lib/git/repo.py                       |   48
 -rw-r--r--  lib/git/utils.py                      |   21
 -rw-r--r--  test/fixtures/rev_list                |   27
 -rw-r--r--  test/git/performance/lib.py           |   65
 -rw-r--r--  test/git/performance/test_commit.py   |   98
 -rw-r--r--  test/git/performance/test_odb.py      |   61
 -rw-r--r--  test/git/performance/test_streams.py  |  144
 -rw-r--r--  test/git/performance/test_utils.py    |   59
 -rw-r--r--  test/git/test_commit.py               |  369
 -rw-r--r--  test/git/test_diff.py                 |    6
 -rw-r--r--  test/git/test_odb.py                  |   66
 -rw-r--r--  test/git/test_performance.py          |   52
 -rw-r--r--  test/git/test_repo.py                 |  709
 -rw-r--r--  test/git/test_utils.py                |  236
 -rw-r--r--  test/testlib/helper.py                |   38
29 files changed, 3676 insertions, 1863 deletions
diff --git a/CHANGES b/CHANGES
index 5d677b06..e9e1257e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,7 +1,12 @@
=======
CHANGES
=======
-
+
+0.2 Beta 2
+===========
+ * Commit objects now carry the 'encoding' information of their message. It wasn't parsed previously, and defaults to UTF-8
+ * Commit.create_from_tree now uses a pure-python implementation, mimicking git-commit-tree
+
0.2
=====
General
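
The entries above translate to the following minimal usage sketch, assuming a repository in the current working directory and the public imports of this package:

    from git import Repo, Commit

    repo = Repo(".")
    commit = repo.head.commit
    # the message encoding is now parsed from the commit object
    print commit.encoding                # defaults to "UTF-8"

    # pure-python replacement for invoking git-commit-tree
    new_commit = Commit.create_from_tree(repo, commit.tree,
                                         "commit message", head=False)
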
diff --git a/lib/git/__init__.py b/lib/git/__init__.py
index aac539eb..2f17c55b 100644
--- a/lib/git/__init__.py
+++ b/lib/git/__init__.py
@@ -22,5 +22,8 @@ from git.remote import *
from git.index import *
from git.utils import LockFile, BlockingLockFile
+# odb is NOT imported intentionally - if you really want it, you should get it
+# yourself as it's part of the core
+
__all__ = [ name for name, obj in locals().items()
if not (name.startswith('_') or inspect.ismodule(obj)) ]
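
As the comment above notes, the odb package has to be imported explicitly. A short sketch based only on the module layout visible in this diff:

    # git/__init__.py intentionally skips the odb package
    from git.odb import IStream     # as imported by lib/git/objects/commit.py
    from git.odb import db          # database implementations, lib/git/odb/db.py
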
diff --git a/lib/git/cmd.py b/lib/git/cmd.py
index 82daf551..5cae2998 100644
--- a/lib/git/cmd.py
+++ b/lib/git/cmd.py
@@ -13,427 +13,515 @@ from errors import GitCommandError
GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False)
execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output',
- 'with_exceptions', 'as_process',
- 'output_stream' )
+ 'with_exceptions', 'as_process',
+ 'output_stream' )
def dashify(string):
- return string.replace('_', '-')
+ return string.replace('_', '-')
class Git(object):
- """
- The Git class manages communication with the Git binary.
-
- It provides a convenient interface to calling the Git binary, such as in::
-
- g = Git( git_dir )
- g.init() # calls 'git init' program
- rval = g.ls_files() # calls 'git ls-files' program
-
- ``Debugging``
- Set the GIT_PYTHON_TRACE environment variable to print each invocation
- of the command to stdout.
- Set its value to 'full' to see details about the returned values.
- """
- __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
-
- # CONFIGURATION
- # The size in bytes read from stdout when copying git's output to another stream
- max_chunk_size = 1024*64
-
- class AutoInterrupt(object):
- """
- Kill/Interrupt the stored process instance once this instance goes out of scope. It is
- used to prevent processes piling up in case iterators stop reading.
- Besides, all attributes are wired through to the contained process object.
-
- The wait method was overridden to perform automatic status code checking
- and possibly raise.
- """
- __slots__= ("proc", "args")
-
- def __init__(self, proc, args ):
- self.proc = proc
- self.args = args
-
- def __del__(self):
- # did the process finish already so we have a return code ?
- if self.proc.poll() is not None:
- return
-
- # can be that nothing really exists anymore ...
- if os is None:
- return
-
- # try to kill it
- try:
- os.kill(self.proc.pid, 2) # interrupt signal
- except AttributeError:
- # try windows
- # for some reason, providing None for stdout/stderr still prints something. This is why
- we simply use the shell and redirect to nul. It's slower than CreateProcess; the question
- is whether we really want to see all these messages. It's annoying no matter what.
- subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True)
- # END exception handling
-
- def __getattr__(self, attr):
- return getattr(self.proc, attr)
-
- def wait(self):
- """
- Wait for the process and return its status code.
-
- Raise
- GitCommandError if the return status is not 0
- """
- status = self.proc.wait()
- if status != 0:
- raise GitCommandError(self.args, status, self.proc.stderr.read())
- # END status handling
- return status
-
-
-
- def __init__(self, working_dir=None):
- """
- Initialize this instance with:
-
- ``working_dir``
- Git directory we should work in. If None, we always work in the current
- directory as returned by os.getcwd().
- It is meant to be the working tree directory if available, or the
- .git directory in case of bare repositories.
- """
- super(Git, self).__init__()
- self._working_dir = working_dir
-
- # cached command slots
- self.cat_file_header = None
- self.cat_file_all = None
-
- def __getattr__(self, name):
- """
- A convenience method as it allows calling the command as if it were
- an object.
- Returns
- Callable object that will call _call_process with your arguments.
- """
- if name[:1] == '_':
- raise AttributeError(name)
- return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
-
- @property
- def working_dir(self):
- """
- Returns
- Git directory we are working on
- """
- return self._working_dir
-
- def execute(self, command,
- istream=None,
- with_keep_cwd=False,
- with_extended_output=False,
- with_exceptions=True,
- as_process=False,
- output_stream=None,
- **subprocess_kwargs
- ):
- """
- Handles executing the command on the shell, consuming and returning
- its output (stdout)
-
- ``command``
- The command argument list to execute.
- It should be a string, or a sequence of program arguments. The
- program to execute is the first item in the args sequence or string.
-
- ``istream``
- Standard input filehandle passed to subprocess.Popen.
-
- ``with_keep_cwd``
- Whether to use the current working directory from os.getcwd().
- The cmd otherwise uses its own working_dir that it has been initialized
- with if possible.
-
- ``with_extended_output``
- Whether to return a (status, stdout, stderr) tuple.
-
- ``with_exceptions``
- Whether to raise an exception when git returns a non-zero status.
-
- ``as_process``
- Whether to return the created process instance directly from which
- streams can be read on demand. This will render with_extended_output and
- with_exceptions ineffective - the caller will have
- to deal with the details himself.
- It is important to note that the process will be placed into an AutoInterrupt
- wrapper that will interrupt the process once it goes out of scope. If you
- use the command in iterators, you should pass the whole process instance
- instead of a single stream.
-
- ``output_stream``
- If set to a file-like object, data produced by the git command will be
- output to the given stream directly.
- This feature only has any effect if as_process is False. Processes will
- always be created with a pipe as subprocess.Popen can only accept system
- file descriptors, not python objects ( such as StringIO ).
- This merely is a workaround as the data will be copied from the
- output pipe to the given output stream directly.
- See also: Git.max_chunk_size
-
- ``**subprocess_kwargs``
- Keyword arguments to be passed to subprocess.Popen. Please note that
- some of the valid kwargs are already set by this method, the ones you
- specify may not be the same ones.
-
- Returns::
-
- str(output) # extended_output = False (Default)
- tuple(int(status), str(stdout), str(stderr)) # extended_output = True
-
- if output_stream is set, the stdout value will be your output stream:
- output_stream # extended_output = False
- tuple(int(status), output_stream, str(stderr))# extended_output = True
-
- Raise
- GitCommandError
-
- NOTE
- If you add additional keyword arguments to the signature of this method,
- you must update the execute_kwargs tuple housed in this module.
- """
- if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
- print ' '.join(command)
-
- # Allow the user to have the command executed in their working dir.
- if with_keep_cwd or self._working_dir is None:
- cwd = os.getcwd()
- else:
- cwd=self._working_dir
-
- # Start the process
- proc = subprocess.Popen(command,
- cwd=cwd,
- stdin=istream,
- stderr=subprocess.PIPE,
- stdout=subprocess.PIPE,
- close_fds=(os.name=='posix'),# unsupported on windows
- **subprocess_kwargs
- )
- if as_process:
- return self.AutoInterrupt(proc, command)
-
- # Wait for the process to return
- status = 0
- stdout_value = ''
- stderr_value = ''
- try:
- if output_stream is None:
- stdout_value, stderr_value = proc.communicate()
- # strip trailing "\n"
- if stdout_value.endswith("\n"):
- stdout_value = stdout_value[:-1]
- if stderr_value.endswith("\n"):
- stderr_value = stderr_value[:-1]
- status = proc.returncode
- else:
- max_chunk_size = self.max_chunk_size
- while True:
- chunk = proc.stdout.read(max_chunk_size)
- output_stream.write(chunk)
- if len(chunk) < max_chunk_size:
- break
- # END reading output stream
- stdout_value = output_stream
- stderr_value = proc.stderr.read()
- # strip trailing "\n"
- if stderr_value.endswith("\n"):
- stderr_value = stderr_value[:-1]
- status = proc.wait()
- # END stdout handling
- finally:
- proc.stdout.close()
- proc.stderr.close()
-
- if with_exceptions and status != 0:
- raise GitCommandError(command, status, stderr_value)
-
- if GIT_PYTHON_TRACE == 'full':
- if stderr_value:
- print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
- elif stdout_value:
- print "%s -> %d: '%s'" % (command, status, stdout_value)
- else:
- print "%s -> %d" % (command, status)
-
- # Allow access to the command's status code
- if with_extended_output:
- return (status, stdout_value, stderr_value)
- else:
- return stdout_value
-
- def transform_kwargs(self, **kwargs):
- """
- Transforms Python style kwargs into git command line options.
- """
- args = []
- for k, v in kwargs.items():
- if len(k) == 1:
- if v is True:
- args.append("-%s" % k)
- elif type(v) is not bool:
- args.append("-%s%s" % (k, v))
- else:
- if v is True:
- args.append("--%s" % dashify(k))
- elif type(v) is not bool:
- args.append("--%s=%s" % (dashify(k), v))
- return args
-
- @classmethod
- def __unpack_args(cls, arg_list):
- if not isinstance(arg_list, (list,tuple)):
- return [ str(arg_list) ]
-
- outlist = list()
- for arg in arg_list:
- if isinstance(arg, (list, tuple)):
- outlist.extend(cls.__unpack_args( arg ))
- # END recursion
- else:
- outlist.append(str(arg))
- # END for each arg
- return outlist
-
- def _call_process(self, method, *args, **kwargs):
- """
- Run the given git command with the specified arguments and return
- the result as a String
-
- ``method``
- is the command. Contained "_" characters will be converted to dashes,
- such as in 'ls_files' to call 'ls-files'.
-
- ``args``
- is the list of arguments. If None is included, it will be pruned.
- This allows your commands to call git more conveniently, as None
- arguments are treated as non-existent.
-
- ``kwargs``
- is a dict of keyword arguments.
- This function accepts the same optional keyword arguments
- as execute().
-
- Examples::
- git.rev_list('master', max_count=10, header=True)
-
- Returns
- Same as execute()
- """
-
- # Handle optional arguments prior to calling transform_kwargs
- # otherwise these'll end up in args, which is bad.
- _kwargs = {}
- for kwarg in execute_kwargs:
- try:
- _kwargs[kwarg] = kwargs.pop(kwarg)
- except KeyError:
- pass
-
- # Prepare the argument list
- opt_args = self.transform_kwargs(**kwargs)
-
- ext_args = self.__unpack_args([a for a in args if a is not None])
- args = opt_args + ext_args
-
- call = ["git", dashify(method)]
- call.extend(args)
-
- return self.execute(call, **_kwargs)
-
- def _parse_object_header(self, header_line):
- """
- ``header_line``
- <hex_sha> type_string size_as_int
-
- Returns
- (hex_sha, type_string, size_as_int)
-
- Raises
- ValueError if the header indicates an error due to an incorrect
- input sha
- """
- tokens = header_line.split()
- if len(tokens) != 3:
- raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) )
- if len(tokens[0]) != 40:
- raise ValueError("Failed to parse header: %r" % header_line)
- return (tokens[0], tokens[1], int(tokens[2]))
-
- def __prepare_ref(self, ref):
- # required for command to separate refs on stdin
- refstr = str(ref) # could be ref-object
- if refstr.endswith("\n"):
- return refstr
- return refstr + "\n"
-
- def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
- cur_val = getattr(self, attr_name)
- if cur_val is not None:
- return cur_val
-
- options = { "istream" : subprocess.PIPE, "as_process" : True }
- options.update( kwargs )
-
- cmd = self._call_process( cmd_name, *args, **options )
- setattr(self, attr_name, cmd )
- return cmd
-
- def __get_object_header(self, cmd, ref):
- cmd.stdin.write(self.__prepare_ref(ref))
- cmd.stdin.flush()
- return self._parse_object_header(cmd.stdout.readline())
-
- def get_object_header(self, ref):
- """
- Use this method to quickly examine the type and size of the object behind
- the given ref.
-
- NOTE
- The method will only suffer from the costs of command invocation
- once and reuses the command in subsequent calls.
-
- Return:
- (hexsha, type_string, size_as_int)
- """
- cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
- return self.__get_object_header(cmd, ref)
-
- def get_object_data(self, ref):
- """
- As get_object_header, but returns object data as well
-
- Return:
- (hexsha, type_string, size_as_int,data_string)
- """
- cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
- hexsha, typename, size = self.__get_object_header(cmd, ref)
- data = cmd.stdout.read(size)
- cmd.stdout.read(1) # finishing newlines
-
- return (hexsha, typename, size, data)
-
- def clear_cache(self):
- """
- Clear all kinds of internal caches to release resources.
-
- Currently persistent commands will be interrupted.
-
- Returns
- self
- """
- self.cat_file_all = None
- self.cat_file_header = None
- return self
+ """
+ The Git class manages communication with the Git binary.
+
+ It provides a convenient interface to calling the Git binary, such as in::
+
+ g = Git( git_dir )
+ g.init() # calls 'git init' program
+ rval = g.ls_files() # calls 'git ls-files' program
+
+ ``Debugging``
+ Set the GIT_PYTHON_TRACE environment variable to print each invocation
+ of the command to stdout.
+ Set its value to 'full' to see details about the returned values.
+ """
+ __slots__ = ("_working_dir", "cat_file_all", "cat_file_header")
+
+ # CONFIGURATION
+ # The size in bytes read from stdout when copying git's output to another stream
+ max_chunk_size = 1024*64
+
+ class AutoInterrupt(object):
+ """
+ Kill/Interrupt the stored process instance once this instance goes out of scope. It is
+ used to prevent processes piling up in case iterators stop reading.
+ Besides, all attributes are wired through to the contained process object.
+
+ The wait method was overridden to perform automatic status code checking
+ and possibly raise.
+ """
+ __slots__= ("proc", "args")
+
+ def __init__(self, proc, args ):
+ self.proc = proc
+ self.args = args
+
+ def __del__(self):
+ # did the process finish already so we have a return code ?
+ if self.proc.poll() is not None:
+ return
+
+ # can be that nothing really exists anymore ...
+ if os is None:
+ return
+
+ # try to kill it
+ try:
+ os.kill(self.proc.pid, 2) # interrupt signal
+ except AttributeError:
+ # try windows
+ # for some reason, providing None for stdout/stderr still prints something. This is why
+ we simply use the shell and redirect to nul. It's slower than CreateProcess; the question
+ is whether we really want to see all these messages. It's annoying no matter what.
+ subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True)
+ # END exception handling
+
+ def __getattr__(self, attr):
+ return getattr(self.proc, attr)
+
+ def wait(self):
+ """
+ Wait for the process and return its status code.
+
+ Raise
+ GitCommandError if the return status is not 0
+ """
+ status = self.proc.wait()
+ if status != 0:
+ raise GitCommandError(self.args, status, self.proc.stderr.read())
+ # END status handling
+ return status
+ # END auto interrupt
+
+ class CatFileContentStream(object):
+ """Object representing a sized read-only stream returning the contents of
+ an object.
+ It behaves like a stream, but counts the data read and simulates an empty
+ stream once our sized content region is empty.
+ If not all data is read by the end of the object's lifetime, we read the
+ rest to ensure the underlying stream continues to work"""
+
+ __slots__ = ('_stream', '_nbr', '_size')
+
+ def __init__(self, size, stream):
+ self._stream = stream
+ self._size = size
+ self._nbr = 0 # num bytes read
+
+ def read(self, size=-1):
+ bytes_left = self._size - self._nbr
+ if bytes_left == 0:
+ return ''
+ if size > -1:
+ # assure we don't try to read past our limit
+ size = min(bytes_left, size)
+ else:
+ # they try to read all, make sure it's not more than what remains
+ size = bytes_left
+ # END check early depletion
+ data = self._stream.read(size)
+ self._nbr += len(data)
+
+ # check for depletion, read our final byte to make the stream usable by others
+ if self._size - self._nbr == 0:
+ self._stream.read(1) # final newline
+ # END finish reading
+
+ return data
+
+ def readline(self, size=-1):
+ if self._nbr == self._size:
+ return ''
+
+ # clamp size to lowest allowed value
+ bytes_left = self._size - self._nbr
+ if size > -1:
+ size = min(bytes_left, size)
+ else:
+ size = bytes_left
+ # END handle size
+
+ data = self._stream.readline(size)
+ self._nbr += len(data)
+
+ # handle final byte
+ # we inline everything, it must be fast !
+ if self._size - self._nbr == 0:
+ self._stream.read(1)
+ # END finish reading
+
+ return data
+
+ def readlines(self, size=-1):
+ if self._nbr == self._size:
+ return list()
+
+ # leave all additional logic to our readline method, we just check the size
+ out = list()
+ nbr = 0
+ while True:
+ line = self.readline()
+ if not line:
+ break
+ out.append(line)
+ if size > -1:
+ nbr += len(line)
+ if nbr > size:
+ break
+ # END handle size constraint
+ # END readline loop
+ return out
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ line = self.readline()
+ if not line:
+ raise StopIteration
+ return line
+
+ def __del__(self):
+ bytes_left = self._size - self._nbr
+ if bytes_left:
+ # read and discard - seeking is not possible within a pipe
+ self._stream.read(bytes_left + 1) # includes terminating newline
+ # END handle incomplete read
+
+
+ def __init__(self, working_dir=None):
+ """
+ Initialize this instance with:
+
+ ``working_dir``
+ Git directory we should work in. If None, we always work in the current
+ directory as returned by os.getcwd().
+ It is meant to be the working tree directory if available, or the
+ .git directory in case of bare repositories.
+ """
+ super(Git, self).__init__()
+ self._working_dir = working_dir
+
+ # cached command slots
+ self.cat_file_header = None
+ self.cat_file_all = None
+
+ def __getattr__(self, name):
+ """
+ A convenience method as it allows calling the command as if it were
+ an object.
+ Returns
+ Callable object that will call _call_process with your arguments.
+ """
+ if name[:1] == '_':
+ raise AttributeError(name)
+ return lambda *args, **kwargs: self._call_process(name, *args, **kwargs)
+
+ @property
+ def working_dir(self):
+ """
+ Returns
+ Git directory we are working on
+ """
+ return self._working_dir
+
+ def execute(self, command,
+ istream=None,
+ with_keep_cwd=False,
+ with_extended_output=False,
+ with_exceptions=True,
+ as_process=False,
+ output_stream=None,
+ **subprocess_kwargs
+ ):
+ """
+ Handles executing the command on the shell, consuming and returning
+ its output (stdout)
+
+ ``command``
+ The command argument list to execute.
+ It should be a string, or a sequence of program arguments. The
+ program to execute is the first item in the args sequence or string.
+
+ ``istream``
+ Standard input filehandle passed to subprocess.Popen.
+
+ ``with_keep_cwd``
+ Whether to use the current working directory from os.getcwd().
+ The cmd otherwise uses its own working_dir that it has been initialized
+ with if possible.
+
+ ``with_extended_output``
+ Whether to return a (status, stdout, stderr) tuple.
+
+ ``with_exceptions``
+ Whether to raise an exception when git returns a non-zero status.
+
+ ``as_process``
+ Whether to return the created process instance directly from which
+ streams can be read on demand. This will render with_extended_output and
+ with_exceptions ineffective - the caller will have
+ to deal with the details himself.
+ It is important to note that the process will be placed into an AutoInterrupt
+ wrapper that will interrupt the process once it goes out of scope. If you
+ use the command in iterators, you should pass the whole process instance
+ instead of a single stream.
+
+ ``output_stream``
+ If set to a file-like object, data produced by the git command will be
+ output to the given stream directly.
+ This feature only has any effect if as_process is False. Processes will
+ always be created with a pipe due to issues with subprocess.
+ This merely is a workaround as data will be copied from the
+ output pipe to the given output stream directly.
+
+ ``**subprocess_kwargs``
+ Keyword arguments to be passed to subprocess.Popen. Please note that
+ some of the valid kwargs are already set by this method, the ones you
+ specify may not be the same ones.
+
+ Returns::
+
+ str(output) # extended_output = False (Default)
+ tuple(int(status), str(stdout), str(stderr)) # extended_output = True
+
+ if output_stream is set, the stdout value will be your output stream:
+ output_stream # extended_output = False
+ tuple(int(status), output_stream, str(stderr))# extended_output = True
+
+ Raise
+ GitCommandError
+
+ NOTE
+ If you add additional keyword arguments to the signature of this method,
+ you must update the execute_kwargs tuple housed in this module.
+ """
+ if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full':
+ print ' '.join(command)
+
+ # Allow the user to have the command executed in their working dir.
+ if with_keep_cwd or self._working_dir is None:
+ cwd = os.getcwd()
+ else:
+ cwd=self._working_dir
+
+ # Start the process
+ proc = subprocess.Popen(command,
+ cwd=cwd,
+ stdin=istream,
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ close_fds=(os.name=='posix'),# unsupported on windows
+ **subprocess_kwargs
+ )
+ if as_process:
+ return self.AutoInterrupt(proc, command)
+
+ # Wait for the process to return
+ status = 0
+ stdout_value = ''
+ stderr_value = ''
+ try:
+ if output_stream is None:
+ stdout_value, stderr_value = proc.communicate()
+ # strip trailing "\n"
+ if stdout_value.endswith("\n"):
+ stdout_value = stdout_value[:-1]
+ if stderr_value.endswith("\n"):
+ stderr_value = stderr_value[:-1]
+ status = proc.returncode
+ else:
+ stream_copy(proc.stdout, output_stream, self.max_chunk_size)
+ stdout_value = output_stream
+ stderr_value = proc.stderr.read()
+ # strip trailing "\n"
+ if stderr_value.endswith("\n"):
+ stderr_value = stderr_value[:-1]
+ status = proc.wait()
+ # END stdout handling
+ finally:
+ proc.stdout.close()
+ proc.stderr.close()
+
+ if with_exceptions and status != 0:
+ raise GitCommandError(command, status, stderr_value)
+
+ if GIT_PYTHON_TRACE == 'full':
+ if stderr_value:
+ print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value)
+ elif stdout_value:
+ print "%s -> %d: '%s'" % (command, status, stdout_value)
+ else:
+ print "%s -> %d" % (command, status)
+
+ # Allow access to the command's status code
+ if with_extended_output:
+ return (status, stdout_value, stderr_value)
+ else:
+ return stdout_value
+
+ def transform_kwargs(self, **kwargs):
+ """
+ Transforms Python style kwargs into git command line options.
+ """
+ args = []
+ for k, v in kwargs.items():
+ if len(k) == 1:
+ if v is True:
+ args.append("-%s" % k)
+ elif type(v) is not bool:
+ args.append("-%s%s" % (k, v))
+ else:
+ if v is True:
+ args.append("--%s" % dashify(k))
+ elif type(v) is not bool:
+ args.append("--%s=%s" % (dashify(k), v))
+ return args
+
+ @classmethod
+ def __unpack_args(cls, arg_list):
+ if not isinstance(arg_list, (list,tuple)):
+ return [ str(arg_list) ]
+
+ outlist = list()
+ for arg in arg_list:
+ if isinstance(arg, (list, tuple)):
+ outlist.extend(cls.__unpack_args( arg ))
+ # END recursion
+ else:
+ outlist.append(str(arg))
+ # END for each arg
+ return outlist
+
+ def _call_process(self, method, *args, **kwargs):
+ """
+ Run the given git command with the specified arguments and return
+ the result as a String
+
+ ``method``
+ is the command. Contained "_" characters will be converted to dashes,
+ such as in 'ls_files' to call 'ls-files'.
+
+ ``args``
+ is the list of arguments. If None is included, it will be pruned.
+ This allows your commands to call git more conveniently, as None
+ arguments are treated as non-existent.
+
+ ``kwargs``
+ is a dict of keyword arguments.
+ This function accepts the same optional keyword arguments
+ as execute().
+
+ Examples::
+ git.rev_list('master', max_count=10, header=True)
+
+ Returns
+ Same as execute()
+ """
+
+ # Handle optional arguments prior to calling transform_kwargs
+ # otherwise these'll end up in args, which is bad.
+ _kwargs = {}
+ for kwarg in execute_kwargs:
+ try:
+ _kwargs[kwarg] = kwargs.pop(kwarg)
+ except KeyError:
+ pass
+
+ # Prepare the argument list
+ opt_args = self.transform_kwargs(**kwargs)
+
+ ext_args = self.__unpack_args([a for a in args if a is not None])
+ args = opt_args + ext_args
+
+ call = ["git", dashify(method)]
+ call.extend(args)
+
+ return self.execute(call, **_kwargs)
+
+ def _parse_object_header(self, header_line):
+ """
+ ``header_line``
+ <hex_sha> type_string size_as_int
+
+ Returns
+ (hex_sha, type_string, size_as_int)
+
+ Raises
+ ValueError if the header indicates an error due to an incorrect
+ input sha
+ """
+ tokens = header_line.split()
+ if len(tokens) != 3:
+ raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) )
+ if len(tokens[0]) != 40:
+ raise ValueError("Failed to parse header: %r" % header_line)
+ return (tokens[0], tokens[1], int(tokens[2]))
+
+ def __prepare_ref(self, ref):
+ # required for command to separate refs on stdin
+ refstr = str(ref) # could be ref-object
+ if refstr.endswith("\n"):
+ return refstr
+ return refstr + "\n"
+
+ def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs):
+ cur_val = getattr(self, attr_name)
+ if cur_val is not None:
+ return cur_val
+
+ options = { "istream" : subprocess.PIPE, "as_process" : True }
+ options.update( kwargs )
+
+ cmd = self._call_process( cmd_name, *args, **options )
+ setattr(self, attr_name, cmd )
+ return cmd
+
+ def __get_object_header(self, cmd, ref):
+ cmd.stdin.write(self.__prepare_ref(ref))
+ cmd.stdin.flush()
+ return self._parse_object_header(cmd.stdout.readline())
+
+ def get_object_header(self, ref):
+ """ Use this method to quickly examine the type and size of the object behind
+ the given ref.
+
+ :note: The method will only suffer from the costs of command invocation
+ once and reuses the command in subsequent calls.
+
+ :return: (hexsha, type_string, size_as_int) """
+ cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True)
+ return self.__get_object_header(cmd, ref)
+
+ def get_object_data(self, ref):
+ """ As get_object_header, but returns object data as well
+ :return: (hexsha, type_string, size_as_int,data_string)
+ :note: not threadsafe
+ """
+ hexsha, typename, size, stream = self.stream_object_data(ref)
+ data = stream.read(size)
+ del(stream)
+ return (hexsha, typename, size, data)
+
+ def stream_object_data(self, ref):
+ """As get_object_header, but returns the data as a stream
+ :return: (hexsha, type_string, size_as_int, stream)
+ :note: This method is not threadsafe, you need one independent Command instance
+ per thread to be safe !"""
+ cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True)
+ hexsha, typename, size = self.__get_object_header(cmd, ref)
+ return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout))
+
+ def clear_cache(self):
+ """
+ Clear all kinds of internal caches to release resources.
+
+ Currently persistent commands will be interrupted.
+
+ Returns
+ self
+ """
+ self.cat_file_all = None
+ self.cat_file_header = None
+ return self
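
A sketch of the new streaming access added above; repo_dir stands in for an existing repository path:

    from git.cmd import Git

    g = Git(repo_dir)

    # header lookup reuses one persistent 'git cat-file --batch-check' process
    hexsha, typename, size = g.get_object_header("HEAD")

    # streamed content access - the returned CatFileContentStream simulates
    # an empty stream once the sized content region is depleted
    hexsha, typename, size, stream = g.stream_object_data("HEAD")
    data = stream.read(size)
    assert len(data) == size
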
diff --git a/lib/git/errors.py b/lib/git/errors.py
index f66fb528..d8a35e02 100644
--- a/lib/git/errors.py
+++ b/lib/git/errors.py
@@ -8,19 +8,25 @@ Module containing all exceptions thrown throughout the git package,
"""
class InvalidGitRepositoryError(Exception):
- """
- Thrown if the given repository appears to have an invalid format.
- """
+ """ Thrown if the given repository appears to have an invalid format. """
+
+class ODBError(Exception):
+ """All errors thrown by the object database"""
+
+class InvalidDBRoot(ODBError):
+ """Thrown if an object database cannot be initialized at the given path"""
+
+class BadObject(ODBError):
+ """The object with the given SHA does not exist"""
+
+class BadObjectType(ODBError):
+ """The object had an unsupported type"""
class NoSuchPathError(OSError):
- """
- Thrown if a path could not be accessed by the system.
- """
+ """ Thrown if a path could not be access by the system. """
class GitCommandError(Exception):
- """
- Thrown if execution of the git command fails with non-zero status code.
- """
+ """ Thrown if execution of the git command fails with non-zero status code. """
def __init__(self, command, status, stderr=None):
self.stderr = stderr
self.status = status
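
The new database exceptions share the ODBError base, so callers can handle them individually or as a group. A sketch assuming a Repo instance with the odb attribute used elsewhere in this change:

    from git.errors import ODBError, BadObject

    try:
        oinfo = repo.odb.info(sha)      # as called by Object._set_cache_
    except BadObject:
        print "no object with sha %s" % sha
    except ODBError, err:
        print "object database error: %s" % err
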
diff --git a/lib/git/index.py b/lib/git/index.py
index 8ccc3fe3..36428315 100644
--- a/lib/git/index.py
+++ b/lib/git/index.py
@@ -21,7 +21,7 @@ import git.diff as diff
from errors import GitCommandError
from git.objects import Blob, Tree, Object, Commit
-from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native
+from git.utils import IndexFileSHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path_native
class CheckoutError( Exception ):
@@ -461,7 +461,7 @@ class IndexFile(LazyMixin, diff.Diffable):
write_op = ConcurrentWriteOperation(file_path or self._file_path)
stream = write_op._begin_writing()
- stream = SHA1Writer(stream)
+ stream = IndexFileSHA1Writer(stream)
# header
stream.write("DIRC")
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 6a51eed3..5a3a15a7 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -4,224 +4,220 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
-from git.utils import LazyMixin, join_path_native
+from git.utils import LazyMixin, join_path_native, stream_copy
import utils
-
+
_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r"
class Object(LazyMixin):
- """
- Implements an Object which may be Blobs, Trees, Commits and Tags
-
- This Object also serves as a constructor for instances of the correct type::
-
- inst = Object.new(repo,id)
- inst.sha # objects sha in hex
- inst.size # objects uncompressed data size
- inst.data # byte string containing the whole data of the object
- """
- NULL_HEX_SHA = '0'*40
- TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "sha", "size", "data" )
- type = None # to be set by subclass
-
- def __init__(self, repo, id):
- """
- Initialize an object by identifying it by its id. All keyword arguments
- will be set on demand if None.
-
- ``repo``
- repository this object is located in
-
- ``id``
- SHA1 or ref suitable for git-rev-parse
- """
- super(Object,self).__init__()
- self.repo = repo
- self.sha = id
+ """
+ Implements an Object which may be Blobs, Trees, Commits and Tags
+
+ This Object also serves as a constructor for instances of the correct type::
+
+ inst = Object.new(repo,id)
+ inst.sha # objects sha in hex
+ inst.size # objects uncompressed data size
+ inst.data # byte string containing the whole data of the object
+ """
+ NULL_HEX_SHA = '0'*40
+ TYPES = ("blob", "tree", "commit", "tag")
+ __slots__ = ("repo", "sha", "size", "data" )
+ type = None # to be set by subclass
+
+ def __init__(self, repo, id):
+ """
+ Initialize an object by identifying it by its id. All keyword arguments
+ will be set on demand if None.
+
+ ``repo``
+ repository this object is located in
+
+ ``id``
+ SHA1 or ref suitable for git-rev-parse
+ """
+ super(Object,self).__init__()
+ self.repo = repo
+ self.sha = id
- @classmethod
- def new(cls, repo, id):
- """
- Return
- New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a hexsha even though
- the input id may have been a Reference or Rev-Spec
-
- Note
- This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a hexsha.
- """
- hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = obj_type(repo, hexsha)
- inst.size = size
- return inst
-
- def _set_self_from_args_(self, args_dict):
- """
- Initialize attributes on self from the given dict that was retrieved
- from locals() in the calling method.
-
- Will only set an attribute on self if the corresponding value in args_dict
- is not None
- """
- for attr, val in args_dict.items():
- if attr != "self" and val is not None:
- setattr( self, attr, val )
- # END set all non-None attributes
-
- def _set_cache_(self, attr):
- """
- Retrieve object information
- """
- if attr == "size":
- hexsha, typename, self.size = self.repo.git.get_object_header(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- elif attr == "data":
- hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- else:
- super(Object,self)._set_cache_(attr)
-
- def __eq__(self, other):
- """
- Returns
- True if the objects have the same SHA1
- """
- return self.sha == other.sha
-
- def __ne__(self, other):
- """
- Returns
- True if the objects do not have the same SHA1
- """
- return self.sha != other.sha
-
- def __hash__(self):
- """
- Returns
- Hash of our id allowing objects to be used in dicts and sets
- """
- return hash(self.sha)
-
- def __str__(self):
- """
- Returns
- string of our SHA1 as understood by all git commands
- """
- return self.sha
-
- def __repr__(self):
- """
- Returns
- string with pythonic representation of our object
- """
- return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+ @classmethod
+ def new(cls, repo, id):
+ """
+ Return
+ New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a hexsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ Note
+ This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a hexsha.
+ """
+ hexsha, typename, size = repo.git.get_object_header(id)
+ obj_type = utils.get_object_type_by_name(typename)
+ inst = obj_type(repo, hexsha)
+ inst.size = size
+ return inst
+
+ def _set_self_from_args_(self, args_dict):
+ """
+ Initialize attributes on self from the given dict that was retrieved
+ from locals() in the calling method.
+
+ Will only set an attribute on self if the corresponding value in args_dict
+ is not None
+ """
+ for attr, val in args_dict.items():
+ if attr != "self" and val is not None:
+ setattr( self, attr, val )
+ # END set all non-None attributes
+
+ def _set_cache_(self, attr):
+ """
+ Retrieve object information
+ """
+ if attr == "size":
+ oinfo = self.repo.odb.info(self.sha)
+ self.size = oinfo.size
+ assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
+ elif attr == "data":
+ ostream = self.repo.odb.stream(self.sha)
+ self.size = ostream.size
+ self.data = ostream.read()
+ assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """
+ Returns
+ True if the objects have the same SHA1
+ """
+ return self.sha == other.sha
+
+ def __ne__(self, other):
+ """
+ Returns
+ True if the objects do not have the same SHA1
+ """
+ return self.sha != other.sha
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our id allowing objects to be used in dicts and sets
+ """
+ return hash(self.sha)
+
+ def __str__(self):
+ """
+ Returns
+ string of our SHA1 as understood by all git commands
+ """
+ return self.sha
+
+ def __repr__(self):
+ """
+ Returns
+ string with pythonic representation of our object
+ """
+ return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
- @property
- def data_stream(self):
- """
- Returns
- File Object compatible stream to the uncompressed raw data of the object
- """
- proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
- return utils.ProcessStreamAdapter(proc, "stdout")
+ @property
+ def data_stream(self):
+ """ :return: File Object compatible stream to the uncompressed raw data of the object
+ :note: returned streams must be read in order"""
+ return self.repo.odb.stream(self.sha)
- def stream_data(self, ostream):
- """
- Writes our data directly to the given output stream
-
- ``ostream``
- File object compatible stream object.
-
- Returns
- self
- """
- self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
- return self
+ def stream_data(self, ostream):
+ """Writes our data directly to the given output stream
+ :param ostream: File object compatible stream object.
+ :return: self"""
+ istream = self.repo.odb.stream(self.sha)
+ stream_copy(istream, ostream)
+ return self
+
class IndexObject(Object):
- """
- Base for all objects that can be part of the index file, namely Tree, Blob and
- SubModule objects
- """
- __slots__ = ("path", "mode")
-
- def __init__(self, repo, sha, mode=None, path=None):
- """
- Initialize a newly instanced IndexObject
- ``repo``
- is the Repo we are located in
+ """
+ Base for all objects that can be part of the index file, namely Tree, Blob and
+ SubModule objects
+ """
+ __slots__ = ("path", "mode")
+
+ def __init__(self, repo, sha, mode=None, path=None):
+ """
+ Initialize a newly instanced IndexObject
+ ``repo``
+ is the Repo we are located in
- ``sha`` : string
- is the git object id as hex sha
+ ``sha`` : string
+ is the git object id as hex sha
- ``mode`` : int
- is the file mode as int, use the stat module to evaluate the information
+ ``mode`` : int
+ is the file mode as int, use the stat module to evaluate the information
- ``path`` : str
- is the path to the file in the file system, relative to the git repository root, i.e.
- file.ext or folder/other.ext
-
- NOTE
- Path may not be set if the index object has been created directly, as it cannot
- be retrieved without knowing the parent tree.
- """
- super(IndexObject, self).__init__(repo, sha)
- self._set_self_from_args_(locals())
- if isinstance(mode, basestring):
- self.mode = self._mode_str_to_int(mode)
-
- def __hash__(self):
- """
- Returns
- Hash of our path as index items are uniquely identifiable by path, not
- by their data !
- """
- return hash(self.path)
-
- def _set_cache_(self, attr):
- if attr in IndexObject.__slots__:
- # they cannot be retrieved later on ( not without searching for them )
- raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
- else:
- super(IndexObject, self)._set_cache_(attr)
-
- @classmethod
- def _mode_str_to_int(cls, modestr):
- """
- ``modestr``
- string like 755 or 644 or 100644 - only the last 6 chars will be used
-
- Returns
- int mode, compatible with the mode values of the stat module
- regarding the rwx permissions for user, group and other, special
- flags and file system flags, i.e. whether it is a symlink for example.
- """
- mode = 0
- for iteration,char in enumerate(reversed(modestr[-6:])):
- mode += int(char) << iteration*3
- # END for each char
- return mode
-
- @property
- def name(self):
- """
- Returns
- Name portion of the path, effectively being the basename
- """
- return os.path.basename(self.path)
-
- @property
- def abspath(self):
- """
- Returns
- Absolute path to this index object in the file system ( as opposed to the
- .path field which is a path relative to the git repository ).
-
- The returned path will be native to the system and contains '\' on windows.
- """
- return join_path_native(self.repo.working_tree_dir, self.path)
-
+ ``path`` : str
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+
+ NOTE
+ Path may not be set if the index object has been created directly, as it cannot
+ be retrieved without knowing the parent tree.
+ """
+ super(IndexObject, self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+ if isinstance(mode, basestring):
+ self.mode = self._mode_str_to_int(mode)
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our path as index items are uniquely identifiable by path, not
+ by their data !
+ """
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved later on ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
+ @classmethod
+ def _mode_str_to_int(cls, modestr):
+ """
+ ``modestr``
+ string like 755 or 644 or 100644 - only the last 6 chars will be used
+
+ Returns
+ int mode, compatible with the mode values of the stat module
+ regarding the rwx permissions for user, group and other, special
+ flags and file system flags, i.e. whether it is a symlink for example.
+ """
+ mode = 0
+ for iteration,char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
+ @property
+ def name(self):
+ """
+ Returns
+ Name portion of the path, effectively being the basename
+ """
+ return os.path.basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ Returns
+ Absolute path to this index object in the file system ( as opposed to the
+ .path field which is a path relative to the git repository ).
+
+ The returned path will be native to the system and contains '\' on windows.
+ """
+ return join_path_native(self.repo.working_tree_dir, self.path)
+
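
With base.py now routing through repo.odb, object data no longer requires one git process per access. A usage sketch assuming an existing Repo instance:

    from cStringIO import StringIO
    from git.objects.base import Object

    obj = Object.new(repo, "HEAD")      # typed instance with hexsha set
    print obj.sha, obj.size             # size fetched lazily via repo.odb.info

    sink = StringIO()
    obj.stream_data(sink)               # stream_copy from repo.odb.stream
    assert len(sink.getvalue()) == obj.size
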
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 826f684c..9a3c2c95 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -7,372 +7,434 @@
from git.utils import Iterable
import git.diff as diff
import git.stats as stats
+from git.actor import Actor
from tree import Tree
+from git.odb import IStream
+from cStringIO import StringIO
import base
import utils
-import tempfile
+import time
import os
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
- """
- Wraps a git Commit object.
-
- This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
-
- # object configuration
- type = "commit"
- __slots__ = ("tree",
- "author", "authored_date", "author_tz_offset",
- "committer", "committed_date", "committer_tz_offset",
- "message", "parents")
- _id_attribute_ = "sha"
-
- def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
-
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- ``sha``
- is the sha id of the commit or a ref
-
- ``parents`` : tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
-
- ``tree`` : Tree
- is the corresponding tree id or an actual Tree
-
- ``author`` : Actor
- is the author string ( will be implicitly converted into an Actor object )
-
- ``authored_date`` : int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
-
- ``author_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``committer`` : Actor
- is the committer string
-
- ``committed_date`` : int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
-
- ``committer_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``message`` : string
- is the commit message
-
- Returns
- git.Commit
- """
- super(Commit,self).__init__(repo, sha)
- self._set_self_from_args_(locals())
-
- if parents is not None:
- self.parents = tuple( self.__class__(repo, p) for p in parents )
- # END for each parent to convert
-
- if self.sha and tree is not None:
- self.tree = Tree(repo, tree, path='')
- # END id to tree conversion
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- """
- Called by LazyMixin superclass when the given uninitialized member needs
- to be set.
- We set all values at once.
- """
- if attr in Commit.__slots__:
- # prepare our data lines to match rev-list
- data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.sha)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.author_tz_offset = temp.author_tz_offset
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.committer_tz_offset = temp.committer_tz_offset
- self.message = temp.message
- else:
- super(Commit, self)._set_cache_(attr)
-
- @property
- def summary(self):
- """
- Returns
- First line of the commit message.
- """
- return self.message.split('\n', 1)[0]
-
- def count(self, paths='', **kwargs):
- """
- Count the number of commits reachable from this commit
-
- ``paths``
- is an optional path or a list of paths restricting the return value
- to commits actually containing the paths
-
- ``kwargs``
- Additional options to be passed to git-rev-list. They must not alter
- the output style of the command, or parsing will yield incorrect results
- Returns
- int
- """
- # yes, it makes a difference whether empty paths are given or not in our case
- # as the empty paths version will ignore merge commits for some reason.
- if paths:
- return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
- else:
- return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
-
-
- @property
- def name_rev(self):
- """
- Returns
- String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes
- """
- return self.repo.git.name_rev(self)
-
- @classmethod
- def iter_items(cls, repo, rev, paths='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
-
- ``paths``
- is an optional path or list of paths, if set only Commits that include the path
- or paths will be considered
-
- ``kwargs``
- optional keyword arguments to git rev-list where
- ``max_count`` is the maximum number of commits to fetch
- ``skip`` is the number of commits to skip
- ``since`` all commits since i.e. '1970-01-01'
-
- Returns
- iterator yielding Commit items
- """
- options = {'pretty': 'raw', 'as_process' : True }
- options.update(kwargs)
-
- args = list()
- if paths:
- args.extend(('--', paths))
- # END if paths
-
- proc = repo.git.rev_list(rev, args, **options)
- return cls._iter_from_process_or_stream(repo, proc, True)
-
- def iter_parents(self, paths='', **kwargs):
- """
- Iterate _all_ parents of this commit.
-
- ``paths``
- Optional path or list of paths limiting the Commits to those that
- contain at least one of the paths
-
- ``kwargs``
- All arguments allowed by git-rev-list
-
- Return:
- Iterator yielding Commit objects which are parents of self
- """
- # skip ourselves
- skip = kwargs.get("skip", 1)
- if skip == 0: # skip ourselves
- skip = 1
- kwargs['skip'] = skip
-
- return self.iter_items( self.repo, self, paths, **kwargs )
-
- @property
- def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
- or from all changes done if this is the very first commit.
-
- Return
- git.Stats
- """
- if not self.parents:
- text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
- text2 = ""
- for line in text.splitlines()[1:]:
- (insertions, deletions, filename) = line.split("\t")
- text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
- text = text2
- else:
- text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
- return stats.Stats._list_from_string(self.repo, text)
-
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``proc``
- git-rev-list process instance (raw format)
-
- ``from_rev_list``
- If True, the stream was created by rev-list in which case we parse
- the message differently
- Returns
- iterator returning Commit objects
- """
- stream = proc_or_stream
- if not hasattr(stream,'next'):
- stream = proc_or_stream.stdout
-
- for line in stream:
- commit_tokens = line.split()
- id = commit_tokens[1]
- assert commit_tokens[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
- committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-
- # empty line
- stream.next()
-
- message_lines = []
- if from_rev_list:
- for msg_line in stream:
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- else:
- # a stream from our data simply gives us the plain message
- for msg_line in stream:
- message_lines.append(msg_line)
- # END message parsing
- message = '\n'.join(message_lines)
-
- yield Commit(repo, id, parents=tuple(parents), tree=tree,
- author=author, authored_date=authored_date, author_tz_offset=author_tz_offset,
- committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset,
- message=message)
- # END for each line in stream
-
-
- @classmethod
- def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
- """
- Commit the given tree, creating a commit object.
-
- ``repo``
- is the Repo
-
- ``tree``
- Sha of a tree or a tree object to become the tree of the new commit
-
- ``message``
- Commit message. It may be an empty string if no message is provided.
- It will be converted to a string in any case.
-
- ``parent_commits``
- Optional Commit objects to use as parents for the new commit.
- If empty list, the commit will have no parents at all and become
- a root commit.
- If None , the current head commit will be the parent of the
- new commit object
-
- ``head``
- If True, the HEAD will be advanced to the new commit automatically.
- Else the HEAD will remain pointing on the previous commit. This could
- lead to undesired results when diffing files.
-
- Returns
- Commit object representing the new commit
-
- Note:
- Additional information about the committer and author is taken from the
- environment or from the git configuration, see git-commit-tree for
- more information
- """
- parents = parent_commits
- if parent_commits is None:
- try:
- parent_commits = [ repo.head.commit ]
- except ValueError:
- # empty repositories have no head commit
- parent_commits = list()
- # END handle parent commits
- # END if parent commits are unset
-
- parent_args = [ ("-p", str(commit)) for commit in parent_commits ]
-
- # create message stream
- tmp_file_path = tempfile.mktemp()
- fp = open(tmp_file_path,"wb")
- fp.write(str(message))
- fp.close()
- fp = open(tmp_file_path,"rb")
- fp.seek(0)
-
- try:
- # write the current index as tree
- commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp)
- new_commit = cls(repo, commit_sha)
-
- if head:
- try:
- repo.head.commit = new_commit
- except ValueError:
- # head is not yet set to the ref our HEAD points to.
- import git.refs
- master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
- repo.head.reference = master
- # END handle empty repositories
- # END advance head handling
-
- return new_commit
- finally:
- fp.close()
- os.remove(tmp_file_path)
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.sha
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.sha
+class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
+ """
+ Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary.
+ """
+
+ # ENVIRONMENT VARIABLES
+ # read when creating new commits
+ env_author_name = "GIT_AUTHOR_NAME"
+ env_author_email = "GIT_AUTHOR_EMAIL"
+ env_author_date = "GIT_AUTHOR_DATE"
+ env_committer_name = "GIT_COMMITTER_NAME"
+ env_committer_email = "GIT_COMMITTER_EMAIL"
+ env_committer_date = "GIT_COMMITTER_DATE"
+ env_email = "EMAIL"
+
+ # CONFIGURATION KEYS
+ conf_email = 'email'
+ conf_name = 'name'
+ conf_encoding = 'i18n.commitencoding'
+
+ # INVARIANTS
+ default_encoding = "UTF-8"
+
+
+ # object configuration
+ type = "commit"
+ __slots__ = ("tree",
+ "author", "authored_date", "author_tz_offset",
+ "committer", "committed_date", "committer_tz_offset",
+ "message", "parents", "encoding")
+ _id_attribute_ = "sha"
+
+ def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """
+ Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set if id names a valid sha.
+
+ The parameter documentation indicates the type of the argument after a colon ':'.
+
+ :param sha: is the sha id of the commit or a ref
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+ :param tree: Tree
+ is the corresponding tree id or an actual Tree
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone.
+ """
+ super(Commit,self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ """ Called by LazyMixin superclass when the given uninitialized member needs
+ to be set.
+ We set all values at once. """
+ if attr in Commit.__slots__:
+			# read the data in a chunk - it's faster - then provide a file wrapper.
+			# We could use self.data, but let's try to get it with fewer calls
+ hexsha, typename, size, data = self.repo.git.get_object_data(self)
+ self._deserialize(StringIO(data))
+ else:
+ super(Commit, self)._set_cache_(attr)
+
+ @property
+ def summary(self):
+ """
+ Returns
+ First line of the commit message.
+ """
+ return self.message.split('\n', 1)[0]
+
+ def count(self, paths='', **kwargs):
+ """
+ Count the number of commits reachable from this commit
+
+ ``paths``
+			is an optional path or a list of paths restricting the return value
+ to commits actually containing the paths
+
+ ``kwargs``
+			Additional options to be passed to git-rev-list. They must not alter
+			the output style of the command, or parsing will yield incorrect results
+ Returns
+ int
+ """
+ # yes, it makes a difference whether empty paths are given or not in our case
+ # as the empty paths version will ignore merge commits for some reason.
+ if paths:
+ return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+ else:
+ return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+
+
+ @property
+ def name_rev(self):
+ """
+ Returns
+ String describing the commits hex sha based on the closest Reference.
+ Mostly useful for UI purposes
+ """
+ return self.repo.git.name_rev(self)
+
+ @classmethod
+ def iter_items(cls, repo, rev, paths='', **kwargs):
+ """
+ Find all commits matching the given criteria.
+
+ ``repo``
+ is the Repo
+
+ ``rev``
+ revision specifier, see git-rev-parse for viable options
+
+ ``paths``
+			is an optional path or list of paths, if set only Commits that include the path
+ or paths will be considered
+
+ ``kwargs``
+ optional keyword arguments to git rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
+
+ Returns
+ iterator yielding Commit items
+ """
+ if 'pretty' in kwargs:
+ raise ValueError("--pretty cannot be used as parsing expects single sha's only")
+ # END handle pretty
+ args = list()
+ if paths:
+ args.extend(('--', paths))
+ # END if paths
+
+ proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
+ return cls._iter_from_process_or_stream(repo, proc)
+
+ def iter_parents(self, paths='', **kwargs):
+ """
+ Iterate _all_ parents of this commit.
+
+ ``paths``
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+
+ ``kwargs``
+ All arguments allowed by git-rev-list
+
+ Return:
+ Iterator yielding Commit objects which are parents of self
+ """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items( self.repo, self, paths, **kwargs )
+
+ @property
+ def stats(self):
+ """
+ Create a git stat from changes between this commit and its first parent
+ or from all changes done if this is the very first commit.
+
+ Return
+ git.Stats
+ """
+ if not self.parents:
+ text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+ text2 = ""
+ for line in text.splitlines()[1:]:
+ (insertions, deletions, filename) = line.split("\t")
+ text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+ text = text2
+ else:
+ text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+ return stats.Stats._list_from_string(self.repo, text)
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+		"""Parse out commit information into an iterator of Commit objects
+		We expect one line per commit, and parse the actual commit information directly
+		from our lightning fast object database
+
+		:param proc_or_stream: git-rev-list process instance, or a stream,
+			yielding one sha per line
+		:return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ sha = line.strip()
+ if len(sha) > 40:
+ # split additional information, as returned by bisect for instance
+ sha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(sha) == 40, "Invalid line: %s" % sha
+ yield Commit(repo, sha)
+ # END for each line in stream
+
+
+ @classmethod
+ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
+ """Commit the given tree, creating a commit object.
+
+ :param repo: Repo object the commit should be part of
+ :param tree: Sha of a tree or a tree object to become the tree of the new commit
+ :param message: Commit message. It may be an empty string if no message is provided.
+ It will be converted to a string in any case.
+ :param parent_commits:
+ Optional Commit objects to use as parents for the new commit.
+ If empty list, the commit will have no parents at all and become
+ a root commit.
+			If None, the current head commit will be the parent of the
+ new commit object
+ :param head:
+ If True, the HEAD will be advanced to the new commit automatically.
+			Else the HEAD will remain pointing to the previous commit. This could
+			lead to undesired results when diffing files.
+
+ :return: Commit object representing the new commit
+
+ :note:
+			Additional information about the committer and author is taken from the
+ environment or from the git configuration, see git-commit-tree for
+ more information
+ """
+		if parent_commits is None:
+ try:
+ parent_commits = [ repo.head.commit ]
+ except ValueError:
+ # empty repositories have no head commit
+ parent_commits = list()
+ # END handle parent commits
+ # END if parent commits are unset
+
+ # retrieve all additional information, create a commit object, and
+ # serialize it
+ # Generally:
+ # * Environment variables override configuration values
+ # * Sensible defaults are set according to the git documentation
+
+		# COMMITTER AND AUTHOR INFO
+ cr = repo.config_reader()
+ env = os.environ
+ default_email = utils.get_user_id()
+ default_name = default_email.split('@')[0]
+
+ conf_name = cr.get_value('user', cls.conf_name, default_name)
+ conf_email = cr.get_value('user', cls.conf_email, default_email)
+
+ author_name = env.get(cls.env_author_name, conf_name)
+		author_email = env.get(cls.env_author_email, conf_email)
+
+ committer_name = env.get(cls.env_committer_name, conf_name)
+ committer_email = env.get(cls.env_committer_email, conf_email)
+
+ # PARSE THE DATES
+ unix_time = int(time.time())
+ offset = time.altzone
+
+ author_date_str = env.get(cls.env_author_date, '')
+ if author_date_str:
+ author_time, author_offset = utils.parse_date(author_date_str)
+ else:
+ author_time, author_offset = unix_time, offset
+ # END set author time
+
+ committer_date_str = env.get(cls.env_committer_date, '')
+ if committer_date_str:
+ committer_time, committer_offset = utils.parse_date(committer_date_str)
+ else:
+ committer_time, committer_offset = unix_time, offset
+ # END set committer time
+
+		# read the encoding from the configuration, defaulting to UTF-8
+ enc_section, enc_option = cls.conf_encoding.split('.')
+ conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
+
+ author = Actor(author_name, author_email)
+ committer = Actor(committer_name, committer_email)
+
+
+ # CREATE NEW COMMIT
+ new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+ author, author_time, author_offset,
+ committer, committer_time, committer_offset,
+ message, parent_commits, conf_encoding)
+
+ stream = StringIO()
+ new_commit._serialize(stream)
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+ new_commit.sha = istream.sha
+
+ if head:
+ try:
+ repo.head.commit = new_commit
+ except ValueError:
+ # head is not yet set to the ref our HEAD points to
+ # Happens on first commit
+ import git.refs
+ master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
+ repo.head.reference = master
+ # END handle empty repositories
+ # END advance head handling
+
+ return new_commit
+
+
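+	# A usage sketch (illustrative only - 'repo' is assumed to be an existing
+	# non-bare Repo instance):
+	#
+	#   commit = Commit.create_from_tree(repo, repo.head.commit.tree,
+	#                                    "my commit message", head=True)
+	#   assert repo.head.commit == commit
+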
+ def __str__(self):
+ """ Convert commit to string which is SHA1 """
+ return self.sha
+
+ def __repr__(self):
+ return '<git.Commit "%s">' % self.sha
+
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+ write = stream.write
+ write("tree %s\n" % self.tree)
+ for p in self.parents:
+ write("parent %s\n" % p)
+
+ a = self.author
+ c = self.committer
+ fmt = "%s %s <%s> %s %s\n"
+ write(fmt % ("author", a.name, a.email,
+ self.authored_date,
+ utils.altz_to_utctz_str(self.author_tz_offset)))
+
+ write(fmt % ("committer", c.name, c.email,
+ self.committed_date,
+ utils.altz_to_utctz_str(self.committer_tz_offset)))
+
+ if self.encoding != self.default_encoding:
+ write("encoding %s\n" % self.encoding)
+
+ write("\n")
+ write(self.message)
+ return self
+
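+	# For reference, _serialize writes the canonical commit object format, e.g.
+	# (example values taken from this project's test fixtures):
+	#
+	#   tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4
+	#   parent 634396b2f541a9f2d58b00be1a07f0c358b999b3
+	#   author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+	#   committer Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+	#
+	#   implement Grit#heads
+	#
+	# with an optional 'encoding <name>' line before the blank line whenever the
+	# encoding differs from the UTF-8 default.
+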
+ def _deserialize(self, stream):
+		""":param stream: stream of plain object data, as produced by ``_serialize``
+		:return: self"""
+ readline = stream.readline
+ self.tree = Tree(self.repo, readline().split()[1], 0, '')
+
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.repo, parent_line.split()[-1]))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())
+
+
+ # now we can have the encoding line, or an empty line followed by the optional
+ # message.
+ self.encoding = self.default_encoding
+ # read encoding or empty line to separate message
+ enc = readline()
+ enc = enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # now comes the message separator
+ readline()
+ # END handle encoding
+
+		# a stream from our data simply gives us the plain message
+ self.message = stream.read()
+ return self
+
+ #} END serializable implementation
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index a9e60981..285d3b5b 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable):
visit_once = False, ignore_self=1 ):
"""For documentation, see utils.Traversable.traverse
- Trees are set to visist_once = False to gain more performance in the traversal"""
+ Trees are set to visit_once = False to gain more performance in the traversal"""
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
# List protocol
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 4f17b652..c93f2091 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -9,159 +9,302 @@ Module for general utility functions
import re
from collections import deque as Deque
from git.actor import Actor
+import platform
+
+from string import digits
+import time
+import os
+
+__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date',
+ 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
+ 'verify_utctz')
def get_object_type_by_name(object_type_name):
- """
- Returns
- type suitable to handle the given object type name.
- Use the type to create new instances.
-
- ``object_type_name``
- Member of TYPES
-
- Raises
- ValueError: In case object_type_name is unknown
- """
- if object_type_name == "commit":
- import commit
- return commit.Commit
- elif object_type_name == "tag":
- import tag
- return tag.TagObject
- elif object_type_name == "blob":
- import blob
- return blob.Blob
- elif object_type_name == "tree":
- import tree
- return tree.Tree
- else:
- raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-
-
+ """
+ Returns
+ type suitable to handle the given object type name.
+ Use the type to create new instances.
+
+ ``object_type_name``
+ Member of TYPES
+
+ Raises
+ ValueError: In case object_type_name is unknown
+ """
+ if object_type_name == "commit":
+ import commit
+ return commit.Commit
+ elif object_type_name == "tag":
+ import tag
+ return tag.TagObject
+ elif object_type_name == "blob":
+ import blob
+ return blob.Blob
+ elif object_type_name == "tree":
+ import tree
+ return tree.Tree
+ else:
+ raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+
+
+def get_user_id():
+ """:return: string identifying the currently active system user as name@node
+	:note: the username is taken from the 'USER' environment variable if set,
+		falling back to os.getlogin()"""
+ ukn = 'UNKNOWN'
+ username = os.environ.get('USER', ukn)
+ if username == ukn and hasattr(os, 'getlogin'):
+ username = os.getlogin()
+ # END get username from login
+ return "%s@%s" % (username, platform.node())
+
+
+def utctz_to_altz(utctz):
+	"""Convert a git UTC timezone string into an offset in seconds, in the format
+	time.altzone returns. Git stores the offset with the opposite sign, which
+	explains the explicit -1 multiplication.
+	:param utctz: git utc timezone string, i.e. +0200"""
+ return -1 * int(float(utctz)/100*3600)
+
+def altz_to_utctz_str(altz):
+	"""As above, but inverts the operation, returning a string that can be used
+	in commit objects"""
+ utci = -1 * int((altz / 3600)*100)
+ utcs = str(abs(utci))
+ utcs = "0"*(4-len(utcs)) + utcs
+ prefix = (utci < 0 and '-') or '+'
+ return prefix + utcs
+
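+# An illustrative round-trip of the two conversions above:
+#
+#   utctz_to_altz('+0200')   -> -7200
+#   altz_to_utctz_str(-7200) -> '+0200'
+#
+# matching the inverted sign of time.altzone.
+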
+
+def verify_utctz(offset):
+ """:raise ValueError: if offset is incorrect
+ :return: offset"""
+ fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
+ if len(offset) != 5:
+ raise fmt_exc
+ if offset[0] not in "+-":
+ raise fmt_exc
+ if offset[1] not in digits or \
+ offset[2] not in digits or \
+ offset[3] not in digits or \
+ offset[4] not in digits:
+ raise fmt_exc
+ # END for each char
+ return offset
+
+def parse_date(string_date):
+ """
+ Parse the given date as one of the following
+ * Git internal format: timestamp offset
+ * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
+ * ISO 8601 2005-04-07T22:13:13
+ The T can be a space as well
+
+	:return: Tuple(int(timestamp), int(offset)), timestamp in seconds since epoch,
+		offset in seconds west of UTC
+ :raise ValueError: If the format could not be understood
+ :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY
+ """
+ # git time
+ try:
+ if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
+ timestamp, offset = string_date.split()
+ timestamp = int(timestamp)
+ return timestamp, utctz_to_altz(verify_utctz(offset))
+ else:
+ offset = "+0000" # local time by default
+ if string_date[-5] in '-+':
+ offset = verify_utctz(string_date[-5:])
+ string_date = string_date[:-6] # skip space as well
+ # END split timezone info
+
+ # now figure out the date and time portion - split time
+ date_formats = list()
+ splitter = -1
+ if ',' in string_date:
+ date_formats.append("%a, %d %b %Y")
+ splitter = string_date.rfind(' ')
+ else:
+ # iso plus additional
+ date_formats.append("%Y-%m-%d")
+ date_formats.append("%Y.%m.%d")
+ date_formats.append("%m/%d/%Y")
+ date_formats.append("%d.%m.%Y")
+
+ splitter = string_date.rfind('T')
+ if splitter == -1:
+ splitter = string_date.rfind(' ')
+ # END handle 'T' and ' '
+ # END handle rfc or iso
+
+ assert splitter > -1
+
+ # split date and time
+ time_part = string_date[splitter+1:] # skip space
+ date_part = string_date[:splitter]
+
+ # parse time
+ tstruct = time.strptime(time_part, "%H:%M:%S")
+
+ for fmt in date_formats:
+ try:
+ dtstruct = time.strptime(date_part, fmt)
+ fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
+ tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
+ dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
+ return int(time.mktime(fstruct)), utctz_to_altz(offset)
+ except ValueError:
+ continue
+ # END exception handling
+ # END for each fmt
+
+ # still here ? fail
+ raise ValueError("no format matched")
+ # END handle format
+ except Exception:
+ raise ValueError("Unsupported date format: %s" % string_date)
+ # END handle exceptions
+
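+# Illustrative calls (the timestamp of the second form depends on the local
+# timezone, hence it is elided here):
+#
+#   parse_date('1191999972 -0700')                -> (1191999972, 25200)
+#   parse_date('Thu, 07 Apr 2005 22:13:13 +0200') -> (..., -7200)
+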
+
# precompiled regex
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
def parse_actor_and_date(line):
- """
- Parse out the actor (author or committer) info from a line like::
-
- author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-
- Returns
- [Actor, int_seconds_since_epoch, int_timezone_offset]
- """
- m = _re_actor_epoch.search(line)
- actor, epoch, offset = m.groups()
- return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600))
-
-
-
+ """
+ Parse out the actor (author or committer) info from a line like::
+
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+
+ Returns
+ [Actor, int_seconds_since_epoch, int_timezone_offset]
+ """
+ m = _re_actor_epoch.search(line)
+ actor, epoch, offset = m.groups()
+ return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
+
+
+
class ProcessStreamAdapter(object):
- """
- Class wireing all calls to the contained Process instance.
-
- Use this type to hide the underlying process to provide access only to a specified
- stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope.
- """
- __slots__ = ("_proc", "_stream")
- def __init__(self, process, stream_name):
- self._proc = process
- self._stream = getattr(process, stream_name)
-
- def __getattr__(self, attr):
- return getattr(self._stream, attr)
-
-
+ """
+	Class wiring all calls to the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope.
+ """
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ return getattr(self._stream, attr)
+
+
class Traversable(object):
- """Simple interface to perforam depth-first or breadth-first traversals
- into one direction.
- Subclasses only need to implement one function.
- Instances of the Subclass must be hashable"""
- __slots__ = tuple()
-
- @classmethod
- def _get_intermediate_items(cls, item):
- """
- Returns:
- List of items connected to the given item.
- Must be implemented in subclass
- """
- raise NotImplementedError("To be implemented in subclass")
-
-
- def traverse( self, predicate = lambda i,d: True,
- prune = lambda i,d: False, depth = -1, branch_first=True,
- visit_once = True, ignore_self=1, as_edge = False ):
- """
- ``Returns``
- iterator yieling of items found when traversing self
-
- ``predicate``
- f(i,d) returns False if item i at depth d should not be included in the result
-
- ``prune``
- f(i,d) return True if the search should stop at item i at depth d.
- Item i will not be returned.
-
- ``depth``
- define at which level the iteration should not go deeper
- if -1, there is no limit
- if 0, you would effectively only get self, the root of the iteration
- i.e. if 1, you would only get the first level of predessessors/successors
-
- ``branch_first``
- if True, items will be returned branch first, otherwise depth first
-
- ``visit_once``
- if True, items will only be returned once, although they might be encountered
- several times. Loops are prevented that way.
-
- ``ignore_self``
- if True, self will be ignored and automatically pruned from
- the result. Otherwise it will be the first item to be returned.
- If as_edge is True, the source of the first edge is None
-
- ``as_edge``
- if True, return a pair of items, first being the source, second the
- destinatination, i.e. tuple(src, dest) with the edge spanning from
- source to destination"""
- visited = set()
- stack = Deque()
- stack.append( ( 0 ,self, None ) ) # self is always depth level 0
-
- def addToStack( stack, item, branch_first, depth ):
- lst = self._get_intermediate_items( item )
- if not lst:
- return
- if branch_first:
- stack.extendleft( ( depth , i, item ) for i in lst )
- else:
- reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
- stack.extend( reviter )
- # END addToStack local method
-
- while stack:
- d, item, src = stack.pop() # depth of item, item, item_source
-
- if visit_once and item in visited:
- continue
-
- if visit_once:
- visited.add(item)
-
- rval = ( as_edge and (src, item) ) or item
- if prune( rval, d ):
- continue
-
- skipStartItem = ignore_self and ( item == self )
- if not skipStartItem and predicate( rval, d ):
- yield rval
-
- # only continue to next level if this is appropriate !
- nd = d + 1
- if depth > -1 and nd > depth:
- continue
-
- addToStack( stack, item, branch_first, nd )
- # END for each item on work stack
+	"""Simple interface to perform depth-first or breadth-first traversals
+	in one direction.
+ Subclasses only need to implement one function.
+ Instances of the Subclass must be hashable"""
+ __slots__ = tuple()
+
+ @classmethod
+ def _get_intermediate_items(cls, item):
+ """
+ Returns:
+ List of items connected to the given item.
+ Must be implemented in subclass
+ """
+ raise NotImplementedError("To be implemented in subclass")
+
+
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = True, ignore_self=1, as_edge = False ):
+ """
+ ``Returns``
+			iterator yielding items found when traversing self
+
+ ``predicate``
+ f(i,d) returns False if item i at depth d should not be included in the result
+
+ ``prune``
+			f(i,d) returns True if the search should stop at item i at depth d.
+ Item i will not be returned.
+
+ ``depth``
+			defines at which level the iteration should not go deeper
+			if -1, there is no limit
+			if 0, you would effectively only get self, the root of the iteration
+			i.e. if 1, you would only get the first level of predecessors/successors
+
+ ``branch_first``
+ if True, items will be returned branch first, otherwise depth first
+
+ ``visit_once``
+ if True, items will only be returned once, although they might be encountered
+ several times. Loops are prevented that way.
+
+ ``ignore_self``
+ if True, self will be ignored and automatically pruned from
+ the result. Otherwise it will be the first item to be returned.
+ If as_edge is True, the source of the first edge is None
+
+ ``as_edge``
+ if True, return a pair of items, first being the source, second the
+			destination, i.e. tuple(src, dest) with the edge spanning from
+ source to destination"""
+ visited = set()
+ stack = Deque()
+ stack.append( ( 0 ,self, None ) ) # self is always depth level 0
+
+ def addToStack( stack, item, branch_first, depth ):
+ lst = self._get_intermediate_items( item )
+ if not lst:
+ return
+ if branch_first:
+ stack.extendleft( ( depth , i, item ) for i in lst )
+ else:
+ reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
+ stack.extend( reviter )
+ # END addToStack local method
+
+ while stack:
+ d, item, src = stack.pop() # depth of item, item, item_source
+
+ if visit_once and item in visited:
+ continue
+
+ if visit_once:
+ visited.add(item)
+
+ rval = ( as_edge and (src, item) ) or item
+ if prune( rval, d ):
+ continue
+
+ skipStartItem = ignore_self and ( item == self )
+ if not skipStartItem and predicate( rval, d ):
+ yield rval
+
+ # only continue to next level if this is appropriate !
+ nd = d + 1
+ if depth > -1 and nd > depth:
+ continue
+
+ addToStack( stack, item, branch_first, nd )
+ # END for each item on work stack
+
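+	# An illustrative traversal (sketch - 'commit' is assumed to be a Commit
+	# instance, whose _get_intermediate_items yields its parents):
+	#
+	#   for c in commit.traverse(depth=2, branch_first=False):
+	#       print c			# up to two generations of parents, depth-first
+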
+
+class Serializable(object):
+ """Defines methods to serialize and deserialize objects from and into a data stream"""
+
+ def _serialize(self, stream):
+ """Serialize the data of this object into the given data stream
+		:note: a serialized object would ``_deserialize`` into the same object
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def _deserialize(self, stream):
+ """Deserialize all information regarding this object from the stream
+ :param stream: a file-like object
+ :return: self"""
+ raise NotImplementedError("To be implemented in subclass")
diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py
new file mode 100644
index 00000000..5789d7eb
--- /dev/null
+++ b/lib/git/odb/__init__.py
@@ -0,0 +1,6 @@
+"""Initialize the object database module"""
+
+# default imports
+from db import *
+from stream import *
+
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
new file mode 100644
index 00000000..a8de28ec
--- /dev/null
+++ b/lib/git/odb/db.py
@@ -0,0 +1,337 @@
+"""Contains implementations of databases for retrieving objects"""
+from git.utils import IndexFileSHA1Writer
+from git.errors import (
+ InvalidDBRoot,
+ BadObject,
+ BadObjectType
+ )
+
+from stream import (
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ Sha1Writer,
+ OStream,
+ OInfo
+ )
+
+from utils import (
+ ENOENT,
+ to_hex_sha,
+ exists,
+ hex_to_bin,
+ isdir,
+ mkdir,
+ rename,
+ dirname,
+ join
+ )
+
+from fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object
+ )
+
+import tempfile
+import mmap
+import os
+
+
+__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB',
+ 'CompoundDB', 'ReferenceDB', 'GitObjectDB' )
+
+class ObjectDBR(object):
+ """Defines an interface for object database lookup.
+ Objects are identified either by hex-sha (40 bytes) or
+ by sha (20 bytes)"""
+
+ def __contains__(self, sha):
+		return self.has_object(sha)
+
+ #{ Query Interface
+ def has_object(self, sha):
+ """
+ :return: True if the object identified by the given 40 byte hexsha or 20 bytes
+ binary sha is contained in the database
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info(self, sha):
+ """ :return: OInfo instance
+ :param sha: 40 bytes hexsha or 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def info_async(self, input_channel):
+ """Retrieve information of a multitude of objects asynchronously
+ :param input_channel: Channel yielding the sha's of the objects of interest
+ :return: Channel yielding OInfo|InvalidOInfo, in any order"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream(self, sha):
+ """:return: OStream instance
+ :param sha: 40 bytes hexsha or 20 bytes binary sha
+ :raise BadObject:"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def stream_async(self, input_channel):
+ """Retrieve the OStream of multiple objects
+		:param input_channel: see ``info``
+		:return: Channel yielding OStream|InvalidOStream instances in any order"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END query interface
+
+class ObjectDBW(object):
+ """Defines an interface to create objects in the database"""
+
+ def __init__(self, *args, **kwargs):
+ self._ostream = None
+
+ #{ Edit Interface
+ def set_ostream(self, stream):
+ """Adjusts the stream to which all data should be sent when storing new objects
+ :param stream: if not None, the stream to use, if None the default stream
+ will be used.
+ :return: previously installed stream, or None if there was no override
+ :raise TypeError: if the stream doesn't have the supported functionality"""
+ cstream = self._ostream
+ self._ostream = stream
+ return cstream
+
+ def ostream(self):
+ """:return: overridden output stream this instance will write to, or None
+ if it will write to the default stream"""
+ return self._ostream
+
+ def store(self, istream):
+ """Create a new object in the database
+ :return: the input istream object with its sha set to its corresponding value
+		:param istream: IStream compatible instance. If its sha is already set
+			to a value, the object will just be stored in our database format,
+			in which case the input stream is expected to be in object format ( header + contents ).
+ :raise IOError: if data could not be written"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def store_async(self, input_channel):
+ """Create multiple new objects in the database asynchronously. The method will
+ return right away, returning an output channel which receives the results as
+ they are computed.
+
+ :return: Channel yielding your IStream which served as input, in any order.
+ The IStreams sha will be set to the sha it received during the process,
+ or its error attribute will be set to the exception informing about the error.
+		:param input_channel: Channel yielding IStream instances.
+			As the same instances will be used in the output channel, you can create a
+			mapping from id(istream) to istream
+		:note: As some ODB implementations perform this operation atomically, they might
+			abort the whole operation if one item could not be processed. Hence check how
+			many items have actually been produced.
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END edit interface
+
+
+class FileDBBase(object):
+ """Provides basic facilities to retrieve files of interest, including
+ caching facilities to help mapping hexsha's to objects"""
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+		:note: The base will perform basic checking for accessibility, but the subclass
+ is required to verify that the root_path contains the database structure it needs"""
+ super(FileDBBase, self).__init__()
+ if not os.path.isdir(root_path):
+ raise InvalidDBRoot(root_path)
+ self._root_path = root_path
+
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ return self._root_path
+
+ def db_path(self, rela_path):
+ """
+		:return: the given relative path relative to our database root, allowing
+			one to potentially access datafiles
+ return join(self._root_path, rela_path)
+ #} END interface
+
+
+
+class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
+ """A database which operates on loose object files"""
+
+ # CONFIGURATION
+ # chunks in which data will be copied between streams
+ stream_chunk_size = chunk_size
+
+
+ def __init__(self, root_path):
+ super(LooseObjectDB, self).__init__(root_path)
+ self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = getattr(os, 'O_NOATIME', 0)
+
+ #{ Interface
+ def object_path(self, hexsha):
+ """
+ :return: path at which the object with the given hexsha would be stored,
+ relative to the database root"""
+ return join(hexsha[:2], hexsha[2:])
+
+ def readable_db_object_path(self, hexsha):
+ """
+ :return: readable object path to the object identified by hexsha
+ :raise BadObject: If the object file does not exist"""
+ try:
+ return self._hexsha_to_file[hexsha]
+ except KeyError:
+ pass
+ # END ignore cache misses
+
+ # try filesystem
+ path = self.db_path(self.object_path(hexsha))
+ if exists(path):
+ self._hexsha_to_file[hexsha] = path
+ return path
+ # END handle cache
+ raise BadObject(hexsha)
+
+ #} END interface
+
+ def _map_loose_object(self, sha):
+ """
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(to_hex_sha(sha)))
+ try:
+ fd = os.open(db_path, os.O_RDONLY|self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # try again without noatime
+ try:
+ fd = os.open(db_path, os.O_RDONLY)
+ except OSError:
+ raise BadObject(to_hex_sha(sha))
+ # didn't work because of our flag, don't try it again
+ self._fd_open_flags = 0
+ else:
+ raise BadObject(to_hex_sha(sha))
+ # END handle error
+ # END exception handling
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ finally:
+ os.close(fd)
+ # END assure file is closed
+
+ def set_ostream(self, stream):
+ """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+ if stream is not None and not isinstance(stream, Sha1Writer):
+			raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
+ return super(LooseObjectDB, self).set_ostream(stream)
+
+ def info(self, sha):
+ m = self._map_loose_object(sha)
+ try:
+ type, size = loose_object_header_info(m)
+ return OInfo(sha, type, size)
+ finally:
+ m.close()
+ # END assure release of system resources
+
+ def stream(self, sha):
+ m = self._map_loose_object(sha)
+ type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+ return OStream(sha, type, size, stream)
+
+ def has_object(self, sha):
+ try:
+ self.readable_db_object_path(to_hex_sha(sha))
+ return True
+ except BadObject:
+ return False
+		# END check existence
+
+ def store(self, istream):
+		""":note: The sha we produce will be hex by nature"""
+ assert istream.sha is None, "Direct istream writing not yet implemented"
+ tmp_path = None
+ writer = self.ostream()
+ if writer is None:
+ # open a tmp file to write the data to
+ fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+ writer = FDCompressedSha1Writer(fd)
+ # END handle custom writer
+
+ try:
+ try:
+ write_object(istream.type, istream.size, istream.read, writer.write,
+ chunk_size=self.stream_chunk_size)
+ except:
+ if tmp_path:
+ os.remove(tmp_path)
+ raise
+ # END assure tmpfile removal on error
+ finally:
+ if tmp_path:
+ writer.close()
+ # END assure target stream is closed
+
+ sha = writer.sha(as_hex=True)
+
+ if tmp_path:
+ obj_path = self.db_path(self.object_path(sha))
+ obj_dir = dirname(obj_path)
+ if not isdir(obj_dir):
+ mkdir(obj_dir)
+ # END handle destination directory
+ rename(tmp_path, obj_path)
+		# END move tmp file into place
+
+ istream.sha = sha
+ return istream
+
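+	# An illustrative store of a blob (sketch - 'ldb' is assumed to be a
+	# LooseObjectDB instance):
+	#
+	#   from cStringIO import StringIO
+	#   from git.odb import IStream
+	#   data = "my data"
+	#   istream = ldb.store(IStream("blob", len(data), StringIO(data)))
+	#   assert ldb.has_object(istream.sha)
+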
+
+class PackedDB(FileDBBase, ObjectDBR):
+ """A database operating on a set of object packs"""
+
+
+class CompoundDB(ObjectDBR):
+ """A database which delegates calls to sub-databases"""
+
+
+class ReferenceDB(CompoundDB):
+ """A database consisting of database referred to in a file"""
+
+
+#class GitObjectDB(CompoundDB, ObjectDBW):
+class GitObjectDB(LooseObjectDB):
+ """A database representing the default git object store, which includes loose
+ objects, pack files and an alternates file
+
+ It will create objects only in the loose object database.
+	:note: for now, we use the git command to do all the lookup, just until we
+		have packs and the other implementations
+ """
+ def __init__(self, root_path, git):
+ """Initialize this instance with the root and a git command"""
+ super(GitObjectDB, self).__init__(root_path)
+ self._git = git
+
+ def info(self, sha):
+ t = self._git.get_object_header(sha)
+ return OInfo(t[0], t[1], t[2])
+
+ def stream(self, sha):
+ """For now, all lookup is done by git itself"""
+ t = self._git.stream_object_data(sha)
+ return OStream(t[0], t[1], t[2], t[3])
+
diff --git a/lib/git/odb/fun.py b/lib/git/odb/fun.py
new file mode 100644
index 00000000..870a6f02
--- /dev/null
+++ b/lib/git/odb/fun.py
@@ -0,0 +1,108 @@
+"""Contains basic c-functions which usually contain performance critical code
+Keeping this code separate from the beginning makes it easier to port
+it to c later, if required"""
+
+from git.errors import (
+ BadObjectType
+ )
+
+import zlib
+decompressobj = zlib.decompressobj
+
+
+# INVARIANTS
+type_id_to_type_map = {
+ 1 : "commit",
+ 2 : "tree",
+ 3 : "blob",
+ 4 : "tag"
+ }
+
+# used when dealing with larger streams
+chunk_size = 1000*1000
+
+__all__ = ('is_loose_object', 'loose_object_header_info', 'object_header_info',
+ 'write_object' )
+
+#{ Routines
+
+def is_loose_object(m):
+	""":return: True if the file contained in memory map m appears to be a loose object.
+	Only the first two bytes are needed"""
+ b0, b1 = map(ord, m[:2])
+ word = (b0 << 8) + b1
+ return b0 == 0x78 and (word % 31) == 0
+
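+# Background for the check above: per RFC 1950, a zlib stream commonly starts
+# with 0x78, and the first two bytes, read as a big-endian 16 bit word, are a
+# multiple of 31, e.g. '\x78\x9c' gives (0x78 << 8) + 0x9c == 30876 == 31 * 996.
+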
+def loose_object_header_info(m):
+ """:return: tuple(type_string, uncompressed_size_in_bytes) the type string of the
+ object as well as its uncompressed size in bytes.
+ :param m: memory map from which to read the compressed object data"""
+ decompress_size = 8192 # is used in cgit as well
+ hdr = decompressobj().decompress(m, decompress_size)
+ type_name, size = hdr[:hdr.find("\0")].split(" ")
+ return type_name, int(size)
+
+def object_header_info(m):
+	""":return: tuple(type_string, uncompressed_size_in_bytes)
+	:param m: memory map of the object data. It will be
+		seeked to the actual start of the object contents, which can be used
+		to initialize a zlib decompress object.
+	:note: This routine can only handle new-style objects which are presumably contained
+		in packs
+	"""
+ assert not is_loose_object(m), "Use loose_object_header_info instead"
+
+	c = ord(m[0])		# first byte
+	i = 1				# next char to read
+ type_id = (c >> 4) & 7 # numeric type
+ size = c & 15 # starting size
+ s = 4 # starting bit-shift size
+ while c & 0x80:
+ c = ord(m[i])
+ i += 1
+ size += (c & 0x7f) << s
+ s += 7
+ # END character loop
+
+ # finally seek the map to the start of the data stream
+ m.seek(i)
+ try:
+ return (type_id_to_type_map[type_id], size)
+ except KeyError:
+ # invalid object type - we could try to be smart now and decode part
+ # of the stream to get the info, problem is that we had trouble finding
+ # the exact start of the content stream
+ raise BadObjectType(type_id)
+ # END handle exceptions
+
+def write_object(type, size, read, write, chunk_size=chunk_size):
+ """Write the object as identified by type, size and source_stream into the
+ target_stream
+
+ :param type: type string of the object
+ :param size: amount of bytes to write from source_stream
+ :param read: read method of a stream providing the content data
+ :param write: write method of the output stream
+	:return: The actual amount of bytes written to the stream, which includes the header"""
+ tbw = 0 # total num bytes written
+ dbw = 0 # num data bytes written
+
+ # WRITE HEADER: type SP size NULL
+ tbw += write("%s %i\0" % (type, size))
+
+ # WRITE ALL DATA UP TO SIZE
+ while True:
+ cs = min(chunk_size, size-dbw)
+ data_len = write(read(cs))
+ dbw += data_len
+ if data_len < cs or dbw == size:
+ tbw += dbw
+ break
+ # END check for stream end
+ # END duplicate data
+ return tbw
+
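+# An illustrative call (sketch): stream a small blob through a Sha1Writer, whose
+# write method returns the number of bytes written, as this routine requires:
+#
+#   from cStringIO import StringIO
+#   from git.odb.stream import Sha1Writer
+#   src, writer = StringIO("hello"), Sha1Writer()
+#   write_object("blob", 5, src.read, writer.write)	# digests 'blob 5\0hello'
+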
+
+#} END routines
diff --git a/lib/git/odb/stream.py b/lib/git/odb/stream.py
new file mode 100644
index 00000000..d1181382
--- /dev/null
+++ b/lib/git/odb/stream.py
@@ -0,0 +1,446 @@
+import zlib
+from cStringIO import StringIO
+from git.utils import make_sha
+import errno
+
+from utils import (
+ to_hex_sha,
+ to_bin_sha,
+ write,
+ close
+ )
+
+__all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream',
+ 'DecompressMemMapReader', 'FDCompressedSha1Writer')
+
+
+# ZLIB configuration
+# used when compressing objects - 1 (fastest) to 9 (slowest, best compression)
+Z_BEST_SPEED = 1
+
+
+#{ ODB Bases
+
+class OInfo(tuple):
+	"""Carries information about an object in an ODB, providing information
+ about the sha of the object, the type_string as well as the uncompressed size
+ in bytes.
+
+ It can be accessed using tuple notation and using attribute access notation::
+
+ assert dbi[0] == dbi.sha
+ assert dbi[1] == dbi.type
+ assert dbi[2] == dbi.size
+
+	The type is designed to be as lightweight as possible."""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size):
+ return tuple.__new__(cls, (sha, type, size))
+
+ def __init__(self, *args):
+ tuple.__init__(self)
+
+ #{ Interface
+ @property
+ def sha(self):
+ return self[0]
+
+ @property
+ def type(self):
+ return self[1]
+
+ @property
+ def size(self):
+ return self[2]
+ #} END interface
+
+
+class OStream(OInfo):
+ """Base for object streams retrieved from the database, providing additional
+ information about the stream.
+ Generally, ODB streams are read-only as objects are immutable"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, type, size, stream, *args, **kwargs):
+ """Helps with the initialization of subclasses"""
+ return tuple.__new__(cls, (sha, type, size, stream))
+
+
+ def __init__(self, *args, **kwargs):
+ tuple.__init__(self)
+ #{ Interface
+
+ def is_compressed(self):
+ """:return: True if reads of this stream yield zlib compressed data. Default False
+ :note: this does not imply anything about the actual internal storage.
+ Hence the data could be uncompressed, but read compressed, or vice versa"""
+		return False
+
+ #} END interface
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ return self[3].read(size)
+
+ #} END stream reader interface
+
+
+class IStream(list):
+ """Represents an input content stream to be fed into the ODB. It is mutable to allow
+	the ODB to record information about the operation's outcome right in this instance.
+
+ It provides interfaces for the OStream and a StreamReader to allow the instance
+ to blend in without prior conversion.
+
+ The only method your content stream must support is 'read'"""
+ __slots__ = tuple()
+
+ def __new__(cls, type, size, stream, sha=None, compressed=False):
+ return list.__new__(cls, (sha, type, size, stream, compressed, None))
+
+	def __init__(self, type, size, stream, sha=None, compressed=False):
+		list.__init__(self, (sha, type, size, stream, compressed, None))
+
+ #{ Interface
+
+ def hexsha(self):
+ """:return: our sha, hex encoded, 40 bytes"""
+ return to_hex_sha(self[0])
+
+ def binsha(self):
+ """:return: our sha as binary, 20 bytes"""
+ return to_bin_sha(self[0])
+
+ def _error(self):
+ """:return: the error that occurred when processing the stream, or None"""
+ return self[5]
+
+ def _set_error(self, exc):
+ """Set this input stream to the given exc, may be None to reset the error"""
+ self[5] = exc
+
+ error = property(_error, _set_error)
+
+ #} END interface
+
+ #{ Stream Reader Interface
+
+ def read(self, size=-1):
+ """Implements a simple stream reader interface, passing the read call on
+ to our internal stream"""
+ return self[3].read(size)
+
+ #} END stream reader interface
+
+ #{ interface
+
+ def _set_sha(self, sha):
+ self[0] = sha
+
+ def _sha(self):
+ return self[0]
+
+ sha = property(_sha, _set_sha)
+
+
+ def _type(self):
+ return self[1]
+
+ def _set_type(self, type):
+ self[1] = type
+
+ type = property(_type, _set_type)
+
+ def _size(self):
+ return self[2]
+
+ def _set_size(self, size):
+ self[2] = size
+
+ size = property(_size, _set_size)
+
+ def _stream(self):
+ return self[3]
+
+ def _set_stream(self, stream):
+ self[3] = stream
+
+ stream = property(_stream, _set_stream)
+
+ #} END odb info interface
+
+ #{ OStream interface
+
+ def is_compressed(self):
+ return self[4]
+
+ #} END OStream interface
+
+
+class InvalidOInfo(tuple):
+ """Carries information about a sha identifying an object which is invalid in
+ the queried database. The exception attribute provides more information about
+ the cause of the issue"""
+ __slots__ = tuple()
+
+ def __new__(cls, sha, exc):
+ return tuple.__new__(cls, (sha, exc))
+
+ def __init__(self, sha, exc):
+ tuple.__init__(self, (sha, exc))
+
+ @property
+ def sha(self):
+ return self[0]
+
+ @property
+ def error(self):
+ """:return: exception instance explaining the failure"""
+ return self[1]
+
+
+class InvalidOStream(InvalidOInfo):
+ """Carries information about an invalid ODB stream"""
+ __slots__ = tuple()
+
+#} END ODB Bases
+
+
+#{ RO Streams
+
+class DecompressMemMapReader(object):
+	"""Reads data in chunks from a memory map and decompresses it. The client sees
+	only the uncompressed data - file-like read calls handle the buffered
+	decompression on demand
+
+ A constraint on the total size of bytes is activated, simulating
+ a logical file within a possibly larger physical memory area
+
+ To read efficiently, you clearly don't want to read individual bytes, instead,
+ read a few kilobytes at least.
+
+ :note: The chunk-size should be carefully selected as it will involve quite a bit
+		of string copying due to the way the zlib is implemented. It's very wasteful,
+		hence we try to find a good tradeoff between allocation time and number of
+		times we actually allocate. A custom zlib implementation would be good here
+		to better support streamed reading - it would only need to keep the mmap
+		and decompress it into chunks, that's all ... """
+ __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')
+
+ max_read_size = 512*1024
+
+ def __init__(self, m, close_on_deletion, size):
+ """Initialize with mmap for stream reading"""
+ self._m = m
+ self._zip = zlib.decompressobj()
+ self._buf = None # buffer of decompressed bytes
+ self._buflen = 0 # length of bytes in buffer
+ self._s = size # size of uncompressed data to read in total
+ self._br = 0 # num uncompressed bytes read
+ self._cws = 0 # start byte of compression window
+ self._cwe = 0 # end byte of compression window
+ self._close = close_on_deletion # close the memmap on deletion ?
+
+ def __del__(self):
+ if self._close:
+ self._m.close()
+ # END handle resource freeing
+
+ @classmethod
+	def new(cls, m, close_on_deletion=False):
+ """Create a new DecompressMemMapReader instance for acting as a read-only stream
+ This method parses the object header from m and returns the parsed
+ type and size, as well as the created stream instance.
+		:param m: memory map on which to operate
+ :param close_on_deletion: if True, the memory map will be closed once we are
+ being deleted"""
+		inst = cls(m, close_on_deletion, 0)
+
+ # read header
+ maxb = 512 # should really be enough, cgit uses 8192 I believe
+ inst._s = maxb
+ hdr = inst.read(maxb)
+ hdrend = hdr.find("\0")
+ type, size = hdr[:hdrend].split(" ")
+ size = int(size)
+ inst._s = size
+
+ # adjust internal state to match actual header length that we ignore
+ # The buffer will be depleted first on future reads
+ inst._br = 0
+ hdrend += 1 # count terminating \0
+ inst._buf = StringIO(hdr[hdrend:])
+ inst._buflen = len(hdr) - hdrend
+
+ return type, size, inst
+
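+	# Illustrative use on a mapped loose object ('m' is assumed to be an mmap
+	# of the object file):
+	#
+	#   type, size, stream = DecompressMemMapReader.new(m)
+	#   data = stream.read()		# all uncompressed bytes
+	#   assert len(data) == size
+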
+ def read(self, size=-1):
+ if size < 1:
+ size = self._s - self._br
+ else:
+ size = min(size, self._s - self._br)
+ # END clamp size
+
+ if size == 0:
+ return str()
+ # END handle depletion
+
+ # protect from memory peaks
+		# If the caller reads large chunks, our memory patterns get really bad
+ # as we end up copying a possibly huge chunk from our memory map right into
+ # memory. This might not even be possible. Nonetheless, try to dampen the
+ # effect a bit by reading in chunks, returning a huge string in the end.
+ # Our performance now depends on StringIO. This way we don't need two large
+ # buffers in peak times, but only one large one in the end which is
+ # the return buffer
+		# NO: We don't do it - if the caller thinks it's best, he is right. If he
+		# has trouble, he will start reading in chunks. According to our tests
+		# it's still faster if we read 10 Mb at once instead of chunking it.
+
+ # if size > self.max_read_size:
+ # sio = StringIO()
+ # while size:
+ # read_size = min(self.max_read_size, size)
+ # data = self.read(read_size)
+ # sio.write(data)
+ # size -= len(data)
+ # if len(data) < read_size:
+ # break
+ # # END data loop
+ # sio.seek(0)
+ # return sio.getvalue()
+ # # END handle maxread
+ #
+ # deplete the buffer, then just continue using the decompress object
+ # which has an own buffer. We just need this to transparently parse the
+ # header from the zlib stream
+ dat = str()
+ if self._buf:
+ if self._buflen >= size:
+ # have enough data
+ dat = self._buf.read(size)
+ self._buflen -= size
+ self._br += size
+ return dat
+ else:
+ dat = self._buf.read() # ouch, duplicates data
+ size -= self._buflen
+ self._br += self._buflen
+
+ self._buflen = 0
+ self._buf = None
+ # END handle buffer len
+ # END handle buffer
+
+ # decompress some data
+		# Abstract: zlib needs to operate on chunks of our memory map ( which may
+		# be large ), as it will otherwise always fill in the 'unconsumed_tail'
+		# attribute, which possibly reads our whole map to the end, forcing
+		# everything to be read from disk even though just a portion was requested.
+		# As this would be a nogo, we work around it by passing only chunks of data,
+		# moving the window into the memory map along as we decompress, which keeps
+		# the tail smaller than our chunk-size. This causes 'only' the chunk to be
+		# copied once, and another copy of a part of it when it creates the unconsumed
+		# tail. We have to use it to hand in the appropriate amount of bytes during
+		# the next read.
+ tail = self._zip.unconsumed_tail
+ if tail:
+ # move the window, make it as large as size demands. For code-clarity,
+ # we just take the chunk from our map again instead of reusing the unconsumed
+			# tail. The latter one would save some memory copying, but we could end up
+ # with not getting enough data uncompressed, so we had to sort that out as well.
+ # Now we just assume the worst case, hence the data is uncompressed and the window
+ # needs to be as large as the uncompressed bytes we want to read.
+ self._cws = self._cwe - len(tail)
+ self._cwe = self._cws + size
+
+
+ indata = self._m[self._cws:self._cwe] # another copy ... :(
+ # get the actual window end to be sure we don't use it for computations
+ self._cwe = self._cws + len(indata)
+ else:
+ cws = self._cws
+ self._cws = self._cwe
+ self._cwe = cws + size
+ indata = self._m[self._cws:self._cwe] # ... copy it again :(
+ # END handle tail
+
+ dcompdat = self._zip.decompress(indata, size)
+
+ self._br += len(dcompdat)
+ if dat:
+ dcompdat = dat + dcompdat
+
+ return dcompdat
+
+#} END RO streams
+
+
+#{ W Streams
+
+class Sha1Writer(object):
+	"""Simple stream writer which produces a sha whenever you like, as it digests
+	everything it is supposed to write"""
+
+ def __init__(self):
+ self.sha1 = make_sha("")
+
+ #{ Stream Interface
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+		:return: length of incoming data"""
+ self.sha1.update(data)
+ return len(data)
+
+	#} END stream interface
+
+ #{ Interface
+
+ def sha(self, as_hex = False):
+ """:return: sha so far
+ :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
+ if as_hex:
+ return self.sha1.hexdigest()
+ return self.sha1.digest()
+
+ #} END interface
+
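+# Illustrative use of Sha1Writer (sketch) - compute a git-style sha over a
+# header plus contents:
+#
+#   writer = Sha1Writer()
+#   writer.write("blob 5\0hello")
+#   hexsha = writer.sha(as_hex=True)	# 40 byte hex string
+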
+class FDCompressedSha1Writer(Sha1Writer):
+	"""Digests data written to it, making the sha available, then compresses the
+	data and writes it to the file descriptor
+ :note: operates on raw file descriptors
+ :note: for this to work, you have to use the close-method of this instance"""
+ __slots__ = ("fd", "sha1", "zip")
+
+ # default exception
+ exc = IOError("Failed to write all bytes to filedescriptor")
+
+ def __init__(self, fd):
+ super(FDCompressedSha1Writer, self).__init__()
+ self.fd = fd
+ self.zip = zlib.compressobj(Z_BEST_SPEED)
+
+ #{ Stream Interface
+
+ def write(self, data):
+ """:raise IOError: If not all bytes could be written
+		:return: length of incoming data"""
+ self.sha1.update(data)
+ cdata = self.zip.compress(data)
+ bytes_written = write(self.fd, cdata)
+ if bytes_written != len(cdata):
+ raise self.exc
+ return len(data)
+
+ def close(self):
+ remainder = self.zip.flush()
+ if write(self.fd, remainder) != len(remainder):
+ raise self.exc
+ return close(self.fd)
+
+ #} END stream interface
+
+#} END W streams
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
new file mode 100644
index 00000000..6863e97b
--- /dev/null
+++ b/lib/git/odb/utils.py
@@ -0,0 +1,38 @@
+import binascii
+import os
+import errno
+
+#{ Routines
+
+hex_to_bin = binascii.a2b_hex
+bin_to_hex = binascii.b2a_hex
+
+def to_hex_sha(sha):
+ """:return: hexified version of sha"""
+ if len(sha) == 40:
+ return sha
+ return bin_to_hex(sha)
+
+def to_bin_sha(sha):
+ if len(sha) == 20:
+ return sha
+ return hex_to_bin(sha)
+
+# errors
+ENOENT = errno.ENOENT
+
+# os shortcuts
+exists = os.path.exists
+mkdir = os.mkdir
+isdir = os.path.isdir
+rename = os.rename
+dirname = os.path.dirname
+join = os.path.join
+read = os.read
+write = os.write
+close = os.close
+
+
+#} END Routines
+
+
diff --git a/lib/git/repo.py b/lib/git/repo.py
index f4caa3fb..78e5f526 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -4,12 +4,6 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import os
-import sys
-import re
-import gzip
-import StringIO
-
from errors import InvalidGitRepositoryError, NoSuchPathError
from cmd import Git
from actor import Actor
@@ -19,6 +13,15 @@ from objects import *
from config import GitConfigParser
from remote import Remote
+from odb import GitObjectDB
+
+import os
+import sys
+import re
+import gzip
+import StringIO
+
+
def touch(filename):
fp = open(filename, "a")
fp.close()
@@ -53,7 +56,7 @@ class Repo(object):
 	'git_dir' is the .git repository directory, which is always set.
"""
DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
- __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" )
+ __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb" )
# precompiled regex
re_whitespace = re.compile(r'\s+')
@@ -65,27 +68,22 @@ class Repo(object):
# represents the configuration level of a configuration file
config_level = ("system", "global", "repository")
- def __init__(self, path=None):
- """
- Create a new Repo instance
+ def __init__(self, path=None, odbt = GitObjectDB):
+ """ Create a new Repo instance
- ``path``
- is the path to either the root git directory or the bare git repo
-
- Examples::
+ :param path: is the path to either the root git directory or the bare git repo::
repo = Repo("/Users/mtrier/Development/git-python")
repo = Repo("/Users/mtrier/Development/git-python.git")
repo = Repo("~/Development/git-python.git")
repo = Repo("$REPOSITORIES/Development/git-python.git")
-
- Raises
- InvalidGitRepositoryError or NoSuchPathError
-
- Returns
- ``git.Repo``
- """
-
+
+ :param odbt: Object DataBase type - a type which is constructed by providing
+ the directory containing the database objects, i.e. .git/objects. It will
+ be used to access all object data
+ :raise InvalidGitRepositoryError:
+ :raise NoSuchPathError:
+ :return: git.Repo """
epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))
if not os.path.exists(epath):
@@ -130,6 +128,12 @@ class Repo(object):
self.working_dir = self._working_tree_dir or self.git_dir
self.git = Git(self.working_dir)
+
+ # special handling, in special times
+ args = [os.path.join(self.git_dir, 'objects')]
+ if issubclass(odbt, GitObjectDB):
+ args.append(self.git)
+ self.odb = odbt(*args)
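+
+		# An illustrative alternative (sketch): any type constructible from the
+		# objects directory works as odbt, e.g. the pure-python LooseObjectDB
+		# from git.odb:
+		#
+		#   repo = Repo('path/to/repo', odbt=LooseObjectDB)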
def __eq__(self, rhs):
if isinstance(rhs, Repo):
diff --git a/lib/git/utils.py b/lib/git/utils.py
index c21528b1..60a7de48 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -27,6 +27,21 @@ def make_sha(source=''):
sha1 = sha.sha(source)
return sha1
+def stream_copy(source, destination, chunk_size=512*1024):
+ """Copy all data from the source stream into the destination stream in chunks
+ of size chunk_size
+ :return: amount of bytes written"""
+ br = 0
+ while True:
+ chunk = source.read(chunk_size)
+ destination.write(chunk)
+ br += len(chunk)
+ if len(chunk) < chunk_size:
+ break
+ # END reading output stream
+ return br
+
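+# An illustrative copy between two file-like objects (sketch):
+#
+#   from cStringIO import StringIO
+#   src, dst = StringIO("x" * 2048), StringIO()
+#   assert stream_copy(src, dst, chunk_size=512) == 2048
+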
+
def join_path(a, *p):
"""Join path tokens together similar to os.path.join, but always use
'/' instead of possibly '\' on windows."""
@@ -61,12 +76,14 @@ def join_path_native(a, *p):
return to_native_path(join_path(a, *p))
-class SHA1Writer(object):
+class IndexFileSHA1Writer(object):
"""
Wrapper around a file-like object that remembers the SHA1 of
the data written to it. It will write a sha when the stream is closed
 	or when asked for explicitly using write_sha.
+ Only useful to the indexfile
+
Note:
Based on the dulwich project
"""
@@ -78,7 +95,7 @@ class SHA1Writer(object):
def write(self, data):
self.sha1.update(data)
- self.f.write(data)
+ return self.f.write(data)
def write_sha(self):
sha = self.sha1.digest()
diff --git a/test/fixtures/rev_list b/test/fixtures/rev_list
index 95a1ebff..1a576118 100644
--- a/test/fixtures/rev_list
+++ b/test/fixtures/rev_list
@@ -1,24 +1,3 @@
-commit 4c8124ffcf4039d292442eeccabdeca5af5c5017
-tree 672eca9b7f9e09c22dcb128c283e8c3c8d7697a4
-parent 634396b2f541a9f2d58b00be1a07f0c358b999b3
-author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-committer Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-
- implement Grit#heads
-
-commit 634396b2f541a9f2d58b00be1a07f0c358b999b3
-tree b35b4bf642d667fdd613eebcfe4e17efd420fb8a
-author Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700
-committer Tom Preston-Werner <tom@mojombo.com> 1191997100 -0700
-
- initial grit setup
-
-commit ab25fd8483882c3bda8a458ad2965d2248654335
-tree c20b5ec543bde1e43a931449b196052c06ed8acc
-parent 6e64c55896aabb9a7d8e9f8f296f426d21a78c2c
-parent 7f874954efb9ba35210445be456c74e037ba6af2
-author Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700
-committer Tom Preston-Werner <tom@mojombo.com> 1182645538 -0700
-
- Merge branch 'site'
- Some other stuff
+4c8124ffcf4039d292442eeccabdeca5af5c5017
+634396b2f541a9f2d58b00be1a07f0c358b999b3
+ab25fd8483882c3bda8a458ad2965d2248654335
diff --git a/test/git/performance/lib.py b/test/git/performance/lib.py
new file mode 100644
index 00000000..7d2a9f4a
--- /dev/null
+++ b/test/git/performance/lib.py
@@ -0,0 +1,65 @@
+"""Contains library functions"""
+import os
+from test.testlib import *
+import shutil
+import tempfile
+
+from git import (
+ Repo
+ )
+
+#{ Invariants
+k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE"
+#} END invariants
+
+
+#{ Utilities
+def resolve_or_fail(env_var):
+ """:return: resolved environment variable or raise EnvironmentError"""
+ try:
+ return os.environ[env_var]
+ except KeyError:
+		raise EnvironmentError("Please set the %r environment variable and retry" % env_var)
+ # END exception handling
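+
+# Example (sketch): resolve the test repository base directory, assuming the
+# GIT_PYTHON_TEST_GIT_REPO_BASE environment variable is set:
+# repo_base = resolve_or_fail(k_env_git_repo)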
+
+#} END utilities
+
+
+#{ Base Classes
+
+class TestBigRepoR(TestBase):
+ """TestCase providing access to readonly 'big' repositories using the following
+ member variables:
+
+	* gitrorepo
+
+ * Read-Only git repository - actually the repo of git itself"""
+
+ #{ Invariants
+ head_sha_2k = '235d521da60e4699e5bd59ac658b5b48bd76ddca'
+ head_sha_50 = '32347c375250fd470973a5d76185cac718955fd5'
+ #} END invariants
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestBigRepoR, cls).setUpAll()
+ cls.gitrorepo = Repo(resolve_or_fail(k_env_git_repo))
+
+
+class TestBigRepoRW(TestBigRepoR):
+ """As above, but provides a big repository that we can write to.
+
+ Provides ``self.gitrwrepo``"""
+
+ @classmethod
+ def setUpAll(cls):
+ super(TestBigRepoRW, cls).setUpAll()
+ dirname = tempfile.mktemp()
+ os.mkdir(dirname)
+ cls.gitrwrepo = cls.gitrorepo.clone(dirname, shared=True, bare=True)
+
+ @classmethod
+ def tearDownAll(cls):
+ shutil.rmtree(cls.gitrwrepo.working_dir)
+
+#} END base classes
diff --git a/test/git/performance/test_commit.py b/test/git/performance/test_commit.py
new file mode 100644
index 00000000..0571d0d9
--- /dev/null
+++ b/test/git/performance/test_commit.py
@@ -0,0 +1,98 @@
+# test_performance.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+from lib import *
+from git import *
+from test.git.test_commit import assert_commit_serialization
+from cStringIO import StringIO
+from time import time
+import sys
+
+class TestPerformance(TestBigRepoRW):
+
+ # ref with about 100 commits in its history
+ ref_100 = '0.1.6'
+
+ def _query_commit_info(self, c):
+ c.author
+ c.authored_date
+ c.author_tz_offset
+ c.committer
+ c.committed_date
+ c.committer_tz_offset
+ c.message
+ c.parents
+
+ def test_iteration(self):
+ no = 0
+ nc = 0
+
+		# do a full iteration over the history of the given ref, querying the
+		# basic information of each commit and traversing each commit's tree
+
+ st = time()
+ for c in self.rorepo.iter_commits(self.ref_100):
+ nc += 1
+ self._query_commit_info(c)
+ for obj in c.tree.traverse():
+ obj.size
+ no += 1
+ # END for each object
+ # END for each commit
+ elapsed_time = time() - st
+		print >> sys.stderr, "Traversed %i Trees and a total of %i uncached objects in %s [s] ( %f objs/s )" % (nc, no, elapsed_time, no/elapsed_time)
+
+ def test_commit_traversal(self):
+ # bound to cat-file parsing performance
+ nc = 0
+ st = time()
+ for c in self.gitrorepo.commit(self.head_sha_2k).traverse(branch_first=False):
+ nc += 1
+ self._query_commit_info(c)
+ # END for each traversed commit
+ elapsed_time = time() - st
+ print >> sys.stderr, "Traversed %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
+
+ def test_commit_iteration(self):
+ # bound to stream parsing performance
+ nc = 0
+ st = time()
+ for c in Commit.iter_items(self.gitrorepo, self.head_sha_2k):
+ nc += 1
+ self._query_commit_info(c)
+ # END for each traversed commit
+ elapsed_time = time() - st
+ print >> sys.stderr, "Iterated %i Commits in %s [s] ( %f commits/s )" % (nc, elapsed_time, nc/elapsed_time)
+
+ def test_commit_serialization(self):
+ assert_commit_serialization(self.gitrwrepo, self.head_sha_2k, True)
+
+ rwrepo = self.gitrwrepo
+ make_object = rwrepo.odb.store
+ # direct serialization - deserialization can be tested afterwards
+		# serialization is probably limited by IO
+ hc = rwrepo.commit(self.head_sha_2k)
+
+ commits = list()
+ nc = 5000
+ st = time()
+ for i in xrange(nc):
+ cm = Commit( rwrepo, Commit.NULL_HEX_SHA, hc.tree,
+ hc.author, hc.authored_date, hc.author_tz_offset,
+ hc.committer, hc.committed_date, hc.committer_tz_offset,
+ str(i), parents=hc.parents, encoding=hc.encoding)
+
+ stream = StringIO()
+ cm._serialize(stream)
+ slen = stream.tell()
+ stream.seek(0)
+
+ cm.sha = make_object(IStream(Commit.type, slen, stream)).sha
+ # END commit creation
+ elapsed = time() - st
+
+ print >> sys.stderr, "Serialized %i commits to loose objects in %f s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
new file mode 100644
index 00000000..7b1ee838
--- /dev/null
+++ b/test/git/performance/test_odb.py
@@ -0,0 +1,61 @@
+"""Performance tests for object store"""
+
+from time import time
+import sys
+import stat
+
+from lib import (
+ TestBigRepoR
+ )
+
+
+class TestObjDBPerformance(TestBigRepoR):
+
+ def test_random_access(self):
+
+ # GET COMMITS
+ # TODO: use the actual db for this
+ st = time()
+ root_commit = self.gitrorepo.commit(self.head_sha_2k)
+ commits = list(root_commit.traverse())
+ nc = len(commits)
+ elapsed = time() - st
+
+ print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+
+
+ # GET TREES
+ # walk all trees of all commits
+ st = time()
+ blobs_per_commit = list()
+ nt = 0
+ for commit in commits:
+ tree = commit.tree
+ blobs = list()
+ for item in tree.traverse():
+ nt += 1
+ if item.type == 'blob':
+ blobs.append(item)
+ # direct access for speed
+ # END while trees are there for walking
+ blobs_per_commit.append(blobs)
+ # END for each commit
+ elapsed = time() - st
+
+ print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
+
+ # GET BLOBS
+ st = time()
+ nb = 0
+ too_many = 15000
+ for blob_list in blobs_per_commit:
+ for blob in blob_list:
+ blob.data
+			# END for each blob
+ nb += len(blob_list)
+ if nb > too_many:
+ break
+ # END for each bloblist
+ elapsed = time() - st
+
+		print >> sys.stderr, "Retrieved %i blobs and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
diff --git a/test/git/performance/test_streams.py b/test/git/performance/test_streams.py
new file mode 100644
index 00000000..01ec9fc4
--- /dev/null
+++ b/test/git/performance/test_streams.py
@@ -0,0 +1,144 @@
+"""Performance data streaming performance"""
+
+from test.testlib import *
+from git.odb import *
+
+from array import array
+from cStringIO import StringIO
+from time import time
+import os
+import sys
+import stat
+import random
+import subprocess
+
+
+from lib import (
+ TestBigRepoR
+ )
+
+
+
+def make_memory_file(size_in_bytes, randomize=False):
+ """:return: tuple(size_of_stream, stream)
+ :param randomize: try to produce a very random stream"""
+ actual_size = size_in_bytes / 4
+ producer = xrange(actual_size)
+ if randomize:
+ producer = list(producer)
+ random.shuffle(producer)
+ # END randomize
+ a = array('i', producer)
+ return actual_size*4, StringIO(a.tostring())
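+
+# Example (sketch): a ~10 MiB randomized in-memory stream
+# size, stream = make_memory_file(1000*1000*10, randomize=True)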
+
+
+class TestObjDBPerformance(TestBigRepoR):
+
+ large_data_size_bytes = 1000*1000*10 # some MiB should do it
+ moderate_data_size_bytes = 1000*1000*1 # just 1 MiB
+
+ @with_bare_rw_repo
+ def test_large_data_streaming(self, rwrepo):
+ ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+
+ for randomize in range(2):
+ desc = (randomize and 'random ') or ''
+ print >> sys.stderr, "Creating %s data ..." % desc
+ st = time()
+ size, stream = make_memory_file(self.large_data_size_bytes, randomize)
+ elapsed = time() - st
+ print >> sys.stderr, "Done (in %f s)" % elapsed
+
+ # writing - due to the compression it will seem faster than it is
+ st = time()
+ sha = ldb.store(IStream('blob', size, stream)).sha
+ elapsed_add = time() - st
+ assert ldb.has_object(sha)
+ db_file = ldb.readable_db_object_path(sha)
+ fsize_kib = os.path.getsize(db_file) / 1000
+
+
+ size_kib = size / 1000
+ print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
+
+ # reading all at once
+ st = time()
+ ostream = ldb.stream(sha)
+ shadata = ostream.read()
+ elapsed_readall = time() - st
+
+ stream.seek(0)
+ assert shadata == stream.getvalue()
+ print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
+
+
+			# reading in chunks of 512 KB
+ cs = 512*1000
+ chunks = list()
+ st = time()
+ ostream = ldb.stream(sha)
+ while True:
+ data = ostream.read(cs)
+ chunks.append(data)
+ if len(data) < cs:
+ break
+ # END read in chunks
+ elapsed_readchunks = time() - st
+
+ stream.seek(0)
+ assert ''.join(chunks) == stream.getvalue()
+
+ cs_kib = cs / 1000
+ print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
+
+ # del db file so git has something to do
+ os.remove(db_file)
+
+ # VS. CGIT
+ ##########
+ # CGIT ! Can using the cgit programs be faster ?
+ proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
+
+			# write file - pump everything in at once to be as fast as possible
+ data = stream.getvalue() # cache it
+ st = time()
+ proc.stdin.write(data)
+ proc.stdin.close()
+ gitsha = proc.stdout.read().strip()
+ proc.wait()
+ gelapsed_add = time() - st
+ del(data)
+ assert gitsha == sha # we do it the same way, right ?
+
+			# as it's the same sha, we reuse our path
+ fsize_kib = os.path.getsize(db_file) / 1000
+			print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
+
+ # compare ...
+ print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
+
+
+ # read all
+ st = time()
+ s, t, size, data = rwrepo.git.get_object_data(gitsha)
+ gelapsed_readall = time() - st
+ print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)
+
+ # compare
+			print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
+
+
+ # read chunks
+ st = time()
+ s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
+ while True:
+ data = stream.read(cs)
+ if len(data) < cs:
+ break
+ # END read stream
+ gelapsed_readchunks = time() - st
+ print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
+
+ # compare
+ print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
+ # END for each randomization factor
diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py
new file mode 100644
index 00000000..76adffec
--- /dev/null
+++ b/test/git/performance/test_utils.py
@@ -0,0 +1,59 @@
+"""Performance of utilities"""
+from time import time
+import sys
+import stat
+
+from lib import (
+ TestBigRepoR
+ )
+
+
+class TestUtilPerformance(TestBigRepoR):
+
+ def test_access(self):
+ # compare dict vs. slot access
+ class Slotty(object):
+ __slots__ = "attr"
+ def __init__(self):
+ self.attr = 1
+
+ class Dicty(object):
+ def __init__(self):
+ self.attr = 1
+
+ class BigSlotty(object):
+ __slots__ = ('attr', ) + tuple('abcdefghijk')
+ def __init__(self):
+ for attr in self.__slots__:
+ setattr(self, attr, 1)
+
+ class BigDicty(object):
+ def __init__(self):
+ for attr in BigSlotty.__slots__:
+ setattr(self, attr, 1)
+
+ ni = 1000000
+ for cls in (Slotty, Dicty, BigSlotty, BigDicty):
+ cli = cls()
+ st = time()
+ for i in xrange(ni):
+ cli.attr
+ # END for each access
+ elapsed = time() - st
+ print >> sys.stderr, "Accessed %s.attr %i times in %s s ( %f acc / s)" % (cls.__name__, ni, elapsed, ni / elapsed)
+ # END for each class type
+
+		# check speed of sequence accesses
+ for cls in (list, tuple):
+ x = 10
+ st = time()
+ s = cls(range(x))
+ for i in xrange(ni):
+ s[0]
+ s[1]
+ s[2]
+ # END for
+ elapsed = time() - st
+ na = ni * 3
+ print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed)
+ # END for each sequence
diff --git a/test/git/test_commit.py b/test/git/test_commit.py
index 48937c93..e65e2e59 100644
--- a/test/git/test_commit.py
+++ b/test/git/test_commit.py
@@ -6,172 +6,229 @@
from test.testlib import *
from git import *
+from git.odb import IStream
+
+from cStringIO import StringIO
+import time
+import sys
-class TestCommit(TestBase):
- def test_bake(self):
+def assert_commit_serialization(rwrepo, commit_id, print_performance_info=False):
+	"""Traverse all commits in the history of the commit identified by commit_id
+	and check that serialization works.
+ :param print_performance_info: if True, we will show how fast we are"""
+ ns = 0 # num serializations
+ nds = 0 # num deserializations
+
+ st = time.time()
+ for cm in rwrepo.commit(commit_id).traverse():
+ nds += 1
+
+ # assert that we deserialize commits correctly, hence we get the same
+ # sha on serialization
+ stream = StringIO()
+ cm._serialize(stream)
+ ns += 1
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ istream = rwrepo.odb.store(IStream(Commit.type, streamlen, stream))
+ assert istream.sha == cm.sha
+
+ nc = Commit(rwrepo, Commit.NULL_HEX_SHA, cm.tree.sha,
+ cm.author, cm.authored_date, cm.author_tz_offset,
+ cm.committer, cm.committed_date, cm.committer_tz_offset,
+ cm.message, cm.parents, cm.encoding)
+
+ assert nc.parents == cm.parents
+ stream = StringIO()
+ nc._serialize(stream)
+ ns += 1
+ streamlen = stream.tell()
+ stream.seek(0)
+
+ # reuse istream
+ istream.size = streamlen
+ istream.stream = stream
+ istream.sha = None
+ nc.sha = rwrepo.odb.store(istream).sha
+
+ # if it worked, we have exactly the same contents !
+ assert nc.sha == cm.sha
+ # END check commits
+ elapsed = time.time() - st
+
+ if print_performance_info:
+		print >> sys.stderr, "Serialized %i and deserialized %i commits in %f s ( (%f, %f) commits / s )" % (ns, nds, elapsed, ns/elapsed, nds/elapsed)
+ # END handle performance info
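+
+# Example usage (see test_serialization below):
+# assert_commit_serialization(rwrepo, '0.1.6')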
+
- commit = Commit(self.rorepo, **{'sha': '2454ae89983a4496a445ce347d7a41c0bb0ea7ae'})
- commit.author # bake
+class TestCommit(TestBase):
- assert_equal("Sebastian Thiel", commit.author.name)
- assert_equal("byronimo@gmail.com", commit.author.email)
- assert commit.author == commit.committer
- assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int)
- assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int)
- assert commit.message == "Added missing information to docstrings of commit and stats module"
+ def test_bake(self):
+ commit = Commit(self.rorepo, '2454ae89983a4496a445ce347d7a41c0bb0ea7ae')
+ commit.author # bake
- def test_stats(self):
- commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781')
- stats = commit.stats
-
- def check_entries(d):
- assert isinstance(d, dict)
- for key in ("insertions", "deletions", "lines"):
- assert key in d
- # END assertion helper
- assert stats.files
- assert stats.total
-
- check_entries(stats.total)
- assert "files" in stats.total
-
- for filepath, d in stats.files.items():
- check_entries(d)
- # END for each stated file
-
- # assure data is parsed properly
- michael = Actor._from_string("Michael Trier <mtrier@gmail.com>")
- assert commit.author == michael
- assert commit.committer == michael
- assert commit.authored_date == 1210193388
- assert commit.committed_date == 1210193388
- assert commit.author_tz_offset == 14400, commit.author_tz_offset
- assert commit.committer_tz_offset == 14400, commit.committer_tz_offset
- assert commit.message == "initial project"
-
- def test_traversal(self):
- start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff")
- first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781")
- p0 = start.parents[0]
- p1 = start.parents[1]
- p00 = p0.parents[0]
- p10 = p1.parents[0]
-
- # basic branch first, depth first
- dfirst = start.traverse(branch_first=False)
- bfirst = start.traverse(branch_first=True)
- assert dfirst.next() == p0
- assert dfirst.next() == p00
-
- assert bfirst.next() == p0
- assert bfirst.next() == p1
- assert bfirst.next() == p00
- assert bfirst.next() == p10
-
- # at some point, both iterations should stop
- assert list(bfirst)[-1] == first
- stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True)
- l = list(stoptraverse)
- assert len(l[0]) == 2
-
- # ignore self
- assert start.traverse(ignore_self=False).next() == start
-
- # depth
- assert len(list(start.traverse(ignore_self=False, depth=0))) == 1
-
- # prune
- assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1
-
- # predicate
- assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1
-
- # traversal should stop when the beginning is reached
- self.failUnlessRaises(StopIteration, first.traverse().next)
-
- # parents of the first commit should be empty ( as the only parent has a null
- # sha )
- assert len(first.parents) == 0
-
- def test_iteration(self):
- # we can iterate commits
- all_commits = Commit.list_items(self.rorepo, self.rorepo.head)
- assert all_commits
- assert all_commits == list(self.rorepo.iter_commits())
-
- # this includes merge commits
- mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d')
- assert mcomit in all_commits
-
- # we can limit the result to paths
- ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES'))
- assert ltd_commits and len(ltd_commits) < len(all_commits)
-
- # show commits of multiple paths, resulting in a union of commits
- less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS')))
- assert len(ltd_commits) < len(less_ltd_commits)
-
-
- @patch_object(Git, '_call_process')
- def test_rev_list_bisect_all(self, git):
- """
- 'git rev-list --bisect-all' returns additional information
- in the commit header. This test ensures that we properly parse it.
- """
+ assert_equal("Sebastian Thiel", commit.author.name)
+ assert_equal("byronimo@gmail.com", commit.author.email)
+ assert commit.author == commit.committer
+ assert isinstance(commit.authored_date, int) and isinstance(commit.committed_date, int)
+ assert isinstance(commit.author_tz_offset, int) and isinstance(commit.committer_tz_offset, int)
+ assert commit.message == "Added missing information to docstrings of commit and stats module\n"
- git.return_value = fixture('rev_list_bisect_all')
- revs = self.rorepo.git.rev_list('HEAD',
- pretty='raw',
- first_parent=True,
- bisect_all=True)
- assert_true(git.called)
+ def test_stats(self):
+ commit = Commit(self.rorepo, '33ebe7acec14b25c5f84f35a664803fcab2f7781')
+ stats = commit.stats
+
+ def check_entries(d):
+ assert isinstance(d, dict)
+ for key in ("insertions", "deletions", "lines"):
+ assert key in d
+ # END assertion helper
+ assert stats.files
+ assert stats.total
+
+ check_entries(stats.total)
+ assert "files" in stats.total
+
+ for filepath, d in stats.files.items():
+ check_entries(d)
+ # END for each stated file
+
+ # assure data is parsed properly
+ michael = Actor._from_string("Michael Trier <mtrier@gmail.com>")
+ assert commit.author == michael
+ assert commit.committer == michael
+ assert commit.authored_date == 1210193388
+ assert commit.committed_date == 1210193388
+ assert commit.author_tz_offset == 14400, commit.author_tz_offset
+ assert commit.committer_tz_offset == 14400, commit.committer_tz_offset
+ assert commit.message == "initial project\n"
+
+ def test_traversal(self):
+ start = self.rorepo.commit("a4d06724202afccd2b5c54f81bcf2bf26dea7fff")
+ first = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781")
+ p0 = start.parents[0]
+ p1 = start.parents[1]
+ p00 = p0.parents[0]
+ p10 = p1.parents[0]
+
+ # basic branch first, depth first
+ dfirst = start.traverse(branch_first=False)
+ bfirst = start.traverse(branch_first=True)
+ assert dfirst.next() == p0
+ assert dfirst.next() == p00
+
+ assert bfirst.next() == p0
+ assert bfirst.next() == p1
+ assert bfirst.next() == p00
+ assert bfirst.next() == p10
+
+ # at some point, both iterations should stop
+ assert list(bfirst)[-1] == first
+ stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True)
+ l = list(stoptraverse)
+ assert len(l[0]) == 2
+
+ # ignore self
+ assert start.traverse(ignore_self=False).next() == start
+
+ # depth
+ assert len(list(start.traverse(ignore_self=False, depth=0))) == 1
+
+ # prune
+ assert start.traverse(branch_first=1, prune=lambda i,d: i==p0).next() == p1
+
+ # predicate
+ assert start.traverse(branch_first=1, predicate=lambda i,d: i==p1).next() == p1
+
+ # traversal should stop when the beginning is reached
+ self.failUnlessRaises(StopIteration, first.traverse().next)
+
+ # parents of the first commit should be empty ( as the only parent has a null
+ # sha )
+ assert len(first.parents) == 0
+
+ def test_iteration(self):
+ # we can iterate commits
+ all_commits = Commit.list_items(self.rorepo, self.rorepo.head)
+ assert all_commits
+ assert all_commits == list(self.rorepo.iter_commits())
+
+ # this includes merge commits
+ mcomit = Commit(self.rorepo, 'd884adc80c80300b4cc05321494713904ef1df2d')
+ assert mcomit in all_commits
+
+ # we can limit the result to paths
+ ltd_commits = list(self.rorepo.iter_commits(paths='CHANGES'))
+ assert ltd_commits and len(ltd_commits) < len(all_commits)
+
+ # show commits of multiple paths, resulting in a union of commits
+ less_ltd_commits = list(Commit.iter_items(self.rorepo, 'master', paths=('CHANGES', 'AUTHORS')))
+ assert len(ltd_commits) < len(less_ltd_commits)
+
+ def test_iter_items(self):
+ # pretty not allowed
+ self.failUnlessRaises(ValueError, Commit.iter_items, self.rorepo, 'master', pretty="raw")
+
+ def test_rev_list_bisect_all(self):
+ """
+ 'git rev-list --bisect-all' returns additional information
+ in the commit header. This test ensures that we properly parse it.
+ """
+ revs = self.rorepo.git.rev_list('933d23bf95a5bd1624fbcdf328d904e1fa173474',
+ first_parent=True,
+ bisect_all=True)
- commits = Commit._iter_from_process_or_stream(self.rorepo, ListProcessAdapter(revs), True)
- expected_ids = (
- 'cf37099ea8d1d8c7fbf9b6d12d7ec0249d3acb8b',
- '33ebe7acec14b25c5f84f35a664803fcab2f7781',
- 'a6604a00a652e754cb8b6b0b9f194f839fc38d7c',
- '8df638c22c75ddc9a43ecdde90c0c9939f5009e7',
- 'c231551328faa864848bde6ff8127f59c9566e90',
- )
- for sha1, commit in zip(expected_ids, commits):
- assert_equal(sha1, commit.sha)
+ commits = Commit._iter_from_process_or_stream(self.rorepo, StringProcessAdapter(revs))
+ expected_ids = (
+ '7156cece3c49544abb6bf7a0c218eb36646fad6d',
+ '1f66cfbbce58b4b552b041707a12d437cc5f400a',
+ '33ebe7acec14b25c5f84f35a664803fcab2f7781',
+ '933d23bf95a5bd1624fbcdf328d904e1fa173474'
+ )
+ for sha1, commit in zip(expected_ids, commits):
+ assert_equal(sha1, commit.sha)
- def test_count(self):
- assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143
-
- def test_list(self):
- assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit)
+ def test_count(self):
+ assert self.rorepo.tag('refs/tags/0.1.5').commit.count( ) == 143
+
+ def test_list(self):
+ assert isinstance(Commit.list_items(self.rorepo, '0.1.5', max_count=5)['5117c9c8a4d3af19a9958677e45cda9269de1541'], Commit)
- def test_str(self):
- commit = Commit(self.rorepo, 'abc')
- assert_equal ("abc", str(commit))
+ def test_str(self):
+ commit = Commit(self.rorepo, 'abc')
+ assert_equal ("abc", str(commit))
- def test_repr(self):
- commit = Commit(self.rorepo, 'abc')
- assert_equal('<git.Commit "abc">', repr(commit))
+ def test_repr(self):
+ commit = Commit(self.rorepo, 'abc')
+ assert_equal('<git.Commit "abc">', repr(commit))
- def test_equality(self):
- commit1 = Commit(self.rorepo, 'abc')
- commit2 = Commit(self.rorepo, 'abc')
- commit3 = Commit(self.rorepo, 'zyx')
- assert_equal(commit1, commit2)
- assert_not_equal(commit2, commit3)
-
- def test_iter_parents(self):
- # should return all but ourselves, even if skip is defined
- c = self.rorepo.commit('0.1.5')
- for skip in (0, 1):
- piter = c.iter_parents(skip=skip)
- first_parent = piter.next()
- assert first_parent != c
- assert first_parent == c.parents[0]
- # END for each
-
- def test_base(self):
- name_rev = self.rorepo.head.commit.name_rev
- assert isinstance(name_rev, basestring)
-
+ def test_equality(self):
+ commit1 = Commit(self.rorepo, 'abc')
+ commit2 = Commit(self.rorepo, 'abc')
+ commit3 = Commit(self.rorepo, 'zyx')
+ assert_equal(commit1, commit2)
+ assert_not_equal(commit2, commit3)
+
+ def test_iter_parents(self):
+ # should return all but ourselves, even if skip is defined
+ c = self.rorepo.commit('0.1.5')
+ for skip in (0, 1):
+ piter = c.iter_parents(skip=skip)
+ first_parent = piter.next()
+ assert first_parent != c
+ assert first_parent == c.parents[0]
+ # END for each
+
+ def test_base(self):
+ name_rev = self.rorepo.head.commit.name_rev
+ assert isinstance(name_rev, basestring)
+
+ @with_bare_rw_repo
+ def test_serialization(self, rwrepo):
+ # create all commits of our repo
+ assert_commit_serialization(rwrepo, '0.1.6')
+
diff --git a/test/git/test_diff.py b/test/git/test_diff.py
index 2f6a19bd..a113b992 100644
--- a/test/git/test_diff.py
+++ b/test/git/test_diff.py
@@ -20,7 +20,7 @@ class TestDiff(TestBase):
return diffs
def test_list_from_string_new_mode(self):
- output = ListProcessAdapter(fixture('diff_new_mode'))
+ output = StringProcessAdapter(fixture('diff_new_mode'))
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
self._assert_diff_format(diffs)
@@ -28,7 +28,7 @@ class TestDiff(TestBase):
assert_equal(10, len(diffs[0].diff.splitlines()))
def test_diff_with_rename(self):
- output = ListProcessAdapter(fixture('diff_rename'))
+ output = StringProcessAdapter(fixture('diff_rename'))
diffs = Diff._index_from_patch_format(self.rorepo, output.stdout)
self._assert_diff_format(diffs)
@@ -47,7 +47,7 @@ class TestDiff(TestBase):
"diff_tree_numstat_root" )
for fixture_name in fixtures:
- diff_proc = ListProcessAdapter(fixture(fixture_name))
+ diff_proc = StringProcessAdapter(fixture(fixture_name))
diffs = Diff._index_from_patch_format(self.rorepo, diff_proc.stdout)
# END for each fixture
diff --git a/test/git/test_odb.py b/test/git/test_odb.py
new file mode 100644
index 00000000..c3a03714
--- /dev/null
+++ b/test/git/test_odb.py
@@ -0,0 +1,66 @@
+"""Test for object db"""
+
+from test.testlib import *
+from git.odb import *
+from git.odb.stream import Sha1Writer
+from git import Blob
+from git.errors import BadObject
+
+from cStringIO import StringIO
+import os
+
+
+class TestDB(TestBase):
+ """Test the different db class implementations"""
+
+ # data
+ two_lines = "1234\nhello world"
+
+ all_data = (two_lines, )
+
+ def _assert_object_writing(self, db):
+		"""General tests to verify object writing, compatible with ObjectDBW
+		:note: requires write access to the database"""
+		# start in 'dry-run' mode, using a simple sha1 writer which computes
+		# shas without actually writing objects to the database
+ ostreams = (Sha1Writer, None)
+ for ostreamcls in ostreams:
+ for data in self.all_data:
+ dry_run = ostreamcls is not None
+ ostream = None
+ if ostreamcls is not None:
+ ostream = ostreamcls()
+ # END create ostream
+
+ prev_ostream = db.set_ostream(ostream)
+ assert type(prev_ostream) in ostreams or prev_ostream in ostreams
+
+ istream = IStream(Blob.type, len(data), StringIO(data))
+ my_istream = db.store(istream)
+ sha = istream.sha
+ assert my_istream is istream
+ assert db.has_object(sha) != dry_run
+ assert len(sha) == 40 # for now we require 40 byte shas as default
+
+ # verify data - the slow way, we want to run code
+ if not dry_run:
+ info = db.info(sha)
+ assert Blob.type == info.type
+ assert info.size == len(data)
+
+ ostream = db.stream(sha)
+ assert ostream.read() == data
+ assert ostream.type == Blob.type
+ assert ostream.size == len(data)
+ else:
+ self.failUnlessRaises(BadObject, db.info, sha)
+ self.failUnlessRaises(BadObject, db.stream, sha)
+ # END for each data set
+ # END for each dry_run mode
+
+ @with_bare_rw_repo
+ def test_writing(self, rwrepo):
+ ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
+
+ # write data
+ self._assert_object_writing(ldb)
+
diff --git a/test/git/test_performance.py b/test/git/test_performance.py
deleted file mode 100644
index 72acfcac..00000000
--- a/test/git/test_performance.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# test_performance.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from test.testlib import *
-from git import *
-from time import time
-
-class TestPerformance(TestBase):
-
- def _query_commit_info(self, c):
- c.author
- c.authored_date
- c.author_tz_offset
- c.committer
- c.committed_date
- c.committer_tz_offset
- c.message
- c.parents
-
- def test_iteration(self):
- num_objs = 0
- num_commits = 0
-
- # find the first commit containing the given path - always do a full
- # iteration ( restricted to the path in question ), but in fact it should
- # return quite a lot of commits, we just take one and hence abort the operation
-
- st = time()
- for c in self.rorepo.iter_commits('0.1.6'):
- num_commits += 1
- self._query_commit_info(c)
- for obj in c.tree.traverse():
- obj.size
- num_objs += 1
- # END for each object
- # END for each commit
- elapsed_time = time() - st
- print "Traversed %i Trees and a total of %i unchached objects in %s [s] ( %f objs/s )" % (num_commits, num_objs, elapsed_time, num_objs/elapsed_time)
-
- def test_commit_traversal(self):
- num_commits = 0
-
- st = time()
- for c in self.rorepo.commit('0.1.6').traverse(branch_first=False):
- num_commits += 1
- self._query_commit_info(c)
- # END for each traversed commit
- elapsed_time = time() - st
- print "Traversed %i Commits in %s [s] ( %f commits/s )" % (num_commits, elapsed_time, num_commits/elapsed_time)
diff --git a/test/git/test_repo.py b/test/git/test_repo.py
index ce79402a..d2c7c742 100644
--- a/test/git/test_repo.py
+++ b/test/git/test_repo.py
@@ -10,325 +10,396 @@ from git import *
from git.utils import join_path_native
import tempfile
import shutil
+from cStringIO import StringIO
class TestRepo(TestBase):
-
- @raises(InvalidGitRepositoryError)
- def test_new_should_raise_on_invalid_repo_location(self):
- Repo(tempfile.gettempdir())
-
- @raises(NoSuchPathError)
- def test_new_should_raise_on_non_existant_path(self):
- Repo("repos/foobar")
-
- def test_repo_creation_from_different_paths(self):
- r_from_gitdir = Repo(self.rorepo.git_dir)
- assert r_from_gitdir.git_dir == self.rorepo.git_dir
- assert r_from_gitdir.git_dir.endswith('.git')
- assert not self.rorepo.git.working_dir.endswith('.git')
- assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir
-
- def test_description(self):
- txt = "Test repository"
- self.rorepo.description = txt
- assert_equal(self.rorepo.description, txt)
-
- def test_heads_should_return_array_of_head_objects(self):
- for head in self.rorepo.heads:
- assert_equal(Head, head.__class__)
-
- def test_heads_should_populate_head_data(self):
- for head in self.rorepo.heads:
- assert head.name
- assert isinstance(head.commit,Commit)
- # END for each head
-
- assert isinstance(self.rorepo.heads.master, Head)
- assert isinstance(self.rorepo.heads['master'], Head)
-
- def test_tree_from_revision(self):
- tree = self.rorepo.tree('0.1.6')
- assert tree.type == "tree"
- assert self.rorepo.tree(tree) == tree
-
- # try from invalid revision that does not exist
- self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world')
-
- @patch_object(Git, '_call_process')
- def test_commits(self, git):
- git.return_value = ListProcessAdapter(fixture('rev_list'))
-
- commits = list( self.rorepo.iter_commits('master', max_count=10) )
-
- c = commits[0]
- assert_equal('4c8124ffcf4039d292442eeccabdeca5af5c5017', c.sha)
- assert_equal(["634396b2f541a9f2d58b00be1a07f0c358b999b3"], [p.sha for p in c.parents])
- assert_equal("672eca9b7f9e09c22dcb128c283e8c3c8d7697a4", c.tree.sha)
- assert_equal("Tom Preston-Werner", c.author.name)
- assert_equal("tom@mojombo.com", c.author.email)
- assert_equal(1191999972, c.authored_date)
- assert_equal("Tom Preston-Werner", c.committer.name)
- assert_equal("tom@mojombo.com", c.committer.email)
- assert_equal(1191999972, c.committed_date)
- assert_equal("implement Grit#heads", c.message)
-
- c = commits[1]
- assert_equal(tuple(), c.parents)
-
- c = commits[2]
- assert_equal(["6e64c55896aabb9a7d8e9f8f296f426d21a78c2c", "7f874954efb9ba35210445be456c74e037ba6af2"], map(lambda p: p.sha, c.parents))
- assert_equal("Merge branch 'site'", c.summary)
-
- assert_true(git.called)
-
- def test_trees(self):
- mc = 30
- num_trees = 0
- for tree in self.rorepo.iter_trees('0.1.5', max_count=mc):
- num_trees += 1
- assert isinstance(tree, Tree)
- # END for each tree
- assert num_trees == mc
-
-
- def _test_empty_repo(self, repo):
- # test all kinds of things with an empty, freshly initialized repo.
- # It should throw good errors
-
- # entries should be empty
- assert len(repo.index.entries) == 0
-
- # head is accessible
- assert repo.head
- assert repo.head.ref
- assert not repo.head.is_valid()
-
- # we can change the head to some other ref
- head_ref = Head.from_path(repo, Head.to_full_path('some_head'))
- assert not head_ref.is_valid()
- repo.head.ref = head_ref
-
- # is_dirty can handle all kwargs
- for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)):
- assert not repo.is_dirty(*args)
- # END for each arg
-
- # we can add a file to the index ( if we are not bare )
- if not repo.bare:
- pass
- # END test repos with working tree
-
-
- def test_init(self):
- prev_cwd = os.getcwd()
- os.chdir(tempfile.gettempdir())
- git_dir_rela = "repos/foo/bar.git"
- del_dir_abs = os.path.abspath("repos")
- git_dir_abs = os.path.abspath(git_dir_rela)
- try:
- # with specific path
- for path in (git_dir_rela, git_dir_abs):
- r = Repo.init(path=path, bare=True)
- assert isinstance(r, Repo)
- assert r.bare == True
- assert os.path.isdir(r.git_dir)
-
- self._test_empty_repo(r)
- shutil.rmtree(git_dir_abs)
- # END for each path
-
- os.makedirs(git_dir_rela)
- os.chdir(git_dir_rela)
- r = Repo.init(bare=False)
- r.bare == False
-
- self._test_empty_repo(r)
- finally:
- try:
- shutil.rmtree(del_dir_abs)
- except OSError:
- pass
- os.chdir(prev_cwd)
- # END restore previous state
-
- def test_bare_property(self):
- self.rorepo.bare
-
- @patch_object(Repo, '__init__')
- @patch_object(Git, '_call_process')
- def test_init_with_options(self, git, repo):
- git.return_value = True
- repo.return_value = None
-
- r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"})
- assert isinstance(r, Repo)
-
- assert_true(git.called)
- assert_true(repo.called)
-
- @patch_object(Repo, '__init__')
- @patch_object(Git, '_call_process')
- def test_clone(self, git, repo):
- git.return_value = None
- repo.return_value = None
-
- self.rorepo.clone("repos/foo/bar.git")
-
- assert_true(git.called)
- path = os.path.join(absolute_project_path(), '.git')
- assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {}))
- assert_true(repo.called)
-
- @patch_object(Repo, '__init__')
- @patch_object(Git, '_call_process')
- def test_clone_with_options(self, git, repo):
- git.return_value = None
- repo.return_value = None
-
- self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'})
-
- assert_true(git.called)
- path = os.path.join(absolute_project_path(), '.git')
- assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'),
- { 'template': '/awesome'}))
- assert_true(repo.called)
-
-
- def test_daemon_export(self):
- orig_val = self.rorepo.daemon_export
- self.rorepo.daemon_export = not orig_val
- assert self.rorepo.daemon_export == ( not orig_val )
- self.rorepo.daemon_export = orig_val
- assert self.rorepo.daemon_export == orig_val
+
+ @raises(InvalidGitRepositoryError)
+ def test_new_should_raise_on_invalid_repo_location(self):
+ Repo(tempfile.gettempdir())
+
+ @raises(NoSuchPathError)
+ def test_new_should_raise_on_non_existant_path(self):
+ Repo("repos/foobar")
+
+ def test_repo_creation_from_different_paths(self):
+ r_from_gitdir = Repo(self.rorepo.git_dir)
+ assert r_from_gitdir.git_dir == self.rorepo.git_dir
+ assert r_from_gitdir.git_dir.endswith('.git')
+ assert not self.rorepo.git.working_dir.endswith('.git')
+ assert r_from_gitdir.git.working_dir == self.rorepo.git.working_dir
+
+ def test_description(self):
+ txt = "Test repository"
+ self.rorepo.description = txt
+ assert_equal(self.rorepo.description, txt)
+
+ def test_heads_should_return_array_of_head_objects(self):
+ for head in self.rorepo.heads:
+ assert_equal(Head, head.__class__)
+
+ def test_heads_should_populate_head_data(self):
+ for head in self.rorepo.heads:
+ assert head.name
+ assert isinstance(head.commit,Commit)
+ # END for each head
+
+ assert isinstance(self.rorepo.heads.master, Head)
+ assert isinstance(self.rorepo.heads['master'], Head)
+
+ def test_tree_from_revision(self):
+ tree = self.rorepo.tree('0.1.6')
+ assert len(tree.sha) == 40
+ assert tree.type == "tree"
+ assert self.rorepo.tree(tree) == tree
+
+ # try from invalid revision that does not exist
+ self.failUnlessRaises(ValueError, self.rorepo.tree, 'hello world')
+
+ def test_commits(self):
+ mc = 10
+ commits = list(self.rorepo.iter_commits('0.1.6', max_count=mc))
+ assert len(commits) == mc
+
+ c = commits[0]
+ assert_equal('9a4b1d4d11eee3c5362a4152216376e634bd14cf', c.sha)
+ assert_equal(["c76852d0bff115720af3f27acdb084c59361e5f6"], [p.sha for p in c.parents])
+ assert_equal("ce41fc29549042f1aa09cc03174896cf23f112e3", c.tree.sha)
+ assert_equal("Michael Trier", c.author.name)
+ assert_equal("mtrier@gmail.com", c.author.email)
+ assert_equal(1232829715, c.authored_date)
+ assert_equal(5*3600, c.author_tz_offset)
+ assert_equal("Michael Trier", c.committer.name)
+ assert_equal("mtrier@gmail.com", c.committer.email)
+ assert_equal(1232829715, c.committed_date)
+ assert_equal(5*3600, c.committer_tz_offset)
+ assert_equal("Bumped version 0.1.6\n", c.message)
+
+ c = commits[1]
+ assert isinstance(c.parents, tuple)
+
+ def test_trees(self):
+ mc = 30
+ num_trees = 0
+ for tree in self.rorepo.iter_trees('0.1.5', max_count=mc):
+ num_trees += 1
+ assert isinstance(tree, Tree)
+ # END for each tree
+ assert num_trees == mc
+
+
+ def _test_empty_repo(self, repo):
+ # test all kinds of things with an empty, freshly initialized repo.
+ # It should throw good errors
+
+ # entries should be empty
+ assert len(repo.index.entries) == 0
+
+ # head is accessible
+ assert repo.head
+ assert repo.head.ref
+ assert not repo.head.is_valid()
+
+ # we can change the head to some other ref
+ head_ref = Head.from_path(repo, Head.to_full_path('some_head'))
+ assert not head_ref.is_valid()
+ repo.head.ref = head_ref
+
+ # is_dirty can handle all kwargs
+ for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)):
+ assert not repo.is_dirty(*args)
+ # END for each arg
+
+ # we can add a file to the index ( if we are not bare )
+ if not repo.bare:
+ pass
+ # END test repos with working tree
+
+
+ def test_init(self):
+ prev_cwd = os.getcwd()
+ os.chdir(tempfile.gettempdir())
+ git_dir_rela = "repos/foo/bar.git"
+ del_dir_abs = os.path.abspath("repos")
+ git_dir_abs = os.path.abspath(git_dir_rela)
+ try:
+ # with specific path
+ for path in (git_dir_rela, git_dir_abs):
+ r = Repo.init(path=path, bare=True)
+ assert isinstance(r, Repo)
+ assert r.bare == True
+ assert os.path.isdir(r.git_dir)
+
+ self._test_empty_repo(r)
+ shutil.rmtree(git_dir_abs)
+ # END for each path
+
+ os.makedirs(git_dir_rela)
+ os.chdir(git_dir_rela)
+ r = Repo.init(bare=False)
+			assert r.bare == False
+
+ self._test_empty_repo(r)
+ finally:
+ try:
+ shutil.rmtree(del_dir_abs)
+ except OSError:
+ pass
+ os.chdir(prev_cwd)
+ # END restore previous state
+
+ def test_bare_property(self):
+ self.rorepo.bare
+
+ @patch_object(Repo, '__init__')
+ @patch_object(Git, '_call_process')
+ def test_init_with_options(self, git, repo):
+ git.return_value = True
+ repo.return_value = None
+
+ r = Repo.init("repos/foo/bar.git", **{'bare' : True,'template': "/baz/sweet"})
+ assert isinstance(r, Repo)
+
+ assert_true(git.called)
+ assert_true(repo.called)
+
+ @patch_object(Repo, '__init__')
+ @patch_object(Git, '_call_process')
+ def test_clone(self, git, repo):
+ git.return_value = None
+ repo.return_value = None
+
+ self.rorepo.clone("repos/foo/bar.git")
+
+ assert_true(git.called)
+ path = os.path.join(absolute_project_path(), '.git')
+ assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'), {}))
+ assert_true(repo.called)
+
+ @patch_object(Repo, '__init__')
+ @patch_object(Git, '_call_process')
+ def test_clone_with_options(self, git, repo):
+ git.return_value = None
+ repo.return_value = None
+
+ self.rorepo.clone("repos/foo/bar.git", **{'template': '/awesome'})
+
+ assert_true(git.called)
+ path = os.path.join(absolute_project_path(), '.git')
+ assert_equal(git.call_args, (('clone', path, 'repos/foo/bar.git'),
+ { 'template': '/awesome'}))
+ assert_true(repo.called)
+
+
+ def test_daemon_export(self):
+ orig_val = self.rorepo.daemon_export
+ self.rorepo.daemon_export = not orig_val
+ assert self.rorepo.daemon_export == ( not orig_val )
+ self.rorepo.daemon_export = orig_val
+ assert self.rorepo.daemon_export == orig_val
- def test_alternates(self):
- cur_alternates = self.rorepo.alternates
- # empty alternates
- self.rorepo.alternates = []
- assert self.rorepo.alternates == []
- alts = [ "other/location", "this/location" ]
- self.rorepo.alternates = alts
- assert alts == self.rorepo.alternates
- self.rorepo.alternates = cur_alternates
-
- def test_repr(self):
- path = os.path.join(os.path.abspath(GIT_REPO), '.git')
- assert_equal('<git.Repo "%s">' % path, repr(self.rorepo))
-
- def test_is_dirty_with_bare_repository(self):
- self.rorepo._bare = True
- assert_false(self.rorepo.is_dirty())
-
- def test_is_dirty(self):
- self.rorepo._bare = False
- for index in (0,1):
- for working_tree in (0,1):
- for untracked_files in (0,1):
- assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False)
- # END untracked files
- # END working tree
- # END index
- self.rorepo._bare = True
- assert self.rorepo.is_dirty() == False
-
- def test_head(self):
- assert self.rorepo.head.reference.object == self.rorepo.active_branch.object
-
- def test_index(self):
- index = self.rorepo.index
- assert isinstance(index, IndexFile)
-
- def test_tag(self):
- assert self.rorepo.tag('refs/tags/0.1.5').commit
-
- def test_archive(self):
- tmpfile = os.tmpfile()
- self.rorepo.archive(tmpfile, '0.1.5')
- assert tmpfile.tell()
-
- @patch_object(Git, '_call_process')
- def test_should_display_blame_information(self, git):
- git.return_value = fixture('blame')
- b = self.rorepo.blame( 'master', 'lib/git.py')
- assert_equal(13, len(b))
- assert_equal( 2, len(b[0]) )
- # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b))
- assert_equal(hash(b[0][0]), hash(b[9][0]))
- c = b[0][0]
- assert_true(git.called)
- assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True}))
-
- assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha)
- assert_equal('Tom Preston-Werner', c.author.name)
- assert_equal('tom@mojombo.com', c.author.email)
- assert_equal(1191997100, c.authored_date)
- assert_equal('Tom Preston-Werner', c.committer.name)
- assert_equal('tom@mojombo.com', c.committer.email)
- assert_equal(1191997100, c.committed_date)
- assert_equal('initial grit setup', c.message)
-
- # test the 'lines per commit' entries
- tlist = b[0][1]
- assert_true( tlist )
- assert_true( isinstance( tlist[0], basestring ) )
- assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug
-
- def test_untracked_files(self):
- base = self.rorepo.working_tree_dir
- files = ( join_path_native(base, "__test_myfile"),
- join_path_native(base, "__test_other_file") )
- num_recently_untracked = 0
- try:
- for fpath in files:
- fd = open(fpath,"wb")
- fd.close()
- # END for each filename
- untracked_files = self.rorepo.untracked_files
- num_recently_untracked = len(untracked_files)
-
- # assure we have all names - they are relative to the git-dir
- num_test_untracked = 0
- for utfile in untracked_files:
- num_test_untracked += join_path_native(base, utfile) in files
- assert len(files) == num_test_untracked
- finally:
- for fpath in files:
- if os.path.isfile(fpath):
- os.remove(fpath)
- # END handle files
-
- assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files))
-
- def test_config_reader(self):
- reader = self.rorepo.config_reader() # all config files
- assert reader.read_only
- reader = self.rorepo.config_reader("repository") # single config file
- assert reader.read_only
-
- def test_config_writer(self):
- for config_level in self.rorepo.config_level:
- try:
- writer = self.rorepo.config_writer(config_level)
- assert not writer.read_only
- except IOError:
- # its okay not to get a writer for some configuration files if we
- # have no permissions
- pass
- # END for each config level
-
- def test_creation_deletion(self):
- # just a very quick test to assure it generally works. There are
- # specialized cases in the test_refs module
- head = self.rorepo.create_head("new_head", "HEAD~1")
- self.rorepo.delete_head(head)
-
- tag = self.rorepo.create_tag("new_tag", "HEAD~2")
- self.rorepo.delete_tag(tag)
-
- remote = self.rorepo.create_remote("new_remote", "git@server:repo.git")
- self.rorepo.delete_remote(remote)
-
- def test_comparison_and_hash(self):
- # this is only a preliminary test, more testing done in test_index
- assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo)
- assert len(set((self.rorepo, self.rorepo))) == 1
+ def test_alternates(self):
+ cur_alternates = self.rorepo.alternates
+ # empty alternates
+ self.rorepo.alternates = []
+ assert self.rorepo.alternates == []
+ alts = [ "other/location", "this/location" ]
+ self.rorepo.alternates = alts
+ assert alts == self.rorepo.alternates
+ self.rorepo.alternates = cur_alternates
+
+ def test_repr(self):
+ path = os.path.join(os.path.abspath(GIT_REPO), '.git')
+ assert_equal('<git.Repo "%s">' % path, repr(self.rorepo))
+
+ def test_is_dirty_with_bare_repository(self):
+ self.rorepo._bare = True
+ assert_false(self.rorepo.is_dirty())
+
+ def test_is_dirty(self):
+ self.rorepo._bare = False
+ for index in (0,1):
+ for working_tree in (0,1):
+ for untracked_files in (0,1):
+ assert self.rorepo.is_dirty(index, working_tree, untracked_files) in (True, False)
+ # END untracked files
+ # END working tree
+ # END index
+ self.rorepo._bare = True
+ assert self.rorepo.is_dirty() == False
+
+ def test_head(self):
+ assert self.rorepo.head.reference.object == self.rorepo.active_branch.object
+
+ def test_index(self):
+ index = self.rorepo.index
+ assert isinstance(index, IndexFile)
+
+ def test_tag(self):
+ assert self.rorepo.tag('refs/tags/0.1.5').commit
+
+ def test_archive(self):
+ tmpfile = os.tmpfile()
+ self.rorepo.archive(tmpfile, '0.1.5')
+ assert tmpfile.tell()
+
+ @patch_object(Git, '_call_process')
+ def test_should_display_blame_information(self, git):
+ git.return_value = fixture('blame')
+ b = self.rorepo.blame( 'master', 'lib/git.py')
+ assert_equal(13, len(b))
+ assert_equal( 2, len(b[0]) )
+ # assert_equal(25, reduce(lambda acc, x: acc + len(x[-1]), b))
+ assert_equal(hash(b[0][0]), hash(b[9][0]))
+ c = b[0][0]
+ assert_true(git.called)
+ assert_equal(git.call_args, (('blame', 'master', '--', 'lib/git.py'), {'p': True}))
+
+ assert_equal('634396b2f541a9f2d58b00be1a07f0c358b999b3', c.sha)
+ assert_equal('Tom Preston-Werner', c.author.name)
+ assert_equal('tom@mojombo.com', c.author.email)
+ assert_equal(1191997100, c.authored_date)
+ assert_equal('Tom Preston-Werner', c.committer.name)
+ assert_equal('tom@mojombo.com', c.committer.email)
+ assert_equal(1191997100, c.committed_date)
+ assert_equal('initial grit setup', c.message)
+
+ # test the 'lines per commit' entries
+ tlist = b[0][1]
+ assert_true( tlist )
+ assert_true( isinstance( tlist[0], basestring ) )
+ assert_true( len( tlist ) < sum( len(t) for t in tlist ) ) # test for single-char bug
+
+ def test_untracked_files(self):
+ base = self.rorepo.working_tree_dir
+ files = ( join_path_native(base, "__test_myfile"),
+ join_path_native(base, "__test_other_file") )
+ num_recently_untracked = 0
+ try:
+ for fpath in files:
+ fd = open(fpath,"wb")
+ fd.close()
+ # END for each filename
+ untracked_files = self.rorepo.untracked_files
+ num_recently_untracked = len(untracked_files)
+
+ # assure we have all names - they are relative to the git-dir
+ num_test_untracked = 0
+ for utfile in untracked_files:
+ num_test_untracked += join_path_native(base, utfile) in files
+ assert len(files) == num_test_untracked
+ finally:
+ for fpath in files:
+ if os.path.isfile(fpath):
+ os.remove(fpath)
+ # END handle files
+
+ assert len(self.rorepo.untracked_files) == (num_recently_untracked - len(files))
+
+ def test_config_reader(self):
+ reader = self.rorepo.config_reader() # all config files
+ assert reader.read_only
+ reader = self.rorepo.config_reader("repository") # single config file
+ assert reader.read_only
+
+ def test_config_writer(self):
+ for config_level in self.rorepo.config_level:
+ try:
+ writer = self.rorepo.config_writer(config_level)
+ assert not writer.read_only
+ except IOError:
+				# it's okay not to get a writer for some configuration files if we
+ # have no permissions
+ pass
+ # END for each config level
+
+ def test_creation_deletion(self):
+ # just a very quick test to assure it generally works. There are
+ # specialized cases in the test_refs module
+ head = self.rorepo.create_head("new_head", "HEAD~1")
+ self.rorepo.delete_head(head)
+
+ tag = self.rorepo.create_tag("new_tag", "HEAD~2")
+ self.rorepo.delete_tag(tag)
+
+ remote = self.rorepo.create_remote("new_remote", "git@server:repo.git")
+ self.rorepo.delete_remote(remote)
+
+ def test_comparison_and_hash(self):
+ # this is only a preliminary test, more testing done in test_index
+ assert self.rorepo == self.rorepo and not (self.rorepo != self.rorepo)
+ assert len(set((self.rorepo, self.rorepo))) == 1
+
+ def test_git_cmd(self):
+ # test CatFileContentStream, just to be very sure we have no fencepost errors
+ # last \n is the terminating newline that it expects
+ l1 = "0123456789\n"
+ l2 = "abcdefghijklmnopqrstxy\n"
+ l3 = "z\n"
+ d = "%s%s%s\n" % (l1, l2, l3)
+
+ l1p = l1[:5]
+
+ # full size
+ # size is without terminating newline
+ def mkfull():
+ return Git.CatFileContentStream(len(d)-1, StringIO(d))
+
+ ts = 5
+ def mktiny():
+ return Git.CatFileContentStream(ts, StringIO(d))
+
+ # readlines no limit
+ s = mkfull()
+ lines = s.readlines()
+ assert len(lines) == 3 and lines[-1].endswith('\n')
+ assert s._stream.tell() == len(d) # must have scrubbed to the end
+
+		# readlines line limit
+ s = mkfull()
+ lines = s.readlines(5)
+ assert len(lines) == 1
+
+ # readlines on tiny sections
+ s = mktiny()
+ lines = s.readlines()
+ assert len(lines) == 1 and lines[0] == l1p
+ assert s._stream.tell() == ts+1
+
+ # readline no limit
+ s = mkfull()
+ assert s.readline() == l1
+ assert s.readline() == l2
+ assert s.readline() == l3
+ assert s.readline() == ''
+ assert s._stream.tell() == len(d)
+
+ # readline limit
+ s = mkfull()
+ assert s.readline(5) == l1p
+ assert s.readline() == l1[5:]
+
+ # readline on tiny section
+ s = mktiny()
+ assert s.readline() == l1p
+ assert s.readline() == ''
+ assert s._stream.tell() == ts+1
+
+ # read no limit
+ s = mkfull()
+ assert s.read() == d[:-1]
+ assert s.read() == ''
+ assert s._stream.tell() == len(d)
+
+ # read limit
+ s = mkfull()
+ assert s.read(5) == l1p
+ assert s.read(6) == l1[5:]
+		assert s._stream.tell() == 5 + 6	# it's not yet done
+
+ # read tiny
+ s = mktiny()
+ assert s.read(2) == l1[:2]
+ assert s._stream.tell() == 2
+ assert s.read() == l1[2:ts]
+ assert s._stream.tell() == ts+1
diff --git a/test/git/test_utils.py b/test/git/test_utils.py
index f843c12e..83ef7e4b 100644
--- a/test/git/test_utils.py
+++ b/test/git/test_utils.py
@@ -9,112 +9,144 @@ import tempfile
from test.testlib import *
from git.utils import *
+from git.objects.utils import *
from git import *
from git.cmd import dashify
import time
class TestUtils(TestCase):
- def setup(self):
- self.testdict = {
- "string": "42",
- "int": 42,
- "array": [ 42 ],
- }
+ def setup(self):
+ self.testdict = {
+ "string": "42",
+ "int": 42,
+ "array": [ 42 ],
+ }
- def test_it_should_dashify(self):
- assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
- assert_equal('foo', dashify('foo'))
-
-
- def test_lock_file(self):
- my_file = tempfile.mktemp()
- lock_file = LockFile(my_file)
- assert not lock_file._has_lock()
- # release lock we don't have - fine
- lock_file._release_lock()
-
- # get lock
- lock_file._obtain_lock_or_raise()
- assert lock_file._has_lock()
-
- # concurrent access
- other_lock_file = LockFile(my_file)
- assert not other_lock_file._has_lock()
- self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
-
- lock_file._release_lock()
- assert not lock_file._has_lock()
-
- other_lock_file._obtain_lock_or_raise()
- self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
-
- # auto-release on destruction
- del(other_lock_file)
- lock_file._obtain_lock_or_raise()
- lock_file._release_lock()
-
- def test_blocking_lock_file(self):
- my_file = tempfile.mktemp()
- lock_file = BlockingLockFile(my_file)
- lock_file._obtain_lock()
-
- # next one waits for the lock
- start = time.time()
- wait_time = 0.1
- wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
- self.failUnlessRaises(IOError, wait_lock._obtain_lock)
- elapsed = time.time() - start
- assert elapsed <= wait_time + 0.02 # some extra time it may cost
-
- def _cmp_contents(self, file_path, data):
- # raise if data from file at file_path
- # does not match data string
- fp = open(file_path, "rb")
- try:
- assert fp.read() == data
- finally:
- fp.close()
-
- def test_safe_operation(self):
- my_file = tempfile.mktemp()
- orig_data = "hello"
- new_data = "world"
- my_file_fp = open(my_file, "wb")
- my_file_fp.write(orig_data)
- my_file_fp.close()
-
- try:
- cwrite = ConcurrentWriteOperation(my_file)
-
- # didn't start writing, doesnt matter
- cwrite._end_writing(False)
- cwrite._end_writing(True)
- assert not cwrite._is_writing()
-
- # write data and fail
- stream = cwrite._begin_writing()
- assert cwrite._is_writing()
- stream.write(new_data)
- cwrite._end_writing(successful=False)
- self._cmp_contents(my_file, orig_data)
- assert not os.path.exists(stream.name)
-
- # write data - concurrently
- ocwrite = ConcurrentWriteOperation(my_file)
- stream = cwrite._begin_writing()
- self.failUnlessRaises(IOError, ocwrite._begin_writing)
-
- stream.write("world")
- cwrite._end_writing(successful=True)
- self._cmp_contents(my_file, new_data)
- assert not os.path.exists(stream.name)
-
- # could test automatic _end_writing on destruction
- finally:
- os.remove(my_file)
- # END final cleanup
-
-
-
-
+ def test_it_should_dashify(self):
+ assert_equal('this-is-my-argument', dashify('this_is_my_argument'))
+ assert_equal('foo', dashify('foo'))
+
+
+ def test_lock_file(self):
+ my_file = tempfile.mktemp()
+ lock_file = LockFile(my_file)
+ assert not lock_file._has_lock()
+ # release lock we don't have - fine
+ lock_file._release_lock()
+
+ # get lock
+ lock_file._obtain_lock_or_raise()
+ assert lock_file._has_lock()
+
+ # concurrent access
+ other_lock_file = LockFile(my_file)
+ assert not other_lock_file._has_lock()
+ self.failUnlessRaises(IOError, other_lock_file._obtain_lock_or_raise)
+
+ lock_file._release_lock()
+ assert not lock_file._has_lock()
+
+ other_lock_file._obtain_lock_or_raise()
+ self.failUnlessRaises(IOError, lock_file._obtain_lock_or_raise)
+
+ # auto-release on destruction
+ del(other_lock_file)
+ lock_file._obtain_lock_or_raise()
+ lock_file._release_lock()
+
+ def test_blocking_lock_file(self):
+ my_file = tempfile.mktemp()
+ lock_file = BlockingLockFile(my_file)
+ lock_file._obtain_lock()
+
+ # next one waits for the lock
+ start = time.time()
+ wait_time = 0.1
+ wait_lock = BlockingLockFile(my_file, 0.05, wait_time)
+ self.failUnlessRaises(IOError, wait_lock._obtain_lock)
+ elapsed = time.time() - start
+ assert elapsed <= wait_time + 0.02 # allow some extra time for scheduling overhead
+
+ def _cmp_contents(self, file_path, data):
+ # raise if the data in the file at file_path
+ # does not match the given data string
+ fp = open(file_path, "rb")
+ try:
+ assert fp.read() == data
+ finally:
+ fp.close()
+
+ def test_safe_operation(self):
+ my_file = tempfile.mktemp()
+ orig_data = "hello"
+ new_data = "world"
+ my_file_fp = open(my_file, "wb")
+ my_file_fp.write(orig_data)
+ my_file_fp.close()
+
+ try:
+ cwrite = ConcurrentWriteOperation(my_file)
+
+ # didn't start writing - doesn't matter
+ cwrite._end_writing(False)
+ cwrite._end_writing(True)
+ assert not cwrite._is_writing()
+
+ # write data and fail
+ stream = cwrite._begin_writing()
+ assert cwrite._is_writing()
+ stream.write(new_data)
+ cwrite._end_writing(successful=False)
+ self._cmp_contents(my_file, orig_data)
+ assert not os.path.exists(stream.name)
+
+ # write data - concurrently
+ ocwrite = ConcurrentWriteOperation(my_file)
+ stream = cwrite._begin_writing()
+ self.failUnlessRaises(IOError, ocwrite._begin_writing)
+
+ stream.write("world")
+ cwrite._end_writing(successful=True)
+ self._cmp_contents(my_file, new_data)
+ assert not os.path.exists(stream.name)
+
+ # could test automatic _end_writing on destruction
+ finally:
+ os.remove(my_file)
+ # END final cleanup
+
+ def test_user_id(self):
+ assert '@' in get_user_id()
+
+ def test_parse_date(self):
+ # test all supported formats
+ def assert_rval(rval, veri_time, offset=0):
+ assert len(rval) == 2
+ assert isinstance(rval[0], int) and isinstance(rval[1], int)
+ assert rval[0] == veri_time
+ assert rval[1] == offset
+
+ # now that we are here, test our conversion functions as well
+ utctz = altz_to_utctz_str(offset)
+ assert isinstance(utctz, basestring)
+ assert utctz_to_altz(verify_utctz(utctz)) == offset
+ # END assert rval utility
+
+ rfc = ("Thu, 07 Apr 2005 22:13:11 +0000", 0)
+ iso = ("2005-04-07T22:13:11 -0200", 7200)
+ iso2 = ("2005-04-07 22:13:11 +0400", -14400)
+ iso3 = ("2005.04.07 22:13:11 -0000", 0)
+ alt = ("04/07/2005 22:13:11", 0)
+ alt2 = ("07.04.2005 22:13:11", 0)
+ veri_time = 1112904791 # the time this represents
+ for date, offset in (rfc, iso, iso2, iso3, alt, alt2):
+ assert_rval(parse_date(date), veri_time, offset)
+ # END for each date type
+
+ # and failure
+ self.failUnlessRaises(ValueError, parse_date, 'invalid format')
+ self.failUnlessRaises(ValueError, parse_date, '123456789 -02000')
+ self.failUnlessRaises(ValueError, parse_date, ' 123456789 -0200')
+
+
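The format table in test_parse_date doubles as documentation: parse_date returns a (seconds-since-epoch, utc-offset-in-seconds) pair, and the companion helpers convert between git's "+HHMM"/"-HHMM" timezone strings and an offset in seconds west of UTC. A short usage sketch matching the assertions above (the exact string produced by altz_to_utctz_str is inferred from the round-trip test, not verified separately):

    from git.objects.utils import parse_date, verify_utctz
    from git.objects.utils import altz_to_utctz_str, utctz_to_altz

    stamp, offset = parse_date("2005-04-07T22:13:11 -0200")
    assert stamp == 1112904791 and offset == 7200   # seconds west of UTC

    # round-trip: seconds offset -> "+HHMM"/"-HHMM" string -> seconds offset
    utctz = altz_to_utctz_str(offset)               # presumably "-0200"
    assert utctz_to_altz(verify_utctz(utctz)) == offset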
diff --git a/test/testlib/helper.py b/test/testlib/helper.py
index 9c38ffd5..c9b4c2ac 100644
--- a/test/testlib/helper.py
+++ b/test/testlib/helper.py
@@ -9,6 +9,7 @@ from git import Repo, Remote, GitCommandError
from unittest import TestCase
import tempfile
import shutil
+import cStringIO
GIT_REPO = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
@@ -23,40 +24,13 @@ def absolute_project_path():
return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
-class ListProcessAdapter(object):
- """Allows to use lists as Process object as returned by SubProcess.Popen.
+class StringProcessAdapter(object):
+ """Allows using strings as a Process object, as returned by subprocess.Popen.
 It's tailored to work with the test system only"""
- class Stream(object):
- """Simple stream emulater meant to work only with tests"""
- def __init__(self, data):
- self.data = data
- self.cur_iter = None
-
- def __iter__(self):
- dat = self.data
- if isinstance(dat, basestring):
- dat = dat.splitlines()
- if self.cur_iter is None:
- self.cur_iter = iter(dat)
- return self.cur_iter
-
- def read(self):
- dat = self.data
- if isinstance(dat, (tuple,list)):
- dat = "\n".join(dat)
- return dat
-
- def next(self):
- if self.cur_iter is None:
- self.cur_iter = iter(self)
- return self.cur_iter.next()
-
- # END stream
-
- def __init__(self, input_list_or_string):
- self.stdout = self.Stream(input_list_or_string)
- self.stderr = self.Stream('')
+ def __init__(self, input_string):
+ self.stdout = cStringIO.StringIO(input_string)
+ self.stderr = cStringIO.StringIO()
def wait(self):
return 0
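With the adapter reduced to two cStringIO streams, any test that previously handed in a list of lines now has to join it into a single string first. A hypothetical use in a test (fixture() refers to this helper module's fixture loader; the rev_list fixture name is taken from the test data shipped with this change):

    # feed canned rev-list output to code that expects a Popen-like object
    proc = StringProcessAdapter(fixture('rev_list'))

    line = proc.stdout.readline()   # behaves like a real process' stdout
    rest = proc.stdout.read()
    assert proc.wait() == 0         # mimics a git process that exited cleanly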