author | Sebastian Thiel <byronimo@gmail.com> | 2010-03-04 09:06:03 +0100
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-03-04 09:06:03 +0100
commit | 72bcdbd0a0c8cc6aa2a7433169aa49c7fc19b55b
tree | 09e28dfbad901791a8306e890f69a6b737d20a03 /lib/git
parent | 4956c1e618bfcfeef86c6ea90c22dd04ca81b9db
download | gitpython-72bcdbd0a0c8cc6aa2a7433169aa49c7fc19b55b.tar.gz
Converted all tabs to 4 space characters each to comply with pep8
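The change is purely mechanical: every tab in the Python sources under lib/git becomes four spaces. As a rough sketch of how such a tree-wide conversion can be scripted (an illustration only; the commit does not record what tool the author actually used):

```python
import os

def untabify(path, tabsize=4):
    """Rewrite a file in place, replacing each tab with `tabsize` spaces."""
    with open(path) as fh:
        text = fh.read()
    converted = text.replace("\t", " " * tabsize)
    if converted != text:
        with open(path, "w") as fh:
            fh.write(converted)

# walk the subtree this commit touches and convert every Python file
for root, dirs, files in os.walk("lib/git"):
    for name in files:
        if name.endswith(".py"):
            untabify(os.path.join(root, name))
```

A plain `replace` matches the wording of the commit message ("4 space characters each"); `str.expandtabs(4)` would instead align to 4-column tab stops, which gives different results when a tab appears after other text on a line.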
Diffstat (limited to 'lib/git')
-rw-r--r-- | lib/git/__init__.py | 2
-rw-r--r-- | lib/git/actor.py | 106
-rw-r--r-- | lib/git/cmd.py | 742
-rw-r--r-- | lib/git/config.py | 784
-rw-r--r-- | lib/git/diff.py | 668
-rw-r--r-- | lib/git/errors.py | 34
-rw-r--r-- | lib/git/index.py | 2644
-rw-r--r-- | lib/git/objects/__init__.py | 2
-rw-r--r-- | lib/git/objects/base.py | 416
-rw-r--r-- | lib/git/objects/blob.py | 42
-rw-r--r-- | lib/git/objects/commit.py | 698
-rw-r--r-- | lib/git/objects/tag.py | 124
-rw-r--r-- | lib/git/objects/tree.py | 454
-rw-r--r-- | lib/git/objects/utils.py | 296
-rw-r--r-- | lib/git/refs.py | 1826
-rw-r--r-- | lib/git/remote.py | 1520
-rw-r--r-- | lib/git/repo.py | 1522
-rw-r--r-- | lib/git/stats.py | 104
-rw-r--r-- | lib/git/utils.py | 716
19 files changed, 6350 insertions, 6350 deletions
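The diffstat is symmetric, 6350 lines in and 6350 lines out, which is exactly what a pure re-indentation should look like. One quick way to confirm that no non-whitespace content changed is `git diff -w`, which ignores whitespace differences; empty output means the commit is whitespace-only. A minimal sketch, assuming the two SHAs above are available in a local clone:

```python
import subprocess

PARENT = "4956c1e618bfcfeef86c6ea90c22dd04ca81b9db"
COMMIT = "72bcdbd0a0c8cc6aa2a7433169aa49c7fc19b55b"

# -w makes git diff ignore all whitespace changes, so any remaining
# output would indicate a real (non-whitespace) modification.
out = subprocess.check_output(
    ["git", "diff", "-w", PARENT, COMMIT, "--", "lib/git"])
print("whitespace-only change" if not out.strip() else "content changed!")
```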
diff --git a/lib/git/__init__.py b/lib/git/__init__.py index 75eba3c9..aac539eb 100644 --- a/lib/git/__init__.py +++ b/lib/git/__init__.py @@ -23,4 +23,4 @@ from git.index import * from git.utils import LockFile, BlockingLockFile __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ] + if not (name.startswith('_') or inspect.ismodule(obj)) ] diff --git a/lib/git/actor.py b/lib/git/actor.py index 5de55b8d..b5426f21 100644 --- a/lib/git/actor.py +++ b/lib/git/actor.py @@ -7,56 +7,56 @@ import re class Actor(object): - """Actors hold information about a person acting on the repository. They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - # precompiled regex - name_only_regex = re.compile( r'<(.+)>' ) - name_email_regex = re.compile( r'(.*) <(.+?)>' ) - - def __init__(self, name, email): - self.name = name - self.email = email - - def __eq__(self, other): - return self.name == other.name and self.email == other.email - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.name, self.email)) - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.Actor "%s <%s>">' % (self.name, self.email) - - @classmethod - def _from_string(cls, string): - """ - Create an Actor from a string. - - ``str`` - is the string, which is expected to be in regular git format - - Format - John Doe <jdoe@example.com> - - Returns - Actor - """ - m = cls.name_email_regex.search(string) - if m: - name, email = m.groups() - return Actor(name, email) - else: - m = cls.name_only_regex.search(string) - if m: - return Actor(m.group(1), None) - else: - # assume best and use the whole string as name - return Actor(string, None) - # END special case name - # END handle name/email matching + """Actors hold information about a person acting on the repository. They + can be committers and authors or anything with a name and an email as + mentioned in the git log entries.""" + # precompiled regex + name_only_regex = re.compile( r'<(.+)>' ) + name_email_regex = re.compile( r'(.*) <(.+?)>' ) + + def __init__(self, name, email): + self.name = name + self.email = email + + def __eq__(self, other): + return self.name == other.name and self.email == other.email + + def __ne__(self, other): + return not (self == other) + + def __hash__(self): + return hash((self.name, self.email)) + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.Actor "%s <%s>">' % (self.name, self.email) + + @classmethod + def _from_string(cls, string): + """ + Create an Actor from a string. 
+ + ``str`` + is the string, which is expected to be in regular git format + + Format + John Doe <jdoe@example.com> + + Returns + Actor + """ + m = cls.name_email_regex.search(string) + if m: + name, email = m.groups() + return Actor(name, email) + else: + m = cls.name_only_regex.search(string) + if m: + return Actor(m.group(1), None) + else: + # assume best and use the whole string as name + return Actor(string, None) + # END special case name + # END handle name/email matching diff --git a/lib/git/cmd.py b/lib/git/cmd.py index e361e772..60912142 100644 --- a/lib/git/cmd.py +++ b/lib/git/cmd.py @@ -13,8 +13,8 @@ from errors import GitCommandError GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) + 'with_exceptions', 'as_process', + 'output_stream' ) extra = {} # NOTE: Execution through a shell on windows appears to be slightly faster, but in fact @@ -23,401 +23,401 @@ extra = {} # between the OS which is why the shell should not be used ( unless it does not work # otherwise ) #if sys.platform == 'win32': -# extra = {'shell': False} +# extra = {'shell': False} def dashify(string): - return string.replace('_', '-') + return string.replace('_', '-') class Git(object): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") - - class AutoInterrupt(object): - """ - Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise. - """ - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """ - Wait for the process and return its status code. - - Raise - GitCommandError if the return status is not 0 - """ - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - - - - def __init__(self, working_dir=None): - """ - Initialize this instance with: - - ``working_dir`` - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd(). 
- It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories. - """ - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None + """ + The Git class manages communication with the Git binary. + + It provides a convenient interface to calling the Git binary, such as in:: + + g = Git( git_dir ) + g.init() # calls 'git init' program + rval = g.ls_files() # calls 'git ls-files' program + + ``Debugging`` + Set the GIT_PYTHON_TRACE environment variable print each invocation + of the command to stdout. + Set its value to 'full' to see details about the returned values. + """ + __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") + + class AutoInterrupt(object): + """ + Kill/Interrupt the stored process instance once this instance goes out of scope. It is + used to prevent processes piling up in case iterators stop reading. + Besides all attributes are wired through to the contained process object. + + The wait method was overridden to perform automatic status code checking + and possibly raise. + """ + __slots__= ("proc", "args") + + def __init__(self, proc, args ): + self.proc = proc + self.args = args + + def __del__(self): + # did the process finish already so we have a return code ? + if self.proc.poll() is not None: + return + + # try to kill it + try: + os.kill(self.proc.pid, 2) # interrupt signal + except AttributeError: + # try windows + # for some reason, providing None for stdout/stderr still prints something. This is why + # we simply use the shell and redirect to nul. Its slower than CreateProcess, question + # is whether we really want to see all these messages. Its annoying no matter what. + subprocess.call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) + # END exception handling + + def __getattr__(self, attr): + return getattr(self.proc, attr) + + def wait(self): + """ + Wait for the process and return its status code. + + Raise + GitCommandError if the return status is not 0 + """ + status = self.proc.wait() + if status != 0: + raise GitCommandError(self.args, status, self.proc.stderr.read()) + # END status handling + return status + + + + def __init__(self, working_dir=None): + """ + Initialize this instance with: + + ``working_dir`` + Git directory we should work in. If None, we always work in the current + directory as returned by os.getcwd(). + It is meant to be the working tree directory if available, or the + .git directory in case of bare repositories. + """ + super(Git, self).__init__() + self._working_dir = working_dir + + # cached command slots + self.cat_file_header = None + self.cat_file_all = None - def __getattr__(self, name): - """ - A convenience method as it allows to call the command as if it was - an object. - Returns - Callable object that will execute call _call_process with your arguments. - """ - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) + def __getattr__(self, name): + """ + A convenience method as it allows to call the command as if it was + an object. + Returns + Callable object that will execute call _call_process with your arguments. 
+ """ + if name[:1] == '_': + raise AttributeError(name) + return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - @property - def working_dir(self): - """ - Returns - Git directory we are working on - """ - return self._working_dir + @property + def working_dir(self): + """ + Returns + Git directory we are working on + """ + return self._working_dir - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None - ): - """ - Handles executing the command on the shell and consumes and returns - the returned information (stdout) + def execute(self, command, + istream=None, + with_keep_cwd=False, + with_extended_output=False, + with_exceptions=True, + as_process=False, + output_stream=None + ): + """ + Handles executing the command on the shell and consumes and returns + the returned information (stdout) - ``command`` - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. + ``command`` + The command argument list to execute. + It should be a string, or a sequence of program arguments. The + program to execute is the first item in the args sequence or string. - ``istream`` - Standard input filehandle passed to subprocess.Popen. + ``istream`` + Standard input filehandle passed to subprocess.Popen. - ``with_keep_cwd`` - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. + ``with_keep_cwd`` + Whether to use the current working directory from os.getcwd(). + The cmd otherwise uses its own working_dir that it has been initialized + with if possible. - ``with_extended_output`` - Whether to return a (status, stdout, stderr) tuple. + ``with_extended_output`` + Whether to return a (status, stdout, stderr) tuple. - ``with_exceptions`` - Whether to raise an exception when git returns a non-zero status. + ``with_exceptions`` + Whether to raise an exception when git returns a non-zero status. - ``as_process`` - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - ``output_stream`` - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. - - - Returns:: - - str(output) # extended_output = False (Default) - tuple(int(status), str(stdout), str(stderr)) # extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - output_stream # extended_output = False - tuple(int(status), output_stream, str(stderr))# extended_output = True - - Raise - GitCommandError - - NOTE - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module. 
- """ - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) + ``as_process`` + Whether to return the created process instance directly from which + streams can be read on demand. This will render with_extended_output and + with_exceptions ineffective - the caller will have + to deal with the details himself. + It is important to note that the process will be placed into an AutoInterrupt + wrapper that will interrupt the process once it goes out of scope. If you + use the command in iterators, you should pass the whole process instance + instead of a single stream. + + ``output_stream`` + If set to a file-like object, data produced by the git command will be + output to the given stream directly. + This feature only has any effect if as_process is False. Processes will + always be created with a pipe due to issues with subprocess. + This merely is a workaround as data will be copied from the + output pipe to the given output stream directly. + + + Returns:: + + str(output) # extended_output = False (Default) + tuple(int(status), str(stdout), str(stderr)) # extended_output = True + + if ouput_stream is True, the stdout value will be your output stream: + output_stream # extended_output = False + tuple(int(status), output_stream, str(stderr))# extended_output = True + + Raise + GitCommandError + + NOTE + If you add additional keyword arguments to the signature of this method, + you must update the execute_kwargs tuple housed in this module. + """ + if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': + print ' '.join(command) - # Allow the user to have the command executed in their working dir. - if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = subprocess.Popen(command, - cwd=cwd, - stdin=istream, - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - **extra - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value = proc.stdout.read().rstrip() # strip trailing "\n" - else: - max_chunk_size = 1024*64 - while True: - chunk = proc.stdout.read(max_chunk_size) - output_stream.write(chunk) - if len(chunk) < max_chunk_size: - break - # END reading output stream - stdout_value = output_stream - # END stdout handling - stderr_value = proc.stderr.read().rstrip() # strip trailing "\n" - - # waiting here should do nothing as we have finished stream reading - status = proc.wait() - finally: - proc.stdout.close() - proc.stderr.close() + # Allow the user to have the command executed in their working dir. 
+ if with_keep_cwd or self._working_dir is None: + cwd = os.getcwd() + else: + cwd=self._working_dir + + # Start the process + proc = subprocess.Popen(command, + cwd=cwd, + stdin=istream, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + **extra + ) + if as_process: + return self.AutoInterrupt(proc, command) + + # Wait for the process to return + status = 0 + stdout_value = '' + stderr_value = '' + try: + if output_stream is None: + stdout_value = proc.stdout.read().rstrip() # strip trailing "\n" + else: + max_chunk_size = 1024*64 + while True: + chunk = proc.stdout.read(max_chunk_size) + output_stream.write(chunk) + if len(chunk) < max_chunk_size: + break + # END reading output stream + stdout_value = output_stream + # END stdout handling + stderr_value = proc.stderr.read().rstrip() # strip trailing "\n" + + # waiting here should do nothing as we have finished stream reading + status = proc.wait() + finally: + proc.stdout.close() + proc.stderr.close() - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) + if with_exceptions and status != 0: + raise GitCommandError(command, status, stderr_value) - if GIT_PYTHON_TRACE == 'full': - if stderr_value: - print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d: '%s'" % (command, status, stdout_value) - else: - print "%s -> %d" % (command, status) + if GIT_PYTHON_TRACE == 'full': + if stderr_value: + print "%s -> %d: '%s' !! '%s'" % (command, status, stdout_value, stderr_value) + elif stdout_value: + print "%s -> %d: '%s'" % (command, status, stdout_value) + else: + print "%s -> %d" % (command, status) - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value + # Allow access to the command's status code + if with_extended_output: + return (status, stdout_value, stderr_value) + else: + return stdout_value - def transform_kwargs(self, **kwargs): - """ - Transforms Python style kwargs into git command line options. - """ - args = [] - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args + def transform_kwargs(self, **kwargs): + """ + Transforms Python style kwargs into git command line options. 
+ """ + args = [] + for k, v in kwargs.items(): + if len(k) == 1: + if v is True: + args.append("-%s" % k) + elif type(v) is not bool: + args.append("-%s%s" % (k, v)) + else: + if v is True: + args.append("--%s" % dashify(k)) + elif type(v) is not bool: + args.append("--%s=%s" % (dashify(k), v)) + return args - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist + @classmethod + def __unpack_args(cls, arg_list): + if not isinstance(arg_list, (list,tuple)): + return [ str(arg_list) ] + + outlist = list() + for arg in arg_list: + if isinstance(arg_list, (list, tuple)): + outlist.extend(cls.__unpack_args( arg )) + # END recursion + else: + outlist.append(str(arg)) + # END for each arg + return outlist - def _call_process(self, method, *args, **kwargs): - """ - Run the given git command with the specified arguments and return - the result as a String + def _call_process(self, method, *args, **kwargs): + """ + Run the given git command with the specified arguments and return + the result as a String - ``method`` - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. + ``method`` + is the command. Contained "_" characters will be converted to dashes, + such as in 'ls_files' to call 'ls-files'. - ``args`` - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent + ``args`` + is the list of arguments. If None is included, it will be pruned. + This allows your commands to call git more conveniently as None + is realized as non-existent - ``kwargs`` - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). + ``kwargs`` + is a dict of keyword arguments. + This function accepts the same optional keyword arguments + as execute(). - Examples:: - git.rev_list('master', max_count=10, header=True) + Examples:: + git.rev_list('master', max_count=10, header=True) - Returns - Same as execute() - """ + Returns + Same as execute() + """ - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. - _kwargs = {} - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass + # Handle optional arguments prior to calling transform_kwargs + # otherwise these'll end up in args, which is bad. 
+ _kwargs = {} + for kwarg in execute_kwargs: + try: + _kwargs[kwarg] = kwargs.pop(kwarg) + except KeyError: + pass - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args + # Prepare the argument list + opt_args = self.transform_kwargs(**kwargs) + + ext_args = self.__unpack_args([a for a in args if a is not None]) + args = opt_args + ext_args - call = ["git", dashify(method)] - call.extend(args) + call = ["git", dashify(method)] + call.extend(args) - return self.execute(call, **_kwargs) - - def _parse_object_header(self, header_line): - """ - ``header_line`` - <hex_sha> type_string size_as_int - - Returns - (hex_sha, type_string, size_as_int) - - Raises - ValueError if the header contains indication for an error due to incorrect - input sha - """ - tokens = header_line.split() - if len(tokens) != 3: - raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : subprocess.PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ - Use this method to quickly examine the type and size of the object behind - the given ref. - - NOTE - The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - Return: - (hexsha, type_string, size_as_int) - """ - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ - As get_object_header, but returns object data as well - - Return: - (hexsha, type_string, size_as_int,data_string) - """ - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - data = cmd.stdout.read(size) - cmd.stdout.read(1) # finishing newlines - - return (hexsha, typename, size, data) - - def clear_cache(self): - """ - Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. 
- - Returns - self - """ - self.cat_file_all = None - self.cat_file_header = None - return self + return self.execute(call, **_kwargs) + + def _parse_object_header(self, header_line): + """ + ``header_line`` + <hex_sha> type_string size_as_int + + Returns + (hex_sha, type_string, size_as_int) + + Raises + ValueError if the header contains indication for an error due to incorrect + input sha + """ + tokens = header_line.split() + if len(tokens) != 3: + raise ValueError("SHA named %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip()) ) + if len(tokens[0]) != 40: + raise ValueError("Failed to parse header: %r" % header_line) + return (tokens[0], tokens[1], int(tokens[2])) + + def __prepare_ref(self, ref): + # required for command to separate refs on stdin + refstr = str(ref) # could be ref-object + if refstr.endswith("\n"): + return refstr + return refstr + "\n" + + def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): + cur_val = getattr(self, attr_name) + if cur_val is not None: + return cur_val + + options = { "istream" : subprocess.PIPE, "as_process" : True } + options.update( kwargs ) + + cmd = self._call_process( cmd_name, *args, **options ) + setattr(self, attr_name, cmd ) + return cmd + + def __get_object_header(self, cmd, ref): + cmd.stdin.write(self.__prepare_ref(ref)) + cmd.stdin.flush() + return self._parse_object_header(cmd.stdout.readline()) + + def get_object_header(self, ref): + """ + Use this method to quickly examine the type and size of the object behind + the given ref. + + NOTE + The method will only suffer from the costs of command invocation + once and reuses the command in subsequent calls. + + Return: + (hexsha, type_string, size_as_int) + """ + cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) + return self.__get_object_header(cmd, ref) + + def get_object_data(self, ref): + """ + As get_object_header, but returns object data as well + + Return: + (hexsha, type_string, size_as_int,data_string) + """ + cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) + hexsha, typename, size = self.__get_object_header(cmd, ref) + data = cmd.stdout.read(size) + cmd.stdout.read(1) # finishing newlines + + return (hexsha, typename, size, data) + + def clear_cache(self): + """ + Clear all kinds of internal caches to release resources. + + Currently persistent commands will be interrupted. + + Returns + self + """ + self.cat_file_all = None + self.cat_file_header = None + return self diff --git a/lib/git/config.py b/lib/git/config.py index 6228ebfa..e5fd9902 100644 --- a/lib/git/config.py +++ b/lib/git/config.py @@ -18,403 +18,403 @@ from git.odict import OrderedDict from git.utils import LockFile class _MetaParserBuilder(type): - """ - Utlity class wrapping base-class methods into decorators that assure read-only properties - """ - def __new__(metacls, name, bases, clsdict): - """ - Equip all base-class methods with a _needs_values decorator, and all non-const methods - with a _set_dirty_and_flush_changes decorator in addition to that. 
- """ - mutating_methods = clsdict['_mutating_methods_'] - for base in bases: - methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) - for name, method in methods: - if name in clsdict: - continue - method_with_values = _needs_values(method) - if name in mutating_methods: - method_with_values = _set_dirty_and_flush_changes(method_with_values) - # END mutating methods handling - - clsdict[name] = method_with_values - # END for each base - - new_type = super(_MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) - return new_type - - + """ + Utlity class wrapping base-class methods into decorators that assure read-only properties + """ + def __new__(metacls, name, bases, clsdict): + """ + Equip all base-class methods with a _needs_values decorator, and all non-const methods + with a _set_dirty_and_flush_changes decorator in addition to that. + """ + mutating_methods = clsdict['_mutating_methods_'] + for base in bases: + methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) + for name, method in methods: + if name in clsdict: + continue + method_with_values = _needs_values(method) + if name in mutating_methods: + method_with_values = _set_dirty_and_flush_changes(method_with_values) + # END mutating methods handling + + clsdict[name] = method_with_values + # END for each base + + new_type = super(_MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) + return new_type + + def _needs_values(func): - """ - Returns method assuring we read values (on demand) before we try to access them - """ - def assure_data_present(self, *args, **kwargs): - self.read() - return func(self, *args, **kwargs) - # END wrapper method - assure_data_present.__name__ = func.__name__ - return assure_data_present - + """ + Returns method assuring we read values (on demand) before we try to access them + """ + def assure_data_present(self, *args, **kwargs): + self.read() + return func(self, *args, **kwargs) + # END wrapper method + assure_data_present.__name__ = func.__name__ + return assure_data_present + def _set_dirty_and_flush_changes(non_const_func): - """ - Return method that checks whether given non constant function may be called. - If so, the instance will be set dirty. - Additionally, we flush the changes right to disk - """ - def flush_changes(self, *args, **kwargs): - rval = non_const_func(self, *args, **kwargs) - self.write() - return rval - # END wrapper method - flush_changes.__name__ = non_const_func.__name__ - return flush_changes - - + """ + Return method that checks whether given non constant function may be called. + If so, the instance will be set dirty. + Additionally, we flush the changes right to disk + """ + def flush_changes(self, *args, **kwargs): + rval = non_const_func(self, *args, **kwargs) + self.write() + return rval + # END wrapper method + flush_changes.__name__ = non_const_func.__name__ + return flush_changes + + class GitConfigParser(cp.RawConfigParser, object): - """ - Implements specifics required to read git style configuration files. - - This variation behaves much like the git.config command such that the configuration - will be read on demand based on the filepath given during initialization. - - The changes will automatically be written once the instance goes out of scope, but - can be triggered manually as well. - - The configuration file will be locked if you intend to change values preventing other - instances to write concurrently. 
- - NOTE - The config is case-sensitive even when queried, hence section and option names - must match perfectly. - """ - __metaclass__ = _MetaParserBuilder - - - #{ Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile - t_lock = LockFile - - #} END configuration - - OPTCRE = re.compile( - r'\s?(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace - r'\s*(?P<vi>[:=])\s*' # any number of space/tab, - # followed by separator - # (either : or =), followed - # by any # space/tab - r'(?P<value>.*)$' # everything up to eol - ) - - # list of RawConfigParser methods able to change the instance - _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") - __slots__ = ("_sections", "_defaults", "_file_or_files", "_read_only","_is_initialized", '_lock') - - def __init__(self, file_or_files, read_only=True): - """ - Initialize a configuration reader to read the given file_or_files and to - possibly allow changes to it by setting read_only False - - ``file_or_files`` - A single file path or file objects or multiple of these - - ``read_only`` - If True, the ConfigParser may only read the data , but not change it. - If False, only a single file path or file object may be given. - """ - super(GitConfigParser, self).__init__() - # initialize base with ordered dictionaries to be sure we write the same - # file back - self._sections = OrderedDict() - self._defaults = OrderedDict() - - self._file_or_files = file_or_files - self._read_only = read_only - self._is_initialized = False - self._lock = None - - if not read_only: - if isinstance(file_or_files, (tuple, list)): - raise ValueError("Write-ConfigParsers can operate on a single file only, multiple files have been passed") - # END single file check - - if not isinstance(file_or_files, basestring): - file_or_files = file_or_files.name - # END get filename from handle/stream - # initialize lock base - we want to write - self._lock = self.t_lock(file_or_files) - - self._lock._obtain_lock() - # END read-only check - - - def __del__(self): - """ - Write pending changes if required and release locks - """ - # checking for the lock here makes sure we do not raise during write() - # in case an invalid parser was created who could not get a lock - if self.read_only or not self._lock._has_lock(): - return - - try: - try: - self.write() - except IOError,e: - print "Exception during destruction of GitConfigParser: %s" % str(e) - finally: - self._lock._release_lock() - - def optionxform(self, optionstr): - """ - Do not transform options in any way when writing - """ - return optionstr - - def _read(self, fp, fpname): - """ - A direct copy of the py2.4 version of the super class's _read method - to assure it uses ordered dicts. Had to change one line to make it work. - - Future versions have this fixed, but in fact its quite embarassing for the - guys not to have done it right in the first place ! - - Removed big comments to make it more compact. - - Made sure it ignores initial whitespace as git uses tabs - """ - cursect = None # None, or a dictionary - optname = None - lineno = 0 - e = None # None, or an exception - while True: - line = fp.readline() - if not line: - break - lineno = lineno + 1 - # comment or blank line? 
- if line.strip() == '' or line[0] in '#;': - continue - if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": - # no leading whitespace - continue - else: - # is it a section header? - mo = self.SECTCRE.match(line) - if mo: - sectname = mo.group('header') - if sectname in self._sections: - cursect = self._sections[sectname] - elif sectname == cp.DEFAULTSECT: - cursect = self._defaults - else: - # THE ONLY LINE WE CHANGED ! - cursect = OrderedDict((('__name__', sectname),)) - self._sections[sectname] = cursect - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise cp.MissingSectionHeaderError(fpname, lineno, line) - # an option line? - else: - mo = self.OPTCRE.match(line) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if vi in ('=', ':') and ';' in optval: - pos = optval.find(';') - if pos != -1 and optval[pos-1].isspace(): - optval = optval[:pos] - optval = optval.strip() - if optval == '""': - optval = '' - optname = self.optionxform(optname.rstrip()) - cursect[optname] = optval - else: - if not e: - e = cp.ParsingError(fpname) - e.append(lineno, repr(line)) - # END - # END ? - # END ? - # END while reading - # if any parsing errors occurred, raise an exception - if e: - raise e - - - def read(self): - """ - Reads the data stored in the files we have been initialized with. It will - ignore files that cannot be read, possibly leaving an empty configuration - - Returns - Nothing - - Raises - IOError if a file cannot be handled - """ - if self._is_initialized: - return - - - files_to_read = self._file_or_files - if not isinstance(files_to_read, (tuple, list)): - files_to_read = [ files_to_read ] - - for file_object in files_to_read: - fp = file_object - close_fp = False - # assume a path if it is not a file-object - if not hasattr(file_object, "seek"): - try: - fp = open(file_object) - except IOError,e: - continue - close_fp = True - # END fp handling - - try: - self._read(fp, fp.name) - finally: - if close_fp: - fp.close() - # END read-handling - # END for each file object to read - self._is_initialized = True - - def _write(self, fp): - """Write an .ini-format representation of the configuration state in - git compatible format""" - def write_section(name, section_dict): - fp.write("[%s]\n" % name) - for (key, value) in section_dict.items(): - if key != "__name__": - fp.write("\t%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) - # END if key is not __name__ - # END section writing - - if self._defaults: - write_section(cp.DEFAULTSECT, self._defaults) - map(lambda t: write_section(t[0],t[1]), self._sections.items()) + """ + Implements specifics required to read git style configuration files. + + This variation behaves much like the git.config command such that the configuration + will be read on demand based on the filepath given during initialization. + + The changes will automatically be written once the instance goes out of scope, but + can be triggered manually as well. + + The configuration file will be locked if you intend to change values preventing other + instances to write concurrently. + + NOTE + The config is case-sensitive even when queried, hence section and option names + must match perfectly. + """ + __metaclass__ = _MetaParserBuilder + + + #{ Configuration + # The lock type determines the type of lock to use in new configuration readers. + # They must be compatible to the LockFile interface. 
+ # A suitable alternative would be the BlockingLockFile + t_lock = LockFile + + #} END configuration + + OPTCRE = re.compile( + r'\s?(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace + r'\s*(?P<vi>[:=])\s*' # any number of space/tab, + # followed by separator + # (either : or =), followed + # by any # space/tab + r'(?P<value>.*)$' # everything up to eol + ) + + # list of RawConfigParser methods able to change the instance + _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") + __slots__ = ("_sections", "_defaults", "_file_or_files", "_read_only","_is_initialized", '_lock') + + def __init__(self, file_or_files, read_only=True): + """ + Initialize a configuration reader to read the given file_or_files and to + possibly allow changes to it by setting read_only False + + ``file_or_files`` + A single file path or file objects or multiple of these + + ``read_only`` + If True, the ConfigParser may only read the data , but not change it. + If False, only a single file path or file object may be given. + """ + super(GitConfigParser, self).__init__() + # initialize base with ordered dictionaries to be sure we write the same + # file back + self._sections = OrderedDict() + self._defaults = OrderedDict() + + self._file_or_files = file_or_files + self._read_only = read_only + self._is_initialized = False + self._lock = None + + if not read_only: + if isinstance(file_or_files, (tuple, list)): + raise ValueError("Write-ConfigParsers can operate on a single file only, multiple files have been passed") + # END single file check + + if not isinstance(file_or_files, basestring): + file_or_files = file_or_files.name + # END get filename from handle/stream + # initialize lock base - we want to write + self._lock = self.t_lock(file_or_files) + + self._lock._obtain_lock() + # END read-only check + + + def __del__(self): + """ + Write pending changes if required and release locks + """ + # checking for the lock here makes sure we do not raise during write() + # in case an invalid parser was created who could not get a lock + if self.read_only or not self._lock._has_lock(): + return + + try: + try: + self.write() + except IOError,e: + print "Exception during destruction of GitConfigParser: %s" % str(e) + finally: + self._lock._release_lock() + + def optionxform(self, optionstr): + """ + Do not transform options in any way when writing + """ + return optionstr + + def _read(self, fp, fpname): + """ + A direct copy of the py2.4 version of the super class's _read method + to assure it uses ordered dicts. Had to change one line to make it work. + + Future versions have this fixed, but in fact its quite embarassing for the + guys not to have done it right in the first place ! + + Removed big comments to make it more compact. + + Made sure it ignores initial whitespace as git uses tabs + """ + cursect = None # None, or a dictionary + optname = None + lineno = 0 + e = None # None, or an exception + while True: + line = fp.readline() + if not line: + break + lineno = lineno + 1 + # comment or blank line? + if line.strip() == '' or line[0] in '#;': + continue + if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": + # no leading whitespace + continue + else: + # is it a section header? + mo = self.SECTCRE.match(line) + if mo: + sectname = mo.group('header') + if sectname in self._sections: + cursect = self._sections[sectname] + elif sectname == cp.DEFAULTSECT: + cursect = self._defaults + else: + # THE ONLY LINE WE CHANGED ! 
+ cursect = OrderedDict((('__name__', sectname),)) + self._sections[sectname] = cursect + # So sections can't start with a continuation line + optname = None + # no section header in the file? + elif cursect is None: + raise cp.MissingSectionHeaderError(fpname, lineno, line) + # an option line? + else: + mo = self.OPTCRE.match(line) + if mo: + optname, vi, optval = mo.group('option', 'vi', 'value') + if vi in ('=', ':') and ';' in optval: + pos = optval.find(';') + if pos != -1 and optval[pos-1].isspace(): + optval = optval[:pos] + optval = optval.strip() + if optval == '""': + optval = '' + optname = self.optionxform(optname.rstrip()) + cursect[optname] = optval + else: + if not e: + e = cp.ParsingError(fpname) + e.append(lineno, repr(line)) + # END + # END ? + # END ? + # END while reading + # if any parsing errors occurred, raise an exception + if e: + raise e + + + def read(self): + """ + Reads the data stored in the files we have been initialized with. It will + ignore files that cannot be read, possibly leaving an empty configuration + + Returns + Nothing + + Raises + IOError if a file cannot be handled + """ + if self._is_initialized: + return + + + files_to_read = self._file_or_files + if not isinstance(files_to_read, (tuple, list)): + files_to_read = [ files_to_read ] + + for file_object in files_to_read: + fp = file_object + close_fp = False + # assume a path if it is not a file-object + if not hasattr(file_object, "seek"): + try: + fp = open(file_object) + except IOError,e: + continue + close_fp = True + # END fp handling + + try: + self._read(fp, fp.name) + finally: + if close_fp: + fp.close() + # END read-handling + # END for each file object to read + self._is_initialized = True + + def _write(self, fp): + """Write an .ini-format representation of the configuration state in + git compatible format""" + def write_section(name, section_dict): + fp.write("[%s]\n" % name) + for (key, value) in section_dict.items(): + if key != "__name__": + fp.write("\t%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) + # END if key is not __name__ + # END section writing + + if self._defaults: + write_section(cp.DEFAULTSECT, self._defaults) + map(lambda t: write_section(t[0],t[1]), self._sections.items()) - - @_needs_values - def write(self): - """ - Write changes to our file, if there are changes at all - - Raise - IOError if this is a read-only writer instance or if we could not obtain - a file lock - """ - self._assure_writable("write") - self._lock._obtain_lock() - - - fp = self._file_or_files - close_fp = False - - if not hasattr(fp, "seek"): - fp = open(self._file_or_files, "w") - close_fp = True - else: - fp.seek(0) - - # WRITE DATA - try: - self._write(fp) - finally: - if close_fp: - fp.close() - # END data writing - - # we do not release the lock - it will be done automatically once the - # instance vanishes - - def _assure_writable(self, method_name): - if self.read_only: - raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) - - @_needs_values - @_set_dirty_and_flush_changes - def add_section(self, section): - """ - Assures added options will stay in order - """ - super(GitConfigParser, self).add_section(section) - self._sections[section] = OrderedDict() - - @property - def read_only(self): - """ - Returns - True if this instance may change the configuration file - """ - return self._read_only - - def get_value(self, section, option, default = None): - """ - ``default`` - If not None, the given default value will be returned in case - the option did not 
exist - Returns - a properly typed value, either int, float or string - Raises TypeError in case the value could not be understood - Otherwise the exceptions known to the ConfigParser will be raised. - """ - try: - valuestr = self.get(section, option) - except Exception: - if default is not None: - return default - raise - - types = ( long, float ) - for numtype in types: - try: - val = numtype( valuestr ) + + @_needs_values + def write(self): + """ + Write changes to our file, if there are changes at all + + Raise + IOError if this is a read-only writer instance or if we could not obtain + a file lock + """ + self._assure_writable("write") + self._lock._obtain_lock() + + + fp = self._file_or_files + close_fp = False + + if not hasattr(fp, "seek"): + fp = open(self._file_or_files, "w") + close_fp = True + else: + fp.seek(0) + + # WRITE DATA + try: + self._write(fp) + finally: + if close_fp: + fp.close() + # END data writing + + # we do not release the lock - it will be done automatically once the + # instance vanishes + + def _assure_writable(self, method_name): + if self.read_only: + raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) + + @_needs_values + @_set_dirty_and_flush_changes + def add_section(self, section): + """ + Assures added options will stay in order + """ + super(GitConfigParser, self).add_section(section) + self._sections[section] = OrderedDict() + + @property + def read_only(self): + """ + Returns + True if this instance may change the configuration file + """ + return self._read_only + + def get_value(self, section, option, default = None): + """ + ``default`` + If not None, the given default value will be returned in case + the option did not exist + Returns + a properly typed value, either int, float or string + Raises TypeError in case the value could not be understood + Otherwise the exceptions known to the ConfigParser will be raised. + """ + try: + valuestr = self.get(section, option) + except Exception: + if default is not None: + return default + raise + + types = ( long, float ) + for numtype in types: + try: + val = numtype( valuestr ) - # truncated value ? - if val != float( valuestr ): - continue + # truncated value ? + if val != float( valuestr ): + continue - return val - except (ValueError,TypeError): - continue - # END for each numeric type - - # try boolean values as git uses them - vl = valuestr.lower() - if vl == 'false': - return False - if vl == 'true': - return True - - if not isinstance( valuestr, basestring ): - raise TypeError( "Invalid value type: only int, long, float and str are allowed", valuestr ) - - return valuestr - - @_needs_values - @_set_dirty_and_flush_changes - def set_value(self, section, option, value): - """Sets the given option in section to the given value. - It will create the section if required, and will not throw as opposed to the default - ConfigParser 'set' method. - - ``section`` - Name of the section in which the option resides or should reside - - ``option`` - Name of the options whose value to set - - ``value`` - Value to set the option to. 
It must be a string or convertible to a string - """ - if not self.has_section(section): - self.add_section(section) - self.set(section, option, str(value)) + return val + except (ValueError,TypeError): + continue + # END for each numeric type + + # try boolean values as git uses them + vl = valuestr.lower() + if vl == 'false': + return False + if vl == 'true': + return True + + if not isinstance( valuestr, basestring ): + raise TypeError( "Invalid value type: only int, long, float and str are allowed", valuestr ) + + return valuestr + + @_needs_values + @_set_dirty_and_flush_changes + def set_value(self, section, option, value): + """Sets the given option in section to the given value. + It will create the section if required, and will not throw as opposed to the default + ConfigParser 'set' method. + + ``section`` + Name of the section in which the option resides or should reside + + ``option`` + Name of the options whose value to set + + ``value`` + Value to set the option to. It must be a string or convertible to a string + """ + if not self.has_section(section): + self.add_section(section) + self.set(section, option, str(value)) diff --git a/lib/git/diff.py b/lib/git/diff.py index 9b07b5ea..c7c54bdf 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -7,361 +7,361 @@ import re import objects.blob as blob from errors import GitCommandError - + class Diffable(object): - """ - Common interface for all object that can be diffed against another object of compatible type. - - NOTE: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object. - """ - __slots__ = tuple() - - # standin indicating you want to diff against the index - class Index(object): - pass - - def _process_diff_args(self, args): - """ - Returns - possibly altered version of the given args list. - Method is called right before git command execution. - Subclasses can use it to alter the behaviour of the superclass - """ - return args - - def diff(self, other=Index, paths=None, create_patch=False, **kwargs): - """ - Creates diffs between two items being trees, trees and index or an - index and the working tree. + """ + Common interface for all object that can be diffed against another object of compatible type. + + NOTE: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object. + """ + __slots__ = tuple() + + # standin indicating you want to diff against the index + class Index(object): + pass + + def _process_diff_args(self, args): + """ + Returns + possibly altered version of the given args list. + Method is called right before git command execution. + Subclasses can use it to alter the behaviour of the superclass + """ + return args + + def diff(self, other=Index, paths=None, create_patch=False, **kwargs): + """ + Creates diffs between two items being trees, trees and index or an + index and the working tree. - ``other`` - Is the item to compare us with. - If None, we will be compared to the working tree. - If Treeish, it will be compared against the respective tree - If Index ( type ), it will be compared against the index. - It defaults to Index to assure the method will not by-default fail - on bare repositories. + ``other`` + Is the item to compare us with. + If None, we will be compared to the working tree. + If Treeish, it will be compared against the respective tree + If Index ( type ), it will be compared against the index. 
+ It defaults to Index to assure the method will not by-default fail + on bare repositories. - ``paths`` - is a list of paths or a single path to limit the diff to. - It will only include at least one of the givne path or paths. + ``paths`` + is a list of paths or a single path to limit the diff to. + It will only include at least one of the givne path or paths. - ``create_patch`` - If True, the returned Diff contains a detailed patch that if applied - makes the self to other. Patches are somwhat costly as blobs have to be read - and diffed. + ``create_patch`` + If True, the returned Diff contains a detailed patch that if applied + makes the self to other. Patches are somwhat costly as blobs have to be read + and diffed. - ``kwargs`` - Additional arguments passed to git-diff, such as - R=True to swap both sides of the diff. + ``kwargs`` + Additional arguments passed to git-diff, such as + R=True to swap both sides of the diff. - Returns - git.DiffIndex - - Note - Rename detection will only work if create_patch is True. - - On a bare repository, 'other' needs to be provided as Index or as - as Tree/Commit, or a git command error will occour - """ - args = list() - args.append( "--abbrev=40" ) # we need full shas - args.append( "--full-index" ) # get full index paths, not only filenames - - if create_patch: - args.append("-p") - args.append("-M") # check for renames - else: - args.append("--raw") - - if paths is not None and not isinstance(paths, (tuple,list)): - paths = [ paths ] + Returns + git.DiffIndex + + Note + Rename detection will only work if create_patch is True. + + On a bare repository, 'other' needs to be provided as Index or as + as Tree/Commit, or a git command error will occour + """ + args = list() + args.append( "--abbrev=40" ) # we need full shas + args.append( "--full-index" ) # get full index paths, not only filenames + + if create_patch: + args.append("-p") + args.append("-M") # check for renames + else: + args.append("--raw") + + if paths is not None and not isinstance(paths, (tuple,list)): + paths = [ paths ] - if other is not None and other is not self.Index: - args.insert(0, other) - if other is self.Index: - args.insert(0, "--cached") - - args.insert(0,self) - - # paths is list here or None - if paths: - args.append("--") - args.extend(paths) - # END paths handling - - kwargs['as_process'] = True - proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs) - - diff_method = Diff._index_from_raw_format - if create_patch: - diff_method = Diff._index_from_patch_format - index = diff_method(self.repo, proc.stdout) - - status = proc.wait() - return index + if other is not None and other is not self.Index: + args.insert(0, other) + if other is self.Index: + args.insert(0, "--cached") + + args.insert(0,self) + + # paths is list here or None + if paths: + args.append("--") + args.extend(paths) + # END paths handling + + kwargs['as_process'] = True + proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs) + + diff_method = Diff._index_from_raw_format + if create_patch: + diff_method = Diff._index_from_patch_format + index = diff_method(self.repo, proc.stdout) + + status = proc.wait() + return index class DiffIndex(list): - """ - Implements an Index for diffs, allowing a list of Diffs to be queried by - the diff properties. 
- - The class improves the diff handling convenience - """ - # change type invariant identifying possible ways a blob can have changed - # A = Added - # D = Deleted - # R = Renamed - # M = modified - change_type = ("A", "D", "R", "M") - - - def iter_change_type(self, change_type): - """ - Return - iterator yieling Diff instances that match the given change_type - - ``change_type`` - Member of DiffIndex.change_type, namely - - 'A' for added paths - - 'D' for deleted paths - - 'R' for renamed paths - - 'M' for paths with modified data - """ - if change_type not in self.change_type: - raise ValueError( "Invalid change type: %s" % change_type ) - - for diff in self: - if change_type == "A" and diff.new_file: - yield diff - elif change_type == "D" and diff.deleted_file: - yield diff - elif change_type == "R" and diff.renamed: - yield diff - elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: - yield diff - # END for each diff - + """ + Implements an Index for diffs, allowing a list of Diffs to be queried by + the diff properties. + + The class improves the diff handling convenience + """ + # change type invariant identifying possible ways a blob can have changed + # A = Added + # D = Deleted + # R = Renamed + # M = modified + change_type = ("A", "D", "R", "M") + + + def iter_change_type(self, change_type): + """ + Return + iterator yieling Diff instances that match the given change_type + + ``change_type`` + Member of DiffIndex.change_type, namely + + 'A' for added paths + + 'D' for deleted paths + + 'R' for renamed paths + + 'M' for paths with modified data + """ + if change_type not in self.change_type: + raise ValueError( "Invalid change type: %s" % change_type ) + + for diff in self: + if change_type == "A" and diff.new_file: + yield diff + elif change_type == "D" and diff.deleted_file: + yield diff + elif change_type == "R" and diff.renamed: + yield diff + elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: + yield diff + # END for each diff + class Diff(object): - """ - A Diff contains diff information between two Trees. - - It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. - - Diffs keep information about the changed blob objects, the file mode, renames, - deletions and new files. - - There are a few cases where None has to be expected as member variable value: - - ``New File``:: - - a_mode is None - a_blob is None - - ``Deleted File``:: - - b_mode is None - b_blob is None - - ``Working Tree Blobs`` - - When comparing to working trees, the working tree blob will have a null hexsha - as a corresponding object does not yet exist. The mode will be null as well. - But the path will be available though. - If it is listed in a diff the working tree version of the file must - be different to the version in the index or tree, and hence has been modified. - """ - - # precompiled regex - re_header = re.compile(r""" - #^diff[ ]--git - [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n - (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n - ^rename[ ]from[ ](?P<rename_from>\S+)\n - ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? - (?:^old[ ]mode[ ](?P<old_mode>\d+)\n - ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? - (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) - \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? 
- """, re.VERBOSE | re.MULTILINE) - # can be used for comparisons - null_hex_sha = "0"*40 - - __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", - "rename_from", "rename_to", "diff") + """ + A Diff contains diff information between two Trees. + + It contains two sides a and b of the diff, members are prefixed with + "a" and "b" respectively to inidcate that. + + Diffs keep information about the changed blob objects, the file mode, renames, + deletions and new files. + + There are a few cases where None has to be expected as member variable value: + + ``New File``:: + + a_mode is None + a_blob is None + + ``Deleted File``:: + + b_mode is None + b_blob is None + + ``Working Tree Blobs`` + + When comparing to working trees, the working tree blob will have a null hexsha + as a corresponding object does not yet exist. The mode will be null as well. + But the path will be available though. + If it is listed in a diff the working tree version of the file must + be different to the version in the index or tree, and hence has been modified. + """ + + # precompiled regex + re_header = re.compile(r""" + #^diff[ ]--git + [ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n + (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n + ^rename[ ]from[ ](?P<rename_from>\S+)\n + ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? + (?:^old[ ]mode[ ](?P<old_mode>\d+)\n + ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? + (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? + (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? + (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) + \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? + """, re.VERBOSE | re.MULTILINE) + # can be used for comparisons + null_hex_sha = "0"*40 + + __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", + "rename_from", "rename_to", "diff") - def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): - if a_blob_id is None: - self.a_blob = None - else: - self.a_blob = blob.Blob(repo, a_blob_id, mode=a_mode, path=a_path) - if b_blob_id is None: - self.b_blob = None - else: - self.b_blob = blob.Blob(repo, b_blob_id, mode=b_mode, path=b_path) + def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, + b_mode, new_file, deleted_file, rename_from, + rename_to, diff): + if a_blob_id is None: + self.a_blob = None + else: + self.a_blob = blob.Blob(repo, a_blob_id, mode=a_mode, path=a_path) + if b_blob_id is None: + self.b_blob = None + else: + self.b_blob = blob.Blob(repo, b_blob_id, mode=b_mode, path=b_path) - self.a_mode = a_mode - self.b_mode = b_mode - - if self.a_mode: - self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) - if self.b_mode: - self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) - - self.new_file = new_file - self.deleted_file = deleted_file - - # be clear and use None instead of empty strings - self.rename_from = rename_from or None - self.rename_to = rename_to or None - - self.diff = diff + self.a_mode = a_mode + self.b_mode = b_mode + + if self.a_mode: + self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) + if self.b_mode: + self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) + + self.new_file = new_file + self.deleted_file = deleted_file + + # be clear and use None instead of empty strings + self.rename_from = rename_from or None + self.rename_to = rename_to or None + + self.diff = diff - def __eq__(self, other): - for name in self.__slots__: - if getattr(self, name) != 

-    def __eq__(self, other):
-        for name in self.__slots__:
-            if getattr(self, name) != getattr(other, name):
-                return False
-        # END for each name
-        return True
-
-    def __ne__(self, other):
-        return not ( self == other )
-
-    def __hash__(self):
-        return hash(tuple(getattr(self,n) for n in self.__slots__))
+    def __eq__(self, other):
+        for name in self.__slots__:
+            if getattr(self, name) != getattr(other, name):
+                return False
+        # END for each name
+        return True
+
+    def __ne__(self, other):
+        return not ( self == other )
+
+    def __hash__(self):
+        return hash(tuple(getattr(self,n) for n in self.__slots__))

-    def __str__(self):
-        h = "%s"
-        if self.a_blob:
-            h %= self.a_blob.path
-        elif self.b_blob:
-            h %= self.b_blob.path
-
-        msg = ''
-        l = None        # temp line
-        ll = 0          # line length
-        for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')):
-            if b:
-                l = "\n%s: %o | %s" % (n, b.mode, b.sha)
-            else:
-                l = "\n%s: None" % n
-            # END if blob is not None
-            ll = max(len(l), ll)
-            msg += l
-        # END for each blob
-
-        # add headline
-        h += '\n' + '='*ll
-
-        if self.deleted_file:
-            msg += '\nfile deleted in rhs'
-        if self.new_file:
-            msg += '\nfile added in rhs'
-        if self.rename_from:
-            msg += '\nfile renamed from %r' % self.rename_from
-        if self.rename_to:
-            msg += '\nfile renamed to %r' % self.rename_to
-        if self.diff:
-            msg += '\n---'
-            msg += self.diff
-            msg += '\n---'
-        # END diff info
-
-        return h + msg
+    def __str__(self):
+        h = "%s"
+        if self.a_blob:
+            h %= self.a_blob.path
+        elif self.b_blob:
+            h %= self.b_blob.path
+
+        msg = ''
+        l = None        # temp line
+        ll = 0          # line length
+        for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')):
+            if b:
+                l = "\n%s: %o | %s" % (n, b.mode, b.sha)
+            else:
+                l = "\n%s: None" % n
+            # END if blob is not None
+            ll = max(len(l), ll)
+            msg += l
+        # END for each blob
+
+        # add headline
+        h += '\n' + '='*ll
+
+        if self.deleted_file:
+            msg += '\nfile deleted in rhs'
+        if self.new_file:
+            msg += '\nfile added in rhs'
+        if self.rename_from:
+            msg += '\nfile renamed from %r' % self.rename_from
+        if self.rename_to:
+            msg += '\nfile renamed to %r' % self.rename_to
+        if self.diff:
+            msg += '\n---'
+            msg += self.diff
+            msg += '\n---'
+        # END diff info
+
+        return h + msg

-    @property
-    def renamed(self):
-        """
-        Returns:
-            True if the blob of our diff has been renamed
-        """
-        return self.rename_from != self.rename_to
+    @property
+    def renamed(self):
+        """
+        Returns:
+            True if the blob of our diff has been renamed
+        """
+        return self.rename_from != self.rename_to
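
Before the parsers below, it may help to see what re_header captures from a
hand-written patch header (values are made up; note that the leading
'diff --git' has already been split away by the caller)::

    m = Diff.re_header.match(" a/foo.py b/foo.py\n"
                             "index 0123abc..89defab 100644\n")
    print m.group('a_path')         # foo.py
    print m.group('b_blob_id')      # 89defab
    print m.group('b_mode')         # 100644
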

-    @classmethod
-    def _index_from_patch_format(cls, repo, stream):
-        """
-        Create a new DiffIndex from the given text which must be in patch format
-        ``repo``
-            is the repository we are operating on - it is required
-
-        ``stream``
-            result of 'git diff' as a stream (supporting file protocol)
-
-        Returns
-            git.DiffIndex
-        """
-        # for now, we have to bake the stream
-        text = stream.read()
-        index = DiffIndex()
+    @classmethod
+    def _index_from_patch_format(cls, repo, stream):
+        """
+        Create a new DiffIndex from the given text which must be in patch format
+        ``repo``
+            is the repository we are operating on - it is required
+
+        ``stream``
+            result of 'git diff' as a stream (supporting file protocol)
+
+        Returns
+            git.DiffIndex
+        """
+        # for now, we have to bake the stream
+        text = stream.read()
+        index = DiffIndex()

-        diff_header = cls.re_header.match
-        for diff in ('\n' + text).split('\ndiff --git')[1:]:
-            header = diff_header(diff)
+        diff_header = cls.re_header.match
+        for diff in ('\n' + text).split('\ndiff --git')[1:]:
+            header = diff_header(diff)

-            a_path, b_path, similarity_index, rename_from, rename_to, \
-                old_mode, new_mode, new_file_mode, deleted_file_mode, \
-                a_blob_id, b_blob_id, b_mode = header.groups()
-            new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
+            a_path, b_path, similarity_index, rename_from, rename_to, \
+                old_mode, new_mode, new_file_mode, deleted_file_mode, \
+                a_blob_id, b_blob_id, b_mode = header.groups()
+            new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)

-            index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
-                old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
-                new_file, deleted_file, rename_from, rename_to, diff[header.end():]))
+            index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id,
+                old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
+                new_file, deleted_file, rename_from, rename_to, diff[header.end():]))

-        return index
-
-    @classmethod
-    def _index_from_raw_format(cls, repo, stream):
-        """
-        Create a new DiffIndex from the given stream which must be in raw format.
-
-        NOTE:
-            This format is inherently incapable of detecting renames, hence we only
-            see modified, deleted and added files
-
-        Returns
-            git.DiffIndex
-        """
-        # handles
-        # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore
-        index = DiffIndex()
-        for line in stream:
-            if not line.startswith(":"):
-                continue
-            # END its not a valid diff line
-            old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split()
-            a_path = path
-            b_path = path
-            deleted_file = False
-            new_file = False
-
-            # NOTE: We cannot conclude the change type from the existence of a blob,
-            # as diffs with the working tree do not have blobs yet
-            if change_type == 'D':
-                b_blob_id = None
-                deleted_file = True
-            elif change_type == 'A':
-                a_blob_id = None
-                new_file = True
-            # END add/remove handling
-
-            diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
-                new_file, deleted_file, None, None, '')
-            index.append(diff)
-        # END for each line
-
-        return index
+        return index
+
+    @classmethod
+    def _index_from_raw_format(cls, repo, stream):
+        """
+        Create a new DiffIndex from the given stream which must be in raw format.
+
+        NOTE:
+            This format is inherently incapable of detecting renames, hence we only
+            see modified, deleted and added files
+
+        Returns
+            git.DiffIndex
+        """
+        # handles
+        # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore
+        index = DiffIndex()
+        for line in stream:
+            if not line.startswith(":"):
+                continue
+            # END its not a valid diff line
+            old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split()
+            a_path = path
+            b_path = path
+            deleted_file = False
+            new_file = False
+
+            # NOTE: We cannot conclude the change type from the existence of a blob,
+            # as diffs with the working tree do not have blobs yet
+            if change_type == 'D':
+                b_blob_id = None
+                deleted_file = True
+            elif change_type == 'A':
+                a_blob_id = None
+                new_file = True
+            # END add/remove handling
+
+            diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
+                new_file, deleted_file, None, None, '')
+            index.append(diff)
+        # END for each line
+
+        return index
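
The raw format handled above is what 'git diff --raw' prints, one colon-led
record per path. A sketch of pushing a canned record through the parser
(`repo` is a hypothetical Repo instance)::

    from StringIO import StringIO

    record = ":100644 100644 %s %s M\tlib/git/diff.py\n" % ("a" * 40, "b" * 40)
    index = Diff._index_from_raw_format(repo, StringIO(record))
    print index[0].a_blob.sha       # 40 x 'a'
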
- """ + """ + Thrown if the given repository appears to have an invalid format. + """ class NoSuchPathError(OSError): - """ - Thrown if a path could not be access by the system. - """ + """ + Thrown if a path could not be access by the system. + """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ - def __init__(self, command, status, stderr=None): - self.stderr = stderr - self.status = status - self.command = command - - def __str__(self): - return ("'%s' returned exit status %i: %s" % - (' '.join(str(i) for i in self.command), self.status, self.stderr)) + """ + Thrown if execution of the git command fails with non-zero status code. + """ + def __init__(self, command, status, stderr=None): + self.stderr = stderr + self.status = status + self.command = command + + def __str__(self): + return ("'%s' returned exit status %i: %s" % + (' '.join(str(i) for i in self.command), self.status, self.stderr)) diff --git a/lib/git/index.py b/lib/git/index.py index 17fe9437..6f615024 100644 --- a/lib/git/index.py +++ b/lib/git/index.py @@ -25,1348 +25,1348 @@ from git.utils import SHA1Writer, LazyMixin, ConcurrentWriteOperation, join_path class CheckoutError( Exception ): - """Thrown if a file could not be checked out from the index as it contained - changes. - - The .failed_files attribute contains a list of relative paths that failed - to be checked out as they contained changes that did not exist in the index. - - The .failed_reasons attribute contains a string informing about the actual - cause of the issue. - - The .valid_files attribute contains a list of relative paths to files that - were checked out successfully and hence match the version stored in the - index""" - def __init__(self, message, failed_files, valid_files, failed_reasons): - Exception.__init__(self, message) - self.failed_files = failed_files - self.failed_reasons = failed_reasons - self.valid_files = valid_files + """Thrown if a file could not be checked out from the index as it contained + changes. + + The .failed_files attribute contains a list of relative paths that failed + to be checked out as they contained changes that did not exist in the index. + + The .failed_reasons attribute contains a string informing about the actual + cause of the issue. + + The .valid_files attribute contains a list of relative paths to files that + were checked out successfully and hence match the version stored in the + index""" + def __init__(self, message, failed_files, valid_files, failed_reasons): + Exception.__init__(self, message) + self.failed_files = failed_files + self.failed_reasons = failed_reasons + self.valid_files = valid_files - def __str__(self): - return Exception.__str__(self) + ":%s" % self.failed_files - + def __str__(self): + return Exception.__str__(self) + ":%s" % self.failed_files + class _TemporaryFileSwap(object): - """ - Utility class moving a file to a temporary location within the same directory - and moving it back on to where on object deletion. 
- """ - __slots__ = ("file_path", "tmp_file_path") - - def __init__(self, file_path): - self.file_path = file_path - self.tmp_file_path = self.file_path + tempfile.mktemp('','','') - # it may be that the source does not exist - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass - - def __del__(self): - if os.path.isfile(self.tmp_file_path): - if os.name == 'nt' and os.path.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - # END temp file exists + """ + Utility class moving a file to a temporary location within the same directory + and moving it back on to where on object deletion. + """ + __slots__ = ("file_path", "tmp_file_path") + + def __init__(self, file_path): + self.file_path = file_path + self.tmp_file_path = self.file_path + tempfile.mktemp('','','') + # it may be that the source does not exist + try: + os.rename(self.file_path, self.tmp_file_path) + except OSError: + pass + + def __del__(self): + if os.path.isfile(self.tmp_file_path): + if os.name == 'nt' and os.path.exists(self.file_path): + os.remove(self.file_path) + os.rename(self.tmp_file_path, self.file_path) + # END temp file exists - + class BlobFilter(object): - """ - Predicate to be used by iter_blobs allowing to filter only return blobs which - match the given list of directories or files. - - The given paths are given relative to the repository. - """ - __slots__ = 'paths' - - def __init__(self, paths): - """ - ``paths`` - tuple or list of paths which are either pointing to directories or - to files relative to the current repository - """ - self.paths = paths - - def __call__(self, stage_blob): - path = stage_blob[1].path - for p in self.paths: - if path.startswith(p): - return True - # END for each path in filter paths - return False + """ + Predicate to be used by iter_blobs allowing to filter only return blobs which + match the given list of directories or files. + + The given paths are given relative to the repository. + """ + __slots__ = 'paths' + + def __init__(self, paths): + """ + ``paths`` + tuple or list of paths which are either pointing to directories or + to files relative to the current repository + """ + self.paths = paths + + def __call__(self, stage_blob): + path = stage_blob[1].path + for p in self.paths: + if path.startswith(p): + return True + # END for each path in filter paths + return False class BaseIndexEntry(tuple): - """ - Small Brother of an index entry which can be created to describe changes - done to the index in which case plenty of additional information is not requried. - - As the first 4 data members match exactly to the IndexEntry type, methods - expecting a BaseIndexEntry can also handle full IndexEntries even if they - use numeric indices for performance reasons. 
- """ - - def __str__(self): - return "%o %s %i\t%s\n" % (self.mode, self.sha, self.stage, self.path) - - @property - def mode(self): - """ - File Mode, compatible to stat module constants - """ - return self[0] - - @property - def sha(self): - """ - hex sha of the blob - """ - return self[1] - - @property - def stage(self): - """ - Stage of the entry, either: - 0 = default stage - 1 = stage before a merge or common ancestor entry in case of a 3 way merge - 2 = stage of entries from the 'left' side of the merge - 3 = stage of entries from the right side of the merge - Note: - For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html - """ - return self[2] + """ + Small Brother of an index entry which can be created to describe changes + done to the index in which case plenty of additional information is not requried. + + As the first 4 data members match exactly to the IndexEntry type, methods + expecting a BaseIndexEntry can also handle full IndexEntries even if they + use numeric indices for performance reasons. + """ + + def __str__(self): + return "%o %s %i\t%s\n" % (self.mode, self.sha, self.stage, self.path) + + @property + def mode(self): + """ + File Mode, compatible to stat module constants + """ + return self[0] + + @property + def sha(self): + """ + hex sha of the blob + """ + return self[1] + + @property + def stage(self): + """ + Stage of the entry, either: + 0 = default stage + 1 = stage before a merge or common ancestor entry in case of a 3 way merge + 2 = stage of entries from the 'left' side of the merge + 3 = stage of entries from the right side of the merge + Note: + For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html + """ + return self[2] - @property - def path(self): - return self[3] - - @classmethod - def from_blob(cls, blob, stage = 0): - """ - Returns - Fully equipped BaseIndexEntry at the given stage - """ - return cls((blob.mode, blob.sha, stage, blob.path)) - + @property + def path(self): + return self[3] + + @classmethod + def from_blob(cls, blob, stage = 0): + """ + Returns + Fully equipped BaseIndexEntry at the given stage + """ + return cls((blob.mode, blob.sha, stage, blob.path)) + class IndexEntry(BaseIndexEntry): - """ - Allows convenient access to IndexEntry data without completely unpacking it. - - Attributes usully accessed often are cached in the tuple whereas others are - unpacked on demand. - - See the properties for a mapping between names and tuple indices. - """ - @property - def ctime(self): - """ - Returns - Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the - file's creation time - """ - return struct.unpack(">LL", self[4]) - - @property - def mtime(self): - """ - See ctime property, but returns modification time - """ - return struct.unpack(">LL", self[5]) - - @property - def dev(self): - """ - Device ID - """ - return self[6] - - @property - def inode(self): - """ - Inode ID - """ - return self[7] - - @property - def uid(self): - """ - User ID - """ - return self[8] - - @property - def gid(self): - """ - Group ID - """ - return self[9] + """ + Allows convenient access to IndexEntry data without completely unpacking it. + + Attributes usully accessed often are cached in the tuple whereas others are + unpacked on demand. + + See the properties for a mapping between names and tuple indices. 
+ """ + @property + def ctime(self): + """ + Returns + Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the + file's creation time + """ + return struct.unpack(">LL", self[4]) + + @property + def mtime(self): + """ + See ctime property, but returns modification time + """ + return struct.unpack(">LL", self[5]) + + @property + def dev(self): + """ + Device ID + """ + return self[6] + + @property + def inode(self): + """ + Inode ID + """ + return self[7] + + @property + def uid(self): + """ + User ID + """ + return self[8] + + @property + def gid(self): + """ + Group ID + """ + return self[9] - @property - def size(self): - """ - Uncompressed size of the blob - - Note - Will be 0 if the stage is not 0 ( hence it is an unmerged entry ) - """ - return self[10] - - @classmethod - def from_base(cls, base): - """ - Returns - Minimal entry as created from the given BaseIndexEntry instance. - Missing values will be set to null-like values - - ``base`` - Instance of type BaseIndexEntry - """ - time = struct.pack(">LL", 0, 0) - return IndexEntry((base.mode, base.sha, base.stage, base.path, time, time, 0, 0, 0, 0, 0)) - - @classmethod - def from_blob(cls, blob): - """ - Returns - Minimal entry resembling the given blob objecft - """ - time = struct.pack(">LL", 0, 0) - return IndexEntry((blob.mode, blob.sha, 0, blob.path, time, time, 0, 0, 0, 0, blob.size)) + @property + def size(self): + """ + Uncompressed size of the blob + + Note + Will be 0 if the stage is not 0 ( hence it is an unmerged entry ) + """ + return self[10] + + @classmethod + def from_base(cls, base): + """ + Returns + Minimal entry as created from the given BaseIndexEntry instance. + Missing values will be set to null-like values + + ``base`` + Instance of type BaseIndexEntry + """ + time = struct.pack(">LL", 0, 0) + return IndexEntry((base.mode, base.sha, base.stage, base.path, time, time, 0, 0, 0, 0, 0)) + + @classmethod + def from_blob(cls, blob): + """ + Returns + Minimal entry resembling the given blob objecft + """ + time = struct.pack(">LL", 0, 0) + return IndexEntry((blob.mode, blob.sha, 0, blob.path, time, time, 0, 0, 0, 0, blob.size)) def clear_cache(func): - """ - Decorator for functions that alter the index using the git command. This would - invalidate our possibly existing entries dictionary which is why it must be - deleted to allow it to be lazily reread later. - - Note - This decorator will not be required once all functions are implemented - natively which in fact is possible, but probably not feasible performance wise. - """ - def clear_cache_if_not_raised(self, *args, **kwargs): - rval = func(self, *args, **kwargs) - del(self.entries) - return rval - - # END wrapper method - clear_cache_if_not_raised.__name__ = func.__name__ - return clear_cache_if_not_raised - + """ + Decorator for functions that alter the index using the git command. This would + invalidate our possibly existing entries dictionary which is why it must be + deleted to allow it to be lazily reread later. + + Note + This decorator will not be required once all functions are implemented + natively which in fact is possible, but probably not feasible performance wise. + """ + def clear_cache_if_not_raised(self, *args, **kwargs): + rval = func(self, *args, **kwargs) + del(self.entries) + return rval + + # END wrapper method + clear_cache_if_not_raised.__name__ = func.__name__ + return clear_cache_if_not_raised + def default_index(func): - """ - Decorator assuring the wrapped method may only run if we are the default - repository index. 
This is as we rely on git commands that operate - on that index only. - """ - def check_default_index(self, *args, **kwargs): - if self._file_path != self._index_path(): - raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) - return func(self, *args, **kwargs) - # END wrpaper method - - check_default_index.__name__ = func.__name__ - return check_default_index + """ + Decorator assuring the wrapped method may only run if we are the default + repository index. This is as we rely on git commands that operate + on that index only. + """ + def check_default_index(self, *args, **kwargs): + if self._file_path != self._index_path(): + raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) + return func(self, *args, **kwargs) + # END wrpaper method + + check_default_index.__name__ = func.__name__ + return check_default_index class IndexFile(LazyMixin, diff.Diffable): - """ - Implements an Index that can be manipulated using a native implementation in - order to save git command function calls wherever possible. - - It provides custom merging facilities allowing to merge without actually changing - your index or your working tree. This way you can perform own test-merges based - on the index only without having to deal with the working copy. This is useful - in case of partial working trees. - - ``Entries`` - The index contains an entries dict whose keys are tuples of type IndexEntry - to facilitate access. - - You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: - index.entries[index.get_entries_key(index_entry_instance)] = index_entry_instance - Otherwise changes to it will be lost when changing the index using its methods. - """ - __slots__ = ( "repo", "version", "entries", "_extension_data", "_file_path" ) - _VERSION = 2 # latest version we support - S_IFGITLINK = 0160000 - - def __init__(self, repo, file_path=None): - """ - Initialize this Index instance, optionally from the given ``file_path``. - If no file_path is given, we will be created from the current index file. - - If a stream is not given, the stream will be initialized from the current - repository's index on demand. 
- """ - self.repo = repo - self.version = self._VERSION - self._extension_data = '' - self._file_path = file_path or self._index_path() - - def _set_cache_(self, attr): - if attr == "entries": - # read the current index - # try memory map for speed - fp = open(self._file_path, "rb") - stream = fp - try: - raise Exception() - stream = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) - except Exception: - pass - # END memory mapping - - try: - self._read_from_stream(stream) - finally: - pass - # make sure we close the stream ( possibly an mmap ) - # and the file - #stream.close() - #if stream is not fp: - # fp.close() - # END read from default index on demand - else: - super(IndexFile, self)._set_cache_(attr) - - def _index_path(self): - return join_path_native(self.repo.git_dir, "index") - - - @property - def path(self): - """ - Returns - Path to the index file we are representing - """ - return self._file_path - - @classmethod - def _read_entry(cls, stream): - """Return: One entry of the given stream""" - beginoffset = stream.tell() - ctime = struct.unpack(">8s", stream.read(8))[0] - mtime = struct.unpack(">8s", stream.read(8))[0] - (dev, ino, mode, uid, gid, size, sha, flags) = \ - struct.unpack(">LLLLLL20sH", stream.read(20 + 4 * 6 + 2)) - path_size = flags & 0x0fff - path = stream.read(path_size) - - real_size = ((stream.tell() - beginoffset + 8) & ~7) - data = stream.read((beginoffset + real_size) - stream.tell()) - return IndexEntry((mode, binascii.hexlify(sha), flags >> 12, path, ctime, mtime, dev, ino, uid, gid, size)) - - @classmethod - def _read_header(cls, stream): - """Return tuple(version_long, num_entries) from the given stream""" - type_id = stream.read(4) - if type_id != "DIRC": - raise AssertionError("Invalid index file header: %r" % type_id) - version, num_entries = struct.unpack(">LL", stream.read(4 * 2)) - assert version in (1, 2) - return version, num_entries - - def _read_from_stream(self, stream): - """ - Initialize this instance with index values read from the given stream - """ - self.version, num_entries = self._read_header(stream) - count = 0 - self.entries = dict() - while count < num_entries: - entry = self._read_entry(stream) - self.entries[self.get_entries_key(entry)] = entry - count += 1 - # END for each entry - - # the footer contains extension data and a sha on the content so far - # Keep the extension footer,and verify we have a sha in the end - # Extension data format is: - # 4 bytes ID - # 4 bytes length of chunk - # repeated 0 - N times - self._extension_data = stream.read(~0) - assert len(self._extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(self._extension_data) - - content_sha = self._extension_data[-20:] - - # truncate the sha in the end as we will dynamically create it anyway - self._extension_data = self._extension_data[:-20] - - - @classmethod - def _write_cache_entry(cls, stream, entry): - """ - Write an IndexEntry to a stream - """ - beginoffset = stream.tell() - stream.write(entry[4]) # ctime - stream.write(entry[5]) # mtime - path = entry[3] - plen = len(path) & 0x0fff # path length - assert plen == len(path), "Path %s too long to fit into index" % entry[3] - flags = plen | (entry[2] << 12)# stage and path length are 2 byte flags - stream.write(struct.pack(">LLLLLL20sH", entry[6], entry[7], entry[0], - entry[8], entry[9], entry[10], binascii.unhexlify(entry[1]), flags)) - stream.write(path) - real_size = ((stream.tell() - beginoffset + 8) & ~7) - stream.write("\0" * ((beginoffset 
+ real_size) - stream.tell())) + """ + Implements an Index that can be manipulated using a native implementation in + order to save git command function calls wherever possible. + + It provides custom merging facilities allowing to merge without actually changing + your index or your working tree. This way you can perform own test-merges based + on the index only without having to deal with the working copy. This is useful + in case of partial working trees. + + ``Entries`` + The index contains an entries dict whose keys are tuples of type IndexEntry + to facilitate access. + + You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: + index.entries[index.get_entries_key(index_entry_instance)] = index_entry_instance + Otherwise changes to it will be lost when changing the index using its methods. + """ + __slots__ = ( "repo", "version", "entries", "_extension_data", "_file_path" ) + _VERSION = 2 # latest version we support + S_IFGITLINK = 0160000 + + def __init__(self, repo, file_path=None): + """ + Initialize this Index instance, optionally from the given ``file_path``. + If no file_path is given, we will be created from the current index file. + + If a stream is not given, the stream will be initialized from the current + repository's index on demand. + """ + self.repo = repo + self.version = self._VERSION + self._extension_data = '' + self._file_path = file_path or self._index_path() + + def _set_cache_(self, attr): + if attr == "entries": + # read the current index + # try memory map for speed + fp = open(self._file_path, "rb") + stream = fp + try: + raise Exception() + stream = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) + except Exception: + pass + # END memory mapping + + try: + self._read_from_stream(stream) + finally: + pass + # make sure we close the stream ( possibly an mmap ) + # and the file + #stream.close() + #if stream is not fp: + # fp.close() + # END read from default index on demand + else: + super(IndexFile, self)._set_cache_(attr) + + def _index_path(self): + return join_path_native(self.repo.git_dir, "index") + + + @property + def path(self): + """ + Returns + Path to the index file we are representing + """ + return self._file_path + + @classmethod + def _read_entry(cls, stream): + """Return: One entry of the given stream""" + beginoffset = stream.tell() + ctime = struct.unpack(">8s", stream.read(8))[0] + mtime = struct.unpack(">8s", stream.read(8))[0] + (dev, ino, mode, uid, gid, size, sha, flags) = \ + struct.unpack(">LLLLLL20sH", stream.read(20 + 4 * 6 + 2)) + path_size = flags & 0x0fff + path = stream.read(path_size) + + real_size = ((stream.tell() - beginoffset + 8) & ~7) + data = stream.read((beginoffset + real_size) - stream.tell()) + return IndexEntry((mode, binascii.hexlify(sha), flags >> 12, path, ctime, mtime, dev, ino, uid, gid, size)) + + @classmethod + def _read_header(cls, stream): + """Return tuple(version_long, num_entries) from the given stream""" + type_id = stream.read(4) + if type_id != "DIRC": + raise AssertionError("Invalid index file header: %r" % type_id) + version, num_entries = struct.unpack(">LL", stream.read(4 * 2)) + assert version in (1, 2) + return version, num_entries + + def _read_from_stream(self, stream): + """ + Initialize this instance with index values read from the given stream + """ + self.version, num_entries = self._read_header(stream) + count = 0 + self.entries = dict() + while count < num_entries: + entry = self._read_entry(stream) + self.entries[self.get_entries_key(entry)] = entry + count 
+= 1 + # END for each entry + + # the footer contains extension data and a sha on the content so far + # Keep the extension footer,and verify we have a sha in the end + # Extension data format is: + # 4 bytes ID + # 4 bytes length of chunk + # repeated 0 - N times + self._extension_data = stream.read(~0) + assert len(self._extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(self._extension_data) + + content_sha = self._extension_data[-20:] + + # truncate the sha in the end as we will dynamically create it anyway + self._extension_data = self._extension_data[:-20] + + + @classmethod + def _write_cache_entry(cls, stream, entry): + """ + Write an IndexEntry to a stream + """ + beginoffset = stream.tell() + stream.write(entry[4]) # ctime + stream.write(entry[5]) # mtime + path = entry[3] + plen = len(path) & 0x0fff # path length + assert plen == len(path), "Path %s too long to fit into index" % entry[3] + flags = plen | (entry[2] << 12)# stage and path length are 2 byte flags + stream.write(struct.pack(">LLLLLL20sH", entry[6], entry[7], entry[0], + entry[8], entry[9], entry[10], binascii.unhexlify(entry[1]), flags)) + stream.write(path) + real_size = ((stream.tell() - beginoffset + 8) & ~7) + stream.write("\0" * ((beginoffset + real_size) - stream.tell())) - def write(self, file_path = None, ignore_tree_extension_data=False): - """ - Write the current state to our file path or to the given one - - ``file_path`` - If None, we will write to our stored file path from which we have - been initialized. Otherwise we write to the given file path. - Please note that this will change the file_path of this index to - the one you gave. - - ``ignore_tree_extension_data`` - If True, the TREE type extension data read in the index will not - be written to disk. Use this if you have altered the index and - would like to use git-write-tree afterwards to create a tree - representing your written changes. - If this data is present in the written index, git-write-tree - will instead write the stored/cached tree. - Alternatively, use IndexFile.write_tree() to handle this case - automatically - - Returns - self - - Note - Index writing based on the dulwich implementation - """ - write_op = ConcurrentWriteOperation(file_path or self._file_path) - stream = write_op._begin_writing() - - stream = SHA1Writer(stream) - - # header - stream.write("DIRC") - stream.write(struct.pack(">LL", self.version, len(self.entries))) - - # body - entries_sorted = self.entries.values() - entries_sorted.sort(key=lambda e: (e[3], e[2])) # use path/stage as sort key - for entry in entries_sorted: - self._write_cache_entry(stream, entry) - # END for each entry - - stored_ext_data = None - if ignore_tree_extension_data and self._extension_data and self._extension_data[:4] == 'TREE': - stored_ext_data = self._extension_data - self._extension_data = '' - # END extension data special handling - - # write previously cached extensions data - stream.write(self._extension_data) - - if stored_ext_data: - self._extension_data = stored_ext_data - # END reset previous ext data - - # write the sha over the content - stream.write_sha() - write_op._end_writing() - - # make sure we represent what we have written - if file_path is not None: - self._file_path = file_path - - @clear_cache - @default_index - def merge_tree(self, rhs, base=None): - """Merge the given rhs treeish into the current index, possibly taking - a common base treeish into account. 
- - As opposed to the from_tree_ method, this allows you to use an already - existing tree as the left side of the merge - - ``rhs`` - treeish reference pointing to the 'other' side of the merge. - - ``base`` - optional treeish reference pointing to the common base of 'rhs' and - this index which equals lhs - - Returns - self ( containing the merge and possibly unmerged entries in case of - conflicts ) - - Raise - GitCommandError in case there is a merge conflict. The error will - be raised at the first conflicting path. If you want to have proper - merge resolution to be done by yourself, you have to commit the changed - index ( or make a valid tree from it ) and retry with a three-way - index.from_tree call. - """ - # -i : ignore working tree status - # --aggressive : handle more merge cases - # -m : do an actual merge - args = ["--aggressive", "-i", "-m"] - if base is not None: - args.append(base) - args.append(rhs) - - self.repo.git.read_tree(args) - return self - - @classmethod - def from_tree(cls, repo, *treeish, **kwargs): - """ - Merge the given treeish revisions into a new index which is returned. - The original index will remain unaltered - - ``repo`` - The repository treeish are located in. - - ``*treeish`` - One, two or three Tree Objects or Commits. The result changes according to the - amount of trees. - If 1 Tree is given, it will just be read into a new index - If 2 Trees are given, they will be merged into a new index using a - two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' - one. It behaves like a fast-forward. - If 3 Trees are given, a 3-way merge will be performed with the first tree - being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, - tree 3 is the 'other' one - - ``**kwargs`` - Additional arguments passed to git-read-tree - - Returns - New IndexFile instance. It will point to a temporary index location which - does not exist anymore. If you intend to write such a merged Index, supply - an alternate file_path to its 'write' method. - - Note: - In the three-way merge case, --aggressive will be specified to automatically - resolve more cases in a commonly correct manner. Specify trivial=True as kwarg - to override that. - - As the underlying git-read-tree command takes into account the current index, - it will be temporarily moved out of the way to assure there are no unsuspected - interferences. - """ - if len(treeish) == 0 or len(treeish) > 3: - raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) - - arg_list = list() - # ignore that working tree and index possibly are out of date - if len(treeish)>1: - # drop unmerged entries when reading our index and merging - arg_list.append("--reset") - # handle non-trivial cases the way a real merge does - arg_list.append("--aggressive") - # END merge handling - - # tmp file created in git home directory to be sure renaming - # works - /tmp/ dirs could be on another device - tmp_index = tempfile.mktemp('','',repo.git_dir) - arg_list.append("--index-output=%s" % tmp_index) - arg_list.extend(treeish) - - # move current index out of the way - otherwise the merge may fail - # as it considers existing entries. moving it essentially clears the index. - # Unfortunately there is no 'soft' way to do it. 
- # The _TemporaryFileSwap assure the original file get put back - index_handler = _TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) - try: - repo.git.read_tree(*arg_list, **kwargs) - index = cls(repo, tmp_index) - index.entries # force it to read the file as we will delete the temp-file - del(index_handler) # release as soon as possible - finally: - if os.path.exists(tmp_index): - os.remove(tmp_index) - # END index merge handling - - return index - - @classmethod - def _index_mode_to_tree_index_mode(cls, index_mode): - """ - Cleanup a index_mode value. - This will return a index_mode that can be stored in a tree object. - - ``index_mode`` - Index_mode to clean up. - """ - if stat.S_ISLNK(index_mode): - return stat.S_IFLNK - elif stat.S_ISDIR(index_mode): - return stat.S_IFDIR - elif stat.S_IFMT(index_mode) == cls.S_IFGITLINK: - return cls.S_IFGITLINK - ret = stat.S_IFREG | 0644 - ret |= (index_mode & 0111) - return ret - - - # UTILITIES - def _iter_expand_paths(self, paths): - """Expand the directories in list of paths to the corresponding paths accordingly, - - Note: git will add items multiple times even if a glob overlapped - with manually specified paths or if paths where specified multiple - times - we respect that and do not prune""" - def raise_exc(e): - raise e - r = self.repo.working_tree_dir - rs = r + '/' - for path in paths: - abs_path = path - if not os.path.isabs(abs_path): - abs_path = os.path.join(r, path) - # END make absolute path - - # resolve globs if possible - if '?' in path or '*' in path or '[' in path: - for f in self._iter_expand_paths(glob.glob(abs_path)): - yield f.replace(rs, '') - continue - # END glob handling - try: - for root, dirs, files in os.walk(abs_path, onerror=raise_exc): - for rela_file in files: - # add relative paths only - yield os.path.join(root.replace(rs, ''), rela_file) - # END for each file in subdir - # END for each subdirectory - except OSError: - # was a file or something that could not be iterated - yield path.replace(rs, '') - # END path exception handling - # END for each path - - def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, read_from_stdout=True): - """Write path to proc.stdin and make sure it processes the item, including progress. - @return: stdout string - @param read_from_stdout: if True, proc.stdout will be read after the item - was sent to stdin. In that case, it will return None - @note: There is a bug in git-update-index that prevents it from sending - reports just in time. This is why we have a version that tries to - read stdout and one which doesn't. In fact, the stdout is not - important as the piped-in files are processed anyway and just in time""" - fprogress(filepath, False, item) - rval = None - try: - proc.stdin.write("%s\n" % filepath) - except IOError: - # pipe broke, usually because some error happend - raise fmakeexc() - # END write exception handling - proc.stdin.flush() - if read_from_stdout: - rval = proc.stdout.readline().strip() - fprogress(filepath, True, item) - return rval - - def iter_blobs(self, predicate = lambda t: True): - """ - Returns - Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) - - ``predicate`` - Function(t) returning True if tuple(stage, Blob) should be yielded by the - iterator. A default filter, the BlobFilter, allows you to yield blobs - only if they match a given list of paths. 
- """ - for entry in self.entries.itervalues(): - mode = self._index_mode_to_tree_index_mode(entry.mode) - blob = Blob(self.repo, entry.sha, mode, entry.path) - blob.size = entry.size - output = (entry.stage, blob) - if predicate(output): - yield output - # END for each entry - - def unmerged_blobs(self): - """ - Returns - Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being - a dictionary associating a path in the index with a list containing - sorted stage/blob pairs - - Note: - Blobs that have been removed in one side simply do not exist in the - given stage. I.e. a file removed on the 'other' branch whose entries - are at stage 3 will not have a stage 3 entry. - """ - is_unmerged_blob = lambda t: t[0] != 0 - path_map = dict() - for stage, blob in self.iter_blobs(is_unmerged_blob): - path_map.setdefault(blob.path, list()).append((stage, blob)) - # END for each unmerged blob - for l in path_map.itervalues(): - l.sort() - return path_map - - @classmethod - def get_entries_key(cls, *entry): - """ - Returns - Key suitable to be used for the index.entries dictionary - - ``entry`` - One instance of type BaseIndexEntry or the path and the stage - """ - if len(entry) == 1: - return (entry[0].path, entry[0].stage) - else: - return tuple(entry) - - - def resolve_blobs(self, iter_blobs): - """ - Resolve the blobs given in blob iterator. This will effectively remove the - index entries of the respective path at all non-null stages and add the given - blob as new stage null blob. - - For each path there may only be one blob, otherwise a ValueError will be raised - claiming the path is already at stage 0. - - Raise - ValueError if one of the blobs already existed at stage 0 - - Returns: - self - - Note - You will have to write the index manually once you are done, i.e. - index.resolve_blobs(blobs).write() - """ - for blob in iter_blobs: - stage_null_key = (blob.path, 0) - if stage_null_key in self.entries: - raise ValueError( "Path %r already exists at stage 0" % blob.path ) - # END assert blob is not stage 0 already - - # delete all possible stages - for stage in (1, 2, 3): - try: - del( self.entries[(blob.path, stage)] ) - except KeyError: - pass - # END ignore key errors - # END for each possible stage - - self.entries[stage_null_key] = IndexEntry.from_blob(blob) - # END for each blob - - return self - - def update(self): - """ - Reread the contents of our index file, discarding all cached information - we might have. - - Note: - This is a possibly dangerious operations as it will discard your changes - to index.entries - - Returns - self - """ - del(self.entries) - # allows to lazily reread on demand - return self - - def write_tree(self, missing_ok=False): - """ - Writes the Index in self to a corresponding Tree file into the repository - object database and returns it as corresponding Tree object. - - ``missing_ok`` - If True, missing objects referenced by this index will not result - in an error. 
- - Returns - Tree object representing this index - """ - index_path = self._index_path() - tmp_index_mover = _TemporaryFileSwap(index_path) - - self.write(index_path, ignore_tree_extension_data=True) - tree_sha = self.repo.git.write_tree(missing_ok=missing_ok) - - del(tmp_index_mover) # as soon as possible - - return Tree(self.repo, tree_sha, 0, '') - - def _process_diff_args(self, args): - try: - args.pop(args.index(self)) - except IndexError: - pass - # END remove self - return args - - - def _to_relative_path(self, path): - """ - Return - Version of path relative to our git directory or raise ValueError - if it is not within our git direcotory - """ - if not os.path.isabs(path): - return path - relative_path = path.replace(self.repo.working_tree_dir+os.sep, "") - if relative_path == path: - raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.working_tree_dir)) - return relative_path - - def _preprocess_add_items(self, items): - """ - Split the items into two lists of path strings and BaseEntries. - """ - paths = list() - entries = list() - - for item in items: - if isinstance(item, basestring): - paths.append(self._to_relative_path(item)) - elif isinstance(item, Blob): - entries.append(BaseIndexEntry.from_blob(item)) - elif isinstance(item, BaseIndexEntry): - entries.append(item) - else: - raise TypeError("Invalid Type: %r" % item) - # END for each item - return (paths, entries) - - - @clear_cache - @default_index - def add(self, items, force=True, fprogress=lambda *args: None): - """ - Add files from the working tree, specific blobs or BaseIndexEntries - to the index. The underlying index file will be written immediately, hence - you should provide as many items as possible to minimize the amounts of writes - - ``items`` - Multiple types of items are supported, types can be mixed within one call. - Different types imply a different handling. File paths may generally be - relative or absolute. - - - path string - strings denote a relative or absolute path into the repository pointing to - an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. - - Paths provided like this must exist. When added, they will be written - into the object database. - - PathStrings may contain globs, such as 'lib/__init__*' or can be directories - like 'lib', the latter ones will add all the files within the dirctory and - subdirectories. - - This equals a straight git-add. - - They are added at stage 0 - - - Blob object - Blobs are added as they are assuming a valid mode is set. - The file they refer to may or may not exist in the file system, but - must be a path relative to our repository. - - If their sha is null ( 40*0 ), their path must exist in the file system - as an object will be created from the data at the path.The handling - now very much equals the way string paths are processed, except that - the mode you have set will be kept. This allows you to create symlinks - by settings the mode respectively and writing the target of the symlink - directly into the file. This equals a default Linux-Symlink which - is not dereferenced automatically, except that it can be created on - filesystems not supporting it as well. - - Please note that globs or directories are not allowed in Blob objects. - - They are added at stage 0 - - - BaseIndexEntry or type - Handling equals the one of Blob objects, but the stage may be - explicitly set. - - ``force`` - If True, otherwise ignored or excluded files will be - added anyway. 
- As opposed to the git-add command, we enable this flag by default - as the API user usually wants the item to be added even though - they might be excluded. - - ``fprogress`` - Function with signature f(path, done=False, item=item) called for each - path to be added, once once it is about to be added where done==False - and once after it was added where done=True. - item is set to the actual item we handle, either a Path or a BaseIndexEntry - Please note that the processed path is not guaranteed to be present - in the index already as the index is currently being processed. - - Returns - List(BaseIndexEntries) representing the entries just actually added. - - Raises - GitCommandError if a supplied Path did not exist. Please note that BaseIndexEntry - Objects that do not have a null sha will be added even if their paths - do not exist. - """ - # sort the entries into strings and Entries, Blobs are converted to entries - # automatically - # paths can be git-added, for everything else we use git-update-index - entries_added = list() - paths, entries = self._preprocess_add_items(items) - - - # HANDLE PATHS - if paths: - # to get suitable progress information, pipe paths to stdin - args = ("--add", "--replace", "--verbose", "--stdin") - proc = self.repo.git.update_index(*args, **{'as_process':True, 'istream':subprocess.PIPE}) - make_exc = lambda : GitCommandError(("git-update-index",)+args, 128, proc.stderr.read()) - added_files = list() - - for filepath in self._iter_expand_paths(paths): - self._write_path_to_stdin(proc, filepath, filepath, make_exc, fprogress, read_from_stdout=False) - added_files.append(filepath) - # END for each filepath - self._flush_stdin_and_wait(proc, ignore_stdout=True) # ignore stdout - - # force rereading our entries once it is all done - del(self.entries) - entries_added.extend(self.entries[(f,0)] for f in added_files) - # END path handling - - # HANDLE ENTRIES - if entries: - null_mode_entries = [ e for e in entries if e.mode == 0 ] - if null_mode_entries: - raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") - # END null mode should be remove - - # HANLDE ENTRY OBJECT CREATION - # create objects if required, otherwise go with the existing shas - null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ] - if null_entries_indices: - # creating object ids is the time consuming part. Hence we will - # send progress for these now. 
- args = ("-w", "--stdin-paths") - proc = self.repo.git.hash_object(*args, **{'istream':subprocess.PIPE, 'as_process':True}) - make_exc = lambda : GitCommandError(("git-hash-object",)+args, 128, proc.stderr.read()) - obj_ids = list() - for ei in null_entries_indices: - entry = entries[ei] - obj_ids.append(self._write_path_to_stdin(proc, entry.path, entry, make_exc, fprogress)) - # END for each entry index - assert len(obj_ids) == len(null_entries_indices), "git-hash-object did not produce all requested objects: want %i, got %i" % ( len(null_entries_indices), len(obj_ids) ) - - # update IndexEntries with new object id - for i,new_sha in zip(null_entries_indices, obj_ids): - e = entries[i] - new_entry = BaseIndexEntry((e.mode, new_sha, e.stage, e.path)) - entries[i] = new_entry - # END for each index - # END null_entry handling - - # feed pure entries to stdin - proc = self.repo.git.update_index(index_info=True, istream=subprocess.PIPE, as_process=True) - for i, entry in enumerate(entries): - progress_sent = i in null_entries_indices - if not progress_sent: - fprogress(entry.path, False, entry) - # it cannot handle too-many newlines in this mode - if i != 0: - proc.stdin.write('\n') - proc.stdin.write(str(entry)) - proc.stdin.flush() - if not progress_sent: - fprogress(entry.path, True, entry) - # END for each enty - self._flush_stdin_and_wait(proc, ignore_stdout=True) - entries_added.extend(entries) - # END if there are base entries - - return entries_added - - def _items_to_rela_paths(self, items): - """Returns a list of repo-relative paths from the given items which - may be absolute or relative paths, entries or blobs""" - paths = list() - for item in items: - if isinstance(item, (BaseIndexEntry,Blob)): - paths.append(self._to_relative_path(item.path)) - elif isinstance(item, basestring): - paths.append(self._to_relative_path(item)) - else: - raise TypeError("Invalid item type: %r" % item) - # END for each item - return paths - - @clear_cache - @default_index - def remove(self, items, working_tree=False, **kwargs): - """ - Remove the given items from the index and optionally from - the working tree as well. - - ``items`` - Multiple types of items are supported which may be be freely mixed. - - - path string - Remove the given path at all stages. If it is a directory, you must - specify the r=True keyword argument to remove all file entries - below it. If absolute paths are given, they will be converted - to a path relative to the git repository directory containing - the working tree - - The path string may include globs, such as *.c. - - - Blob object - Only the path portion is used in this case. - - - BaseIndexEntry or compatible type - The only relevant information here Yis the path. The stage is ignored. - - ``working_tree`` - If True, the entry will also be removed from the working tree, physically - removing the respective file. This may fail if there are uncommited changes - in it. - - ``**kwargs`` - Additional keyword arguments to be passed to git-rm, such - as 'r' to allow recurive removal of - - Returns - List(path_string, ...) list of repository relative paths that have - been removed effectively. - This is interesting to know in case you have provided a directory or - globs. Paths are relative to the repository. 
- """ - args = list() - if not working_tree: - args.append("--cached") - args.append("--") - - # preprocess paths - paths = self._items_to_rela_paths(items) - removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() - - # process output to gain proper paths - # rm 'path' - return [ p[4:-1] for p in removed_paths ] - - @clear_cache - @default_index - def move(self, items, skip_errors=False, **kwargs): - """ - Rename/move the items, whereas the last item is considered the destination of - the move operation. If the destination is a file, the first item ( of two ) - must be a file as well. If the destination is a directory, it may be preceeded - by one or more directories or files. - - The working tree will be affected in non-bare repositories. - - ``items`` - Multiple types of items are supported, please see the 'remove' method - for reference. - ``skip_errors`` - If True, errors such as ones resulting from missing source files will - be skpped. - ``**kwargs`` - Additional arguments you would like to pass to git-mv, such as dry_run - or force. - - Returns - List(tuple(source_path_string, destination_path_string), ...) - A list of pairs, containing the source file moved as well as its - actual destination. Relative to the repository root. - - Raises - ValueErorr: If only one item was given - GitCommandError: If git could not handle your request - """ - args = list() - if skip_errors: - args.append('-k') - - paths = self._items_to_rela_paths(items) - if len(paths) < 2: - raise ValueError("Please provide at least one source and one destination of the move operation") - - was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None)) - kwargs['dry_run'] = True - - # first execute rename in dryrun so the command tells us what it actually does - # ( for later output ) - out = list() - mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() - - # parse result - first 0:n/2 lines are 'checking ', the remaining ones - # are the 'renaming' ones which we parse - for ln in xrange(len(mvlines)/2, len(mvlines)): - tokens = mvlines[ln].split(' to ') - assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] - - # [0] = Renaming x - # [1] = y - out.append((tokens[0][9:], tokens[1])) - # END for each line to parse - - # either prepare for the real run, or output the dry-run result - if was_dry_run: - return out - # END handle dryrun - - - # now apply the actual operation - kwargs.pop('dry_run') - self.repo.git.mv(args, paths, **kwargs) - - return out - + def write(self, file_path = None, ignore_tree_extension_data=False): + """ + Write the current state to our file path or to the given one + + ``file_path`` + If None, we will write to our stored file path from which we have + been initialized. Otherwise we write to the given file path. + Please note that this will change the file_path of this index to + the one you gave. + + ``ignore_tree_extension_data`` + If True, the TREE type extension data read in the index will not + be written to disk. Use this if you have altered the index and + would like to use git-write-tree afterwards to create a tree + representing your written changes. + If this data is present in the written index, git-write-tree + will instead write the stored/cached tree. 
+ Alternatively, use IndexFile.write_tree() to handle this case + automatically + + Returns + self + + Note + Index writing based on the dulwich implementation + """ + write_op = ConcurrentWriteOperation(file_path or self._file_path) + stream = write_op._begin_writing() + + stream = SHA1Writer(stream) + + # header + stream.write("DIRC") + stream.write(struct.pack(">LL", self.version, len(self.entries))) + + # body + entries_sorted = self.entries.values() + entries_sorted.sort(key=lambda e: (e[3], e[2])) # use path/stage as sort key + for entry in entries_sorted: + self._write_cache_entry(stream, entry) + # END for each entry + + stored_ext_data = None + if ignore_tree_extension_data and self._extension_data and self._extension_data[:4] == 'TREE': + stored_ext_data = self._extension_data + self._extension_data = '' + # END extension data special handling + + # write previously cached extensions data + stream.write(self._extension_data) + + if stored_ext_data: + self._extension_data = stored_ext_data + # END reset previous ext data + + # write the sha over the content + stream.write_sha() + write_op._end_writing() + + # make sure we represent what we have written + if file_path is not None: + self._file_path = file_path + + @clear_cache + @default_index + def merge_tree(self, rhs, base=None): + """Merge the given rhs treeish into the current index, possibly taking + a common base treeish into account. + + As opposed to the from_tree_ method, this allows you to use an already + existing tree as the left side of the merge + + ``rhs`` + treeish reference pointing to the 'other' side of the merge. + + ``base`` + optional treeish reference pointing to the common base of 'rhs' and + this index which equals lhs + + Returns + self ( containing the merge and possibly unmerged entries in case of + conflicts ) + + Raise + GitCommandError in case there is a merge conflict. The error will + be raised at the first conflicting path. If you want to have proper + merge resolution to be done by yourself, you have to commit the changed + index ( or make a valid tree from it ) and retry with a three-way + index.from_tree call. + """ + # -i : ignore working tree status + # --aggressive : handle more merge cases + # -m : do an actual merge + args = ["--aggressive", "-i", "-m"] + if base is not None: + args.append(base) + args.append(rhs) + + self.repo.git.read_tree(args) + return self + + @classmethod + def from_tree(cls, repo, *treeish, **kwargs): + """ + Merge the given treeish revisions into a new index which is returned. + The original index will remain unaltered + + ``repo`` + The repository treeish are located in. + + ``*treeish`` + One, two or three Tree Objects or Commits. The result changes according to the + amount of trees. + If 1 Tree is given, it will just be read into a new index + If 2 Trees are given, they will be merged into a new index using a + two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' + one. It behaves like a fast-forward. + If 3 Trees are given, a 3-way merge will be performed with the first tree + being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, + tree 3 is the 'other' one + + ``**kwargs`` + Additional arguments passed to git-read-tree + + Returns + New IndexFile instance. It will point to a temporary index location which + does not exist anymore. If you intend to write such a merged Index, supply + an alternate file_path to its 'write' method. 
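A hedged sketch of the two merge entry points, merge_tree() and from_tree(), with illustrative revision names ('master', 'feature', 'base_rev'):

    from git import Repo
    from git.index import IndexFile

    repo = Repo('/path/to/repo')                    # illustrative path
    # merge 'feature' into the current index, guided by a common base
    repo.index.merge_tree('feature', base='base_rev')

    # or build a detached three-way merge index, leaving the real one untouched
    merged = IndexFile.from_tree(repo, 'base_rev', 'master', 'feature')
    merged.write('/tmp/merged_index')   # its temporary file is gone, so supply a path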
+ + Note: + In the three-way merge case, --aggressive will be specified to automatically + resolve more cases in a commonly correct manner. Specify trivial=True as kwarg + to override that. + + As the underlying git-read-tree command takes into account the current index, + it will be temporarily moved out of the way to assure there are no unsuspected + interferences. + """ + if len(treeish) == 0 or len(treeish) > 3: + raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) + + arg_list = list() + # ignore that working tree and index possibly are out of date + if len(treeish)>1: + # drop unmerged entries when reading our index and merging + arg_list.append("--reset") + # handle non-trivial cases the way a real merge does + arg_list.append("--aggressive") + # END merge handling + + # tmp file created in git home directory to be sure renaming + # works - /tmp/ dirs could be on another device + tmp_index = tempfile.mktemp('','',repo.git_dir) + arg_list.append("--index-output=%s" % tmp_index) + arg_list.extend(treeish) + + # move current index out of the way - otherwise the merge may fail + # as it considers existing entries. moving it essentially clears the index. + # Unfortunately there is no 'soft' way to do it. + # The _TemporaryFileSwap assure the original file get put back + index_handler = _TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) + try: + repo.git.read_tree(*arg_list, **kwargs) + index = cls(repo, tmp_index) + index.entries # force it to read the file as we will delete the temp-file + del(index_handler) # release as soon as possible + finally: + if os.path.exists(tmp_index): + os.remove(tmp_index) + # END index merge handling + + return index + + @classmethod + def _index_mode_to_tree_index_mode(cls, index_mode): + """ + Cleanup a index_mode value. + This will return a index_mode that can be stored in a tree object. + + ``index_mode`` + Index_mode to clean up. + """ + if stat.S_ISLNK(index_mode): + return stat.S_IFLNK + elif stat.S_ISDIR(index_mode): + return stat.S_IFDIR + elif stat.S_IFMT(index_mode) == cls.S_IFGITLINK: + return cls.S_IFGITLINK + ret = stat.S_IFREG | 0644 + ret |= (index_mode & 0111) + return ret + + + # UTILITIES + def _iter_expand_paths(self, paths): + """Expand the directories in list of paths to the corresponding paths accordingly, + + Note: git will add items multiple times even if a glob overlapped + with manually specified paths or if paths where specified multiple + times - we respect that and do not prune""" + def raise_exc(e): + raise e + r = self.repo.working_tree_dir + rs = r + '/' + for path in paths: + abs_path = path + if not os.path.isabs(abs_path): + abs_path = os.path.join(r, path) + # END make absolute path + + # resolve globs if possible + if '?' in path or '*' in path or '[' in path: + for f in self._iter_expand_paths(glob.glob(abs_path)): + yield f.replace(rs, '') + continue + # END glob handling + try: + for root, dirs, files in os.walk(abs_path, onerror=raise_exc): + for rela_file in files: + # add relative paths only + yield os.path.join(root.replace(rs, ''), rela_file) + # END for each file in subdir + # END for each subdirectory + except OSError: + # was a file or something that could not be iterated + yield path.replace(rs, '') + # END path exception handling + # END for each path + + def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, read_from_stdout=True): + """Write path to proc.stdin and make sure it processes the item, including progress. 
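For reference, the raw plumbing pattern this helper wraps looks roughly like the following standalone sketch. It must run inside a repository and relies only on documented git-hash-object behaviour; the path is illustrative:

    import subprocess

    # feed paths to git-hash-object, reading one object id back per path
    proc = subprocess.Popen(['git', 'hash-object', '-w', '--stdin-paths'],
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    proc.stdin.write('lib/myfile.ext\n')            # illustrative path
    proc.stdin.flush()
    sha = proc.stdout.readline().strip()            # 40 byte hex sha of the blob
    proc.stdin.close()
    proc.wait()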
+ @return: stdout string + @param read_from_stdout: if True, proc.stdout will be read after the item + was sent to stdin. In that case, it will return None + @note: There is a bug in git-update-index that prevents it from sending + reports just in time. This is why we have a version that tries to + read stdout and one which doesn't. In fact, the stdout is not + important as the piped-in files are processed anyway and just in time""" + fprogress(filepath, False, item) + rval = None + try: + proc.stdin.write("%s\n" % filepath) + except IOError: + # pipe broke, usually because some error happend + raise fmakeexc() + # END write exception handling + proc.stdin.flush() + if read_from_stdout: + rval = proc.stdout.readline().strip() + fprogress(filepath, True, item) + return rval + + def iter_blobs(self, predicate = lambda t: True): + """ + Returns + Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) + + ``predicate`` + Function(t) returning True if tuple(stage, Blob) should be yielded by the + iterator. A default filter, the BlobFilter, allows you to yield blobs + only if they match a given list of paths. + """ + for entry in self.entries.itervalues(): + mode = self._index_mode_to_tree_index_mode(entry.mode) + blob = Blob(self.repo, entry.sha, mode, entry.path) + blob.size = entry.size + output = (entry.stage, blob) + if predicate(output): + yield output + # END for each entry + + def unmerged_blobs(self): + """ + Returns + Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being + a dictionary associating a path in the index with a list containing + sorted stage/blob pairs + + Note: + Blobs that have been removed in one side simply do not exist in the + given stage. I.e. a file removed on the 'other' branch whose entries + are at stage 3 will not have a stage 3 entry. + """ + is_unmerged_blob = lambda t: t[0] != 0 + path_map = dict() + for stage, blob in self.iter_blobs(is_unmerged_blob): + path_map.setdefault(blob.path, list()).append((stage, blob)) + # END for each unmerged blob + for l in path_map.itervalues(): + l.sort() + return path_map + + @classmethod + def get_entries_key(cls, *entry): + """ + Returns + Key suitable to be used for the index.entries dictionary + + ``entry`` + One instance of type BaseIndexEntry or the path and the stage + """ + if len(entry) == 1: + return (entry[0].path, entry[0].stage) + else: + return tuple(entry) + + + def resolve_blobs(self, iter_blobs): + """ + Resolve the blobs given in blob iterator. This will effectively remove the + index entries of the respective path at all non-null stages and add the given + blob as new stage null blob. + + For each path there may only be one blob, otherwise a ValueError will be raised + claiming the path is already at stage 0. + + Raise + ValueError if one of the blobs already existed at stage 0 + + Returns: + self + + Note + You will have to write the index manually once you are done, i.e. 
+ index.resolve_blobs(blobs).write() + """ + for blob in iter_blobs: + stage_null_key = (blob.path, 0) + if stage_null_key in self.entries: + raise ValueError( "Path %r already exists at stage 0" % blob.path ) + # END assert blob is not stage 0 already + + # delete all possible stages + for stage in (1, 2, 3): + try: + del( self.entries[(blob.path, stage)] ) + except KeyError: + pass + # END ignore key errors + # END for each possible stage + + self.entries[stage_null_key] = IndexEntry.from_blob(blob) + # END for each blob + + return self + + def update(self): + """ + Reread the contents of our index file, discarding all cached information + we might have. + + Note: + This is a possibly dangerious operations as it will discard your changes + to index.entries + + Returns + self + """ + del(self.entries) + # allows to lazily reread on demand + return self + + def write_tree(self, missing_ok=False): + """ + Writes the Index in self to a corresponding Tree file into the repository + object database and returns it as corresponding Tree object. + + ``missing_ok`` + If True, missing objects referenced by this index will not result + in an error. + + Returns + Tree object representing this index + """ + index_path = self._index_path() + tmp_index_mover = _TemporaryFileSwap(index_path) + + self.write(index_path, ignore_tree_extension_data=True) + tree_sha = self.repo.git.write_tree(missing_ok=missing_ok) + + del(tmp_index_mover) # as soon as possible + + return Tree(self.repo, tree_sha, 0, '') + + def _process_diff_args(self, args): + try: + args.pop(args.index(self)) + except IndexError: + pass + # END remove self + return args + + + def _to_relative_path(self, path): + """ + Return + Version of path relative to our git directory or raise ValueError + if it is not within our git direcotory + """ + if not os.path.isabs(path): + return path + relative_path = path.replace(self.repo.working_tree_dir+os.sep, "") + if relative_path == path: + raise ValueError("Absolute path %r is not in git repository at %r" % (path,self.repo.working_tree_dir)) + return relative_path + + def _preprocess_add_items(self, items): + """ + Split the items into two lists of path strings and BaseEntries. + """ + paths = list() + entries = list() + + for item in items: + if isinstance(item, basestring): + paths.append(self._to_relative_path(item)) + elif isinstance(item, Blob): + entries.append(BaseIndexEntry.from_blob(item)) + elif isinstance(item, BaseIndexEntry): + entries.append(item) + else: + raise TypeError("Invalid Type: %r" % item) + # END for each item + return (paths, entries) + + + @clear_cache + @default_index + def add(self, items, force=True, fprogress=lambda *args: None): + """ + Add files from the working tree, specific blobs or BaseIndexEntries + to the index. The underlying index file will be written immediately, hence + you should provide as many items as possible to minimize the amounts of writes + + ``items`` + Multiple types of items are supported, types can be mixed within one call. + Different types imply a different handling. File paths may generally be + relative or absolute. + + - path string + strings denote a relative or absolute path into the repository pointing to + an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. + + Paths provided like this must exist. When added, they will be written + into the object database. 
+ + PathStrings may contain globs, such as 'lib/__init__*' or can be directories + like 'lib', the latter ones will add all the files within the dirctory and + subdirectories. + + This equals a straight git-add. + + They are added at stage 0 + + - Blob object + Blobs are added as they are assuming a valid mode is set. + The file they refer to may or may not exist in the file system, but + must be a path relative to our repository. + + If their sha is null ( 40*0 ), their path must exist in the file system + as an object will be created from the data at the path.The handling + now very much equals the way string paths are processed, except that + the mode you have set will be kept. This allows you to create symlinks + by settings the mode respectively and writing the target of the symlink + directly into the file. This equals a default Linux-Symlink which + is not dereferenced automatically, except that it can be created on + filesystems not supporting it as well. + + Please note that globs or directories are not allowed in Blob objects. + + They are added at stage 0 + + - BaseIndexEntry or type + Handling equals the one of Blob objects, but the stage may be + explicitly set. + + ``force`` + If True, otherwise ignored or excluded files will be + added anyway. + As opposed to the git-add command, we enable this flag by default + as the API user usually wants the item to be added even though + they might be excluded. + + ``fprogress`` + Function with signature f(path, done=False, item=item) called for each + path to be added, once once it is about to be added where done==False + and once after it was added where done=True. + item is set to the actual item we handle, either a Path or a BaseIndexEntry + Please note that the processed path is not guaranteed to be present + in the index already as the index is currently being processed. + + Returns + List(BaseIndexEntries) representing the entries just actually added. + + Raises + GitCommandError if a supplied Path did not exist. Please note that BaseIndexEntry + Objects that do not have a null sha will be added even if their paths + do not exist. 
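Condensing the add() contract above into a sketch (paths, globs and the progress callback are illustrative):

    from git import Repo

    index = Repo('/path/to/repo').index             # illustrative path
    entries = index.add(['CHANGES'])                # plain path, staged at stage 0
    entries += index.add(['lib/*.py'])              # globs expand like git-add
    # the callback fires once before and once after each item is processed
    def fprogress(path, done, item):
        pass
    index.add(['lib'], fprogress=fprogress)         # directories add their contents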
+ """ + # sort the entries into strings and Entries, Blobs are converted to entries + # automatically + # paths can be git-added, for everything else we use git-update-index + entries_added = list() + paths, entries = self._preprocess_add_items(items) + + + # HANDLE PATHS + if paths: + # to get suitable progress information, pipe paths to stdin + args = ("--add", "--replace", "--verbose", "--stdin") + proc = self.repo.git.update_index(*args, **{'as_process':True, 'istream':subprocess.PIPE}) + make_exc = lambda : GitCommandError(("git-update-index",)+args, 128, proc.stderr.read()) + added_files = list() + + for filepath in self._iter_expand_paths(paths): + self._write_path_to_stdin(proc, filepath, filepath, make_exc, fprogress, read_from_stdout=False) + added_files.append(filepath) + # END for each filepath + self._flush_stdin_and_wait(proc, ignore_stdout=True) # ignore stdout + + # force rereading our entries once it is all done + del(self.entries) + entries_added.extend(self.entries[(f,0)] for f in added_files) + # END path handling + + # HANDLE ENTRIES + if entries: + null_mode_entries = [ e for e in entries if e.mode == 0 ] + if null_mode_entries: + raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") + # END null mode should be remove + + # HANLDE ENTRY OBJECT CREATION + # create objects if required, otherwise go with the existing shas + null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ] + if null_entries_indices: + # creating object ids is the time consuming part. Hence we will + # send progress for these now. + args = ("-w", "--stdin-paths") + proc = self.repo.git.hash_object(*args, **{'istream':subprocess.PIPE, 'as_process':True}) + make_exc = lambda : GitCommandError(("git-hash-object",)+args, 128, proc.stderr.read()) + obj_ids = list() + for ei in null_entries_indices: + entry = entries[ei] + obj_ids.append(self._write_path_to_stdin(proc, entry.path, entry, make_exc, fprogress)) + # END for each entry index + assert len(obj_ids) == len(null_entries_indices), "git-hash-object did not produce all requested objects: want %i, got %i" % ( len(null_entries_indices), len(obj_ids) ) + + # update IndexEntries with new object id + for i,new_sha in zip(null_entries_indices, obj_ids): + e = entries[i] + new_entry = BaseIndexEntry((e.mode, new_sha, e.stage, e.path)) + entries[i] = new_entry + # END for each index + # END null_entry handling + + # feed pure entries to stdin + proc = self.repo.git.update_index(index_info=True, istream=subprocess.PIPE, as_process=True) + for i, entry in enumerate(entries): + progress_sent = i in null_entries_indices + if not progress_sent: + fprogress(entry.path, False, entry) + # it cannot handle too-many newlines in this mode + if i != 0: + proc.stdin.write('\n') + proc.stdin.write(str(entry)) + proc.stdin.flush() + if not progress_sent: + fprogress(entry.path, True, entry) + # END for each enty + self._flush_stdin_and_wait(proc, ignore_stdout=True) + entries_added.extend(entries) + # END if there are base entries + + return entries_added + + def _items_to_rela_paths(self, items): + """Returns a list of repo-relative paths from the given items which + may be absolute or relative paths, entries or blobs""" + paths = list() + for item in items: + if isinstance(item, (BaseIndexEntry,Blob)): + paths.append(self._to_relative_path(item.path)) + elif isinstance(item, basestring): + paths.append(self._to_relative_path(item)) + else: + raise TypeError("Invalid item type: %r" 
% item) + # END for each item + return paths + + @clear_cache + @default_index + def remove(self, items, working_tree=False, **kwargs): + """ + Remove the given items from the index and optionally from + the working tree as well. + + ``items`` + Multiple types of items are supported which may be be freely mixed. + + - path string + Remove the given path at all stages. If it is a directory, you must + specify the r=True keyword argument to remove all file entries + below it. If absolute paths are given, they will be converted + to a path relative to the git repository directory containing + the working tree + + The path string may include globs, such as *.c. + + - Blob object + Only the path portion is used in this case. + + - BaseIndexEntry or compatible type + The only relevant information here Yis the path. The stage is ignored. + + ``working_tree`` + If True, the entry will also be removed from the working tree, physically + removing the respective file. This may fail if there are uncommited changes + in it. + + ``**kwargs`` + Additional keyword arguments to be passed to git-rm, such + as 'r' to allow recurive removal of + + Returns + List(path_string, ...) list of repository relative paths that have + been removed effectively. + This is interesting to know in case you have provided a directory or + globs. Paths are relative to the repository. + """ + args = list() + if not working_tree: + args.append("--cached") + args.append("--") + + # preprocess paths + paths = self._items_to_rela_paths(items) + removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() + + # process output to gain proper paths + # rm 'path' + return [ p[4:-1] for p in removed_paths ] + + @clear_cache + @default_index + def move(self, items, skip_errors=False, **kwargs): + """ + Rename/move the items, whereas the last item is considered the destination of + the move operation. If the destination is a file, the first item ( of two ) + must be a file as well. If the destination is a directory, it may be preceeded + by one or more directories or files. + + The working tree will be affected in non-bare repositories. + + ``items`` + Multiple types of items are supported, please see the 'remove' method + for reference. + ``skip_errors`` + If True, errors such as ones resulting from missing source files will + be skpped. + ``**kwargs`` + Additional arguments you would like to pass to git-mv, such as dry_run + or force. + + Returns + List(tuple(source_path_string, destination_path_string), ...) + A list of pairs, containing the source file moved as well as its + actual destination. Relative to the repository root. 
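A sketch of move(), including the dry-run preview whose output parsing appears below (file names illustrative):

    from git import Repo

    index = Repo('/path/to/repo').index             # illustrative path
    # preview only: returns (source, destination) pairs without renaming
    pairs = index.move(['old.txt', 'new.txt'], dry_run=True)
    # perform the rename, tolerating missing source files
    index.move(['a.txt', 'b.txt', 'some_dir'], skip_errors=True)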
+ + Raises + ValueErorr: If only one item was given + GitCommandError: If git could not handle your request + """ + args = list() + if skip_errors: + args.append('-k') + + paths = self._items_to_rela_paths(items) + if len(paths) < 2: + raise ValueError("Please provide at least one source and one destination of the move operation") + + was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None)) + kwargs['dry_run'] = True + + # first execute rename in dryrun so the command tells us what it actually does + # ( for later output ) + out = list() + mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() + + # parse result - first 0:n/2 lines are 'checking ', the remaining ones + # are the 'renaming' ones which we parse + for ln in xrange(len(mvlines)/2, len(mvlines)): + tokens = mvlines[ln].split(' to ') + assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] + + # [0] = Renaming x + # [1] = y + out.append((tokens[0][9:], tokens[1])) + # END for each line to parse + + # either prepare for the real run, or output the dry-run result + if was_dry_run: + return out + # END handle dryrun + + + # now apply the actual operation + kwargs.pop('dry_run') + self.repo.git.mv(args, paths, **kwargs) + + return out + - @default_index - def commit(self, message, parent_commits=None, head=True): - """ - Commit the current default index file, creating a commit object. - - For more information on the arguments, see tree.commit. - - ``NOTE``: - If you have manually altered the .entries member of this instance, - don't forget to write() your changes to disk beforehand. - - Returns - Commit object representing the new commit - """ - tree_sha = self.repo.git.write_tree() - return Commit.create_from_tree(self.repo, tree_sha, message, parent_commits, head) - - @classmethod - def _flush_stdin_and_wait(cls, proc, ignore_stdout = False): - proc.stdin.flush() - proc.stdin.close() - stdout = '' - if not ignore_stdout: - stdout = proc.stdout.read() - proc.stdout.close() - proc.wait() - return stdout - - @default_index - def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs): - """ - Checkout the given paths or all files from the version known to the index into - the working tree. - - ``paths`` - If None, all paths in the index will be checked out. Otherwise an iterable - of relative or absolute paths or a single path pointing to files or directories - in the index is expected. - - ``force`` - If True, existing files will be overwritten even if they contain local modifications. - If False, these will trigger a CheckoutError. - - ``fprogress`` - see Index.add_ for signature and explanation. - The provided progress information will contain None as path and item if no - explicit paths are given. Otherwise progress information will be send - prior and after a file has been checked out - - ``**kwargs`` - Additional arguments to be pasesd to git-checkout-index - - Returns - iterable yielding paths to files which have been checked out and are - guaranteed to match the version stored in the index - - Raise CheckoutError - If at least one file failed to be checked out. This is a summary, - hence it will checkout as many files as it can anyway. - If one of files or directories do not exist in the index - ( as opposed to the original git command who ignores them ). 
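A checkout() sketch with the summarised failure handling described above. It assumes CheckoutError is exported from the top-level git package and carries attributes mirroring its constructor arguments:

    from git import Repo, CheckoutError             # export assumed

    index = Repo('/path/to/repo').index             # illustrative path
    try:
        checked_out = index.checkout(paths=['lib/myfile.ext'], force=True)
    except CheckoutError, err:
        # assumed attributes matching the constructor arguments
        failed = err.failed_files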
- Raise GitCommandError if error lines could not be parsed - this truly is - an exceptional state - """ - args = ["--index"] - if force: - args.append("--force") - - def handle_stderr(proc, iter_checked_out_files): - stderr = proc.stderr.read() - if not stderr: - return - # line contents: - # git-checkout-index: this already exists - failed_files = list() - failed_reasons = list() - unknown_lines = list() - endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') - for line in stderr.splitlines(): - if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): - is_a_dir = " is a directory" - unlink_issue = "unable to unlink old '" - if line.endswith(is_a_dir): - failed_files.append(line[:-len(is_a_dir)]) - failed_reasons.append(is_a_dir) - elif line.startswith(unlink_issue): - failed_files.append(line[len(unlink_issue):line.rfind("'")]) - failed_reasons.append(unlink_issue) - else: - unknown_lines.append(line) - continue - # END special lines parsing - - for e in endings: - if line.endswith(e): - failed_files.append(line[20:-len(e)]) - failed_reasons.append(e) - break - # END if ending matches - # END for each possible ending - # END for each line - if unknown_lines: - raise GitCommandError(("git-checkout-index", ), 128, stderr) - if failed_files: - valid_files = list(set(iter_checked_out_files) - set(failed_files)) - raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) - # END stderr handler - - - if paths is None: - args.append("--all") - kwargs['as_process'] = 1 - fprogress(None, False, None) - proc = self.repo.git.checkout_index(*args, **kwargs) - proc.wait() - fprogress(None, True, None) - rval_iter = ( e.path for e in self.entries.itervalues() ) - handle_stderr(proc, rval_iter) - return rval_iter - else: - if isinstance(paths, basestring): - paths = [paths] - - args.append("--stdin") - kwargs['as_process'] = True - kwargs['istream'] = subprocess.PIPE - proc = self.repo.git.checkout_index(args, **kwargs) - make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read()) - checked_out_files = list() - for path in paths: - path = self._to_relative_path(path) - # if the item is not in the index, it could be a directory - path_is_directory = False - try: - self.entries[(path, 0)] - except KeyError: - dir = path - if not dir.endswith('/'): - dir += '/' - for entry in self.entries.itervalues(): - if entry.path.startswith(dir): - p = entry.path - self._write_path_to_stdin(proc, p, p, make_exc, fprogress, read_from_stdout=False) - checked_out_files.append(p) - path_is_directory = True - # END if entry is in directory - # END for each entry - # END path exception handlnig - - if not path_is_directory: - self._write_path_to_stdin(proc, path, path, make_exc, fprogress, read_from_stdout=False) - checked_out_files.append(path) - # END path is a file - # END for each path - self._flush_stdin_and_wait(proc, ignore_stdout=True) - - handle_stderr(proc, checked_out_files) - return checked_out_files - # END directory handling - # END paths handling - assert "Should not reach this point" - - @clear_cache - @default_index - def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): - """ - Reset the index to reflect the tree at the given commit. This will not - adjust our HEAD reference as opposed to HEAD.reset by default. 
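The reset() semantics in a sketch (revision names illustrative):

    from git import Repo

    index = Repo('/path/to/repo').index             # illustrative path
    # make the index match HEAD~1; HEAD and the working tree stay untouched
    index.reset('HEAD~1')
    # behave like HEAD.reset instead: move HEAD and rewrite the working tree
    index.reset('HEAD~1', working_tree=True, head=True)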
- - ``commit`` - Revision, Reference or Commit specifying the commit we should represent. - If you want to specify a tree only, use IndexFile.from_tree and overwrite - the default index. - - ``working_tree`` - If True, the files in the working tree will reflect the changed index. - If False, the working tree will not be touched - Please note that changes to the working copy will be discarded without - warning ! - - ``head`` - If True, the head will be set to the given commit. This is False by default, - but if True, this method behaves like HEAD.reset. - - ``**kwargs`` - Additional keyword arguments passed to git-reset - - Returns - self - """ - cur_head = self.repo.head - prev_commit = cur_head.commit - - # reset to get the tree/working copy - cur_head.reset(commit, index=True, working_tree=working_tree, paths=paths, **kwargs) - - # put the head back, possibly - if not head: - cur_head.reset(prev_commit, index=False, working_tree=False) - # END reset head - - return self - - @default_index - def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): - """ - Diff this index against the working copy or a Tree or Commit object - - For a documentation of the parameters and return values, see - Diffable.diff - - Note - Will only work with indices that represent the default git index as - they have not been initialized with a stream. - """ - # index against index is always empty - if other is self.Index: - return diff.DiffIndex() - - # index against anything but None is a reverse diff with the respective - # item. Handle existing -R flags properly. Transform strings to the object - # so that we can call diff on it - if isinstance(other, basestring): - other = Object.new(self.repo, other) - # END object conversion - - if isinstance(other, Object): - # invert the existing R flag - cur_val = kwargs.get('R', False) - kwargs['R'] = not cur_val - return other.diff(self.Index, paths, create_patch, **kwargs) - # END diff against other item handlin - - # if other is not None here, something is wrong - if other is not None: - raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) - - # diff against working copy - can be handled by superclass natively - return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) - + @default_index + def commit(self, message, parent_commits=None, head=True): + """ + Commit the current default index file, creating a commit object. + + For more information on the arguments, see tree.commit. + + ``NOTE``: + If you have manually altered the .entries member of this instance, + don't forget to write() your changes to disk beforehand. + + Returns + Commit object representing the new commit + """ + tree_sha = self.repo.git.write_tree() + return Commit.create_from_tree(self.repo, tree_sha, message, parent_commits, head) + + @classmethod + def _flush_stdin_and_wait(cls, proc, ignore_stdout = False): + proc.stdin.flush() + proc.stdin.close() + stdout = '' + if not ignore_stdout: + stdout = proc.stdout.read() + proc.stdout.close() + proc.wait() + return stdout + + @default_index + def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs): + """ + Checkout the given paths or all files from the version known to the index into + the working tree. + + ``paths`` + If None, all paths in the index will be checked out. Otherwise an iterable + of relative or absolute paths or a single path pointing to files or directories + in the index is expected. 
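The commit() call in context, as a minimal sketch (message illustrative; an explicit write() is only needed after editing .entries by hand, as the docstring notes):

    from git import Repo

    index = Repo('/path/to/repo').index             # illustrative path
    index.add(['CHANGES'])                          # stage something first
    new_commit = index.commit("illustrative commit message")
    # HEAD now points at new_commit because head=True is the default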
+ + ``force`` + If True, existing files will be overwritten even if they contain local modifications. + If False, these will trigger a CheckoutError. + + ``fprogress`` + see Index.add_ for signature and explanation. + The provided progress information will contain None as path and item if no + explicit paths are given. Otherwise progress information will be send + prior and after a file has been checked out + + ``**kwargs`` + Additional arguments to be pasesd to git-checkout-index + + Returns + iterable yielding paths to files which have been checked out and are + guaranteed to match the version stored in the index + + Raise CheckoutError + If at least one file failed to be checked out. This is a summary, + hence it will checkout as many files as it can anyway. + If one of files or directories do not exist in the index + ( as opposed to the original git command who ignores them ). + Raise GitCommandError if error lines could not be parsed - this truly is + an exceptional state + """ + args = ["--index"] + if force: + args.append("--force") + + def handle_stderr(proc, iter_checked_out_files): + stderr = proc.stderr.read() + if not stderr: + return + # line contents: + # git-checkout-index: this already exists + failed_files = list() + failed_reasons = list() + unknown_lines = list() + endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') + for line in stderr.splitlines(): + if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): + is_a_dir = " is a directory" + unlink_issue = "unable to unlink old '" + if line.endswith(is_a_dir): + failed_files.append(line[:-len(is_a_dir)]) + failed_reasons.append(is_a_dir) + elif line.startswith(unlink_issue): + failed_files.append(line[len(unlink_issue):line.rfind("'")]) + failed_reasons.append(unlink_issue) + else: + unknown_lines.append(line) + continue + # END special lines parsing + + for e in endings: + if line.endswith(e): + failed_files.append(line[20:-len(e)]) + failed_reasons.append(e) + break + # END if ending matches + # END for each possible ending + # END for each line + if unknown_lines: + raise GitCommandError(("git-checkout-index", ), 128, stderr) + if failed_files: + valid_files = list(set(iter_checked_out_files) - set(failed_files)) + raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) + # END stderr handler + + + if paths is None: + args.append("--all") + kwargs['as_process'] = 1 + fprogress(None, False, None) + proc = self.repo.git.checkout_index(*args, **kwargs) + proc.wait() + fprogress(None, True, None) + rval_iter = ( e.path for e in self.entries.itervalues() ) + handle_stderr(proc, rval_iter) + return rval_iter + else: + if isinstance(paths, basestring): + paths = [paths] + + args.append("--stdin") + kwargs['as_process'] = True + kwargs['istream'] = subprocess.PIPE + proc = self.repo.git.checkout_index(args, **kwargs) + make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read()) + checked_out_files = list() + for path in paths: + path = self._to_relative_path(path) + # if the item is not in the index, it could be a directory + path_is_directory = False + try: + self.entries[(path, 0)] + except KeyError: + dir = path + if not dir.endswith('/'): + dir += '/' + for entry in self.entries.itervalues(): + if entry.path.startswith(dir): + p = entry.path + self._write_path_to_stdin(proc, p, p, make_exc, fprogress, 
read_from_stdout=False) + checked_out_files.append(p) + path_is_directory = True + # END if entry is in directory + # END for each entry + # END path exception handlnig + + if not path_is_directory: + self._write_path_to_stdin(proc, path, path, make_exc, fprogress, read_from_stdout=False) + checked_out_files.append(path) + # END path is a file + # END for each path + self._flush_stdin_and_wait(proc, ignore_stdout=True) + + handle_stderr(proc, checked_out_files) + return checked_out_files + # END directory handling + # END paths handling + assert "Should not reach this point" + + @clear_cache + @default_index + def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): + """ + Reset the index to reflect the tree at the given commit. This will not + adjust our HEAD reference as opposed to HEAD.reset by default. + + ``commit`` + Revision, Reference or Commit specifying the commit we should represent. + If you want to specify a tree only, use IndexFile.from_tree and overwrite + the default index. + + ``working_tree`` + If True, the files in the working tree will reflect the changed index. + If False, the working tree will not be touched + Please note that changes to the working copy will be discarded without + warning ! + + ``head`` + If True, the head will be set to the given commit. This is False by default, + but if True, this method behaves like HEAD.reset. + + ``**kwargs`` + Additional keyword arguments passed to git-reset + + Returns + self + """ + cur_head = self.repo.head + prev_commit = cur_head.commit + + # reset to get the tree/working copy + cur_head.reset(commit, index=True, working_tree=working_tree, paths=paths, **kwargs) + + # put the head back, possibly + if not head: + cur_head.reset(prev_commit, index=False, working_tree=False) + # END reset head + + return self + + @default_index + def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): + """ + Diff this index against the working copy or a Tree or Commit object + + For a documentation of the parameters and return values, see + Diffable.diff + + Note + Will only work with indices that represent the default git index as + they have not been initialized with a stream. + """ + # index against index is always empty + if other is self.Index: + return diff.DiffIndex() + + # index against anything but None is a reverse diff with the respective + # item. Handle existing -R flags properly. Transform strings to the object + # so that we can call diff on it + if isinstance(other, basestring): + other = Object.new(self.repo, other) + # END object conversion + + if isinstance(other, Object): + # invert the existing R flag + cur_val = kwargs.get('R', False) + kwargs['R'] = not cur_val + return other.diff(self.Index, paths, create_patch, **kwargs) + # END diff against other item handlin + + # if other is not None here, something is wrong + if other is not None: + raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) + + # diff against working copy - can be handled by superclass natively + return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) + diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py index 192750e3..717fa808 100644 --- a/lib/git/objects/__init__.py +++ b/lib/git/objects/__init__.py @@ -9,4 +9,4 @@ from tree import * from commit import * __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ]
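Taken together, the diff() dispatch admits three call shapes, sketched here (revision name illustrative):

    from git import Repo

    index = Repo('/path/to/repo').index             # illustrative path
    empty = index.diff()              # index against itself: empty DiffIndex
    wt_diff = index.diff(None)        # index against the working copy
    head_diff = index.diff('HEAD')    # string becomes an Object, R flag inverted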
\ No newline at end of file + if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 8d6860de..6a51eed3 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,222 +6,222 @@ import os from git.utils import LazyMixin, join_path_native import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. 
- - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. 
+ + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. 
- file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. 
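A worked example of the conversion implemented below: each of the trailing six characters contributes three bits, so '100644' yields the same value as the corresponding octal literal (sketch in Python 2 syntax, matching the codebase):

    import stat

    mode = 0
    for iteration, char in enumerate(reversed('100644')):
        mode += int(char) << iteration * 3
    assert mode == int('100644', 8) == (stat.S_IFREG | 0644)    # 33188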
+ """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py index 11dee323..3f91d078 100644 --- a/lib/git/objects/blob.py +++ b/lib/git/objects/blob.py @@ -8,29 +8,29 @@ import mimetypes import base class Blob(base.IndexObject): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" - __slots__ = tuple() + __slots__ = tuple() - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) + + @property + def mime_type(self): + """ + The mime type of this file (based on the filename) - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. - """ - guesses = None - if self.path: - guesses = mimetypes.guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + Returns + str + + NOTE + Defaults to 'text/plain' in case the actual file type is unknown. + """ + guesses = None + if self.path: + guesses = mimetypes.guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - def __repr__(self): - return '<git.Blob "%s">' % self.sha + def __repr__(self): + return '<git.Blob "%s">' % self.sha diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 4d0f808d..d1bbb889 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -14,353 +14,353 @@ import tempfile import os class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - - # object configuration - type = "commit" - __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", - "message", "parents") - _id_attribute_ = "sha" - - def __init__(self, repo, sha, tree=None, author=None, authored_date=None, - committer=None, committed_date=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``sha`` - is the sha id of the commit or a ref - - ``parents`` : tuple( Commit, ... 
) - is a tuple of commit ids or actual Commits - - ``tree`` : Tree - is the corresponding tree id or an actual Tree - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - - ``committer`` : Actor - is the committer string - - ``committed_date`` : int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, sha) - self._set_self_from_args_(locals()) - - if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) - # END for each parent to convert - - if self.sha and tree is not None: - self.tree = Tree(repo, tree, path='') - # END id to tree conversion - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. - """ - if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message - else: - super(Commit, self)._set_cache_(attr) - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """ - Count the number of commits reachable from this commit - - ``paths`` - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - ``kwargs`` - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - Returns - int - """ - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - Returns - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes - """ - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options - - ``paths`` - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - - ``kwargs`` - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. 
'1970-01-01' - - Returns - iterator yielding Commit items - """ - options = {'pretty': 'raw', 'as_process' : True } - options.update(kwargs) - - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, **options) - return cls._iter_from_process_or_stream(repo, proc, True) - - def iter_parents(self, paths='', **kwargs): - """ - Iterate _all_ parents of this commit. - - ``paths`` - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - - ``kwargs`` - All arguments allowed by git-rev-list - - Return: - Iterator yielding Commit objects which are parents of self - """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items( self.repo, self, paths, **kwargs ) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) - return stats.Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) - - ``from_rev_list`` - If True, the stream was created by rev-list in which case we parse - the message differently - Returns - iterator returning Commit objects - """ - stream = proc_or_stream - if not hasattr(stream,'next'): - stream = proc_or_stream.stdout - - for line in stream: - commit_tokens = line.split() - id = commit_tokens[1] - assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date = utils.parse_actor_and_date(next_line) - committer, committed_date = utils.parse_actor_and_date(stream.next()) - - # empty line - stream.next() - - message_lines = [] - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """ - Commit the given tree, creating a commit object. - - ``repo`` - is the Repo - - ``tree`` - Sha of a tree or a tree object to become the tree of the new commit - - ``message`` - Commit message. 
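
The numstat parsing above relies on git printing one tab-separated
insertions/deletions/filename triple per changed file. A minimal sketch of
that contract (values illustrative):

    >>> line = "10\t2\tlib/git/objects/commit.py"
    >>> insertions, deletions, filename = line.split("\t")
    >>> insertions, deletions, filename
    ('10', '2', 'lib/git/objects/commit.py')
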
It may be an empty string if no message is provided. - It will be converted to a string in any case. - - ``parent_commits`` - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - - ``head`` - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. - - Returns - Commit object representing the new commit - - Note: - Additional information about hte committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information - """ - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - parent_args = [ ("-p", str(commit)) for commit in parent_commits ] - - # create message stream - tmp_file_path = tempfile.mktemp() - fp = open(tmp_file_path,"wb") - fp.write(str(message)) - fp.close() - fp = open(tmp_file_path,"rb") - fp.seek(0) - - try: - # write the current index as tree - commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) - new_commit = cls(repo, commit_sha) - - if head: - try: - repo.head.commit = new_commit - except ValueError: - # head is not yet set to master - create it and set it - import git.refs - master = git.refs.Head.create(repo, 'master', commit=new_commit) - repo.head.reference = master - # END handle empty repositories - # END advance head handling - - return new_commit - finally: - fp.close() - os.remove(tmp_file_path) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.sha - - def __repr__(self): - return '<git.Commit "%s">' % self.sha + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + + # object configuration + type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") + _id_attribute_ = "sha" + + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``sha`` + is the sha id of the commit or a ref + + ``parents`` : tuple( Commit, ... 
) + is a tuple of commit ids or actual Commits + + ``tree`` : Tree + is the corresponding tree id or an actual Tree + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + + ``committer`` : Actor + is the committer string + + ``committed_date`` : int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, sha) + self._set_self_from_args_(locals()) + + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.sha and tree is not None: + self.tree = Tree(repo, tree, path='') + # END id to tree conversion + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + """ + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. + """ + if attr in Commit.__slots__: + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.sha) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + def count(self, paths='', **kwargs): + """ + Count the number of commits reachable from this commit + + ``paths`` + is an optinal path or a list of paths restricting the return value + to commits actually containing the paths + + ``kwargs`` + Additional options to be passed to git-rev-list. They must not alter + the ouput style of the command, or parsing will yield incorrect results + Returns + int + """ + # yes, it makes a difference whether empty paths are given or not in our case + # as the empty paths version will ignore merge commits for some reason. + if paths: + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + else: + return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + + + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. + Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) + + @classmethod + def iter_items(cls, repo, rev, paths='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``rev`` + revision specifier, see git-rev-parse for viable options + + ``paths`` + is an optinal path or list of paths, if set only Commits that include the path + or paths will be considered + + ``kwargs`` + optional keyword arguments to git rev-list where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + ``since`` all commits since i.e. 
'1970-01-01' + + Returns + iterator yielding Commit items + """ + options = {'pretty': 'raw', 'as_process' : True } + options.update(kwargs) + + args = list() + if paths: + args.extend(('--', paths)) + # END if paths + + proc = repo.git.rev_list(rev, args, **options) + return cls._iter_from_process_or_stream(repo, proc, True) + + def iter_parents(self, paths='', **kwargs): + """ + Iterate _all_ parents of this commit. + + ``paths`` + Optional path or list of paths limiting the Commits to those that + contain at least one of the paths + + ``kwargs`` + All arguments allowed by git-rev-list + + Return: + Iterator yielding Commit objects which are parents of self + """ + # skip ourselves + skip = kwargs.get("skip", 1) + if skip == 0: # skip ourselves + skip = 1 + kwargs['skip'] = skip + + return self.iter_items( self.repo, self, paths, **kwargs ) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) + return stats.Stats._list_from_string(self.repo, text) + + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``proc`` + git-rev-list process instance (raw format) + + ``from_rev_list`` + If True, the stream was created by rev-list in which case we parse + the message differently + Returns + iterator returning Commit objects + """ + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + commit_tokens = line.split() + id = commit_tokens[1] + assert commit_tokens[0] == "commit" + tree = stream.next().split()[1] + + parents = [] + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = utils.parse_actor_and_date(next_line) + committer, committed_date = utils.parse_actor_and_date(stream.next()) + + # empty line + stream.next() + + message_lines = [] + if from_rev_list: + for msg_line in stream: + if not msg_line.startswith(' '): + # and forget about this empty marker + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + else: + # a stream from our data simply gives us the plain message + for msg_line in stream: + message_lines.append(msg_line) + # END message parsing + message = '\n'.join(message_lines) + + yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream + + + @classmethod + def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): + """ + Commit the given tree, creating a commit object. + + ``repo`` + is the Repo + + ``tree`` + Sha of a tree or a tree object to become the tree of the new commit + + ``message`` + Commit message. 
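
A brief usage sketch for the query methods above (count, iter_items); the
repository path and outputs are illustrative, the API is as defined in this
module:

    >>> from git import Repo, Commit
    >>> repo = Repo('/path/to/python-git')     # hypothetical checkout
    >>> repo.head.commit.count()
    42
    >>> [c.summary for c in Commit.iter_items(repo, 'master', max_count=2)]
    ['fix parser', 'initial import']
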
It may be an empty string if no message is provided. + It will be converted to a string in any case. + + ``parent_commits`` + Optional Commit objects to use as parents for the new commit. + If empty list, the commit will have no parents at all and become + a root commit. + If None , the current head commit will be the parent of the + new commit object + + ``head`` + If True, the HEAD will be advanced to the new commit automatically. + Else the HEAD will remain pointing on the previous commit. This could + lead to undesired results when diffing files. + + Returns + Commit object representing the new commit + + Note: + Additional information about hte committer and Author are taken from the + environment or from the git configuration, see git-commit-tree for + more information + """ + parents = parent_commits + if parent_commits is None: + try: + parent_commits = [ repo.head.commit ] + except ValueError: + # empty repositories have no head commit + parent_commits = list() + # END handle parent commits + # END if parent commits are unset + + parent_args = [ ("-p", str(commit)) for commit in parent_commits ] + + # create message stream + tmp_file_path = tempfile.mktemp() + fp = open(tmp_file_path,"wb") + fp.write(str(message)) + fp.close() + fp = open(tmp_file_path,"rb") + fp.seek(0) + + try: + # write the current index as tree + commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) + new_commit = cls(repo, commit_sha) + + if head: + try: + repo.head.commit = new_commit + except ValueError: + # head is not yet set to master - create it and set it + import git.refs + master = git.refs.Head.create(repo, 'master', commit=new_commit) + repo.head.reference = master + # END handle empty repositories + # END advance head handling + + return new_commit + finally: + fp.close() + os.remove(tmp_file_path) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.sha + + def __repr__(self): + return '<git.Commit "%s">' % self.sha diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index c329edf7..e880bbe5 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -10,66 +10,66 @@ import base import utils class TagObject(base.Object): - """ - Non-Lightweight tag carrying additional information about an object we are pointing - to. 
- """ - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - - def __init__(self, repo, sha, object=None, tag=None, - tagger=None, tagged_date=None, message=None): - """ - Initialize a tag object with additional data - - ``repo`` - repository this object is located in - - ``sha`` - SHA1 or ref suitable for git-rev-parse - - ``object`` - Object instance of object we are pointing to - - ``tag`` - name of this tag - - ``tagger`` - Actor identifying the tagger - - ``tagged_date`` : int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - """ - super(TagObject, self).__init__(repo, sha ) - self._set_self_from_args_(locals()) - - def _set_cache_(self, attr): - """ - Cache all our attributes at once - """ - if attr in TagObject.__slots__: - lines = self.data.splitlines() - - obj, hexsha = lines[0].split(" ") # object <hexsha> - type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag <tag name> - - tagger_info = lines[3][7:]# tagger <actor> <date> - self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in csse there really is no message, it would not exist. Otherwise - # a newline separates header from message - if len(lines) > 5: - self.message = "\n".join(lines[5:]) - else: - self.message = '' - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - + """ + Non-Lightweight tag carrying additional information about an object we are pointing + to. + """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, sha, object=None, tag=None, + tagger=None, tagged_date=None, message=None): + """ + Initialize a tag object with additional data + + ``repo`` + repository this object is located in + + ``sha`` + SHA1 or ref suitable for git-rev-parse + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + """ + super(TagObject, self).__init__(repo, sha ) + self._set_self_from_args_(locals()) + + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in TagObject.__slots__: + lines = self.data.splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in csse there really is no message, it would not exist. 
Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a8ad0ced..5bd29a2a 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -20,230 +20,230 @@ def sha_to_hex(sha): class Tree(base.IndexObject, diff.Diffable, utils.Traversable): - """ - Tress represent a ordered list of Blobs and other Trees. Hence it can be - accessed like a list. - - Tree's will cache their contents after first retrieval to improve efficiency. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. - - You may as well access by index - blob = tree[0] - - - """ - - type = "tree" - __slots__ = "_cache" - - # using ascii codes for comparison - commit_id = 016 - blob_id = 010 - symlink_id = 012 - tree_id = 004 - - - def __init__(self, repo, sha, mode=0, path=None): - super(Tree, self).__init__(repo, sha, mode, path) - - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return index_object._cache - return tuple() - - - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - self._cache = self._get_tree_cache() - else: - super(Tree, self)._set_cache_(attr) - - def _get_tree_cache(self): - """ - Return - list(object_instance, ...) - - ``treeish`` - sha or ref identifying a tree - """ - out = list() - for obj in self._iter_from_data(): - if obj is not None: - out.append(obj) - # END if object was handled - # END for each line from ls-tree - return out - - - def _iter_from_data(self): - """ - Reads the binary non-pretty printed representation of a tree and converts - it into Blob, Tree or Commit objects. - - Note: This method was inspired by the parse_tree method in dulwich. - - Returns - list(IndexObject, ...) 
- """ - ord_zero = ord('0') - data = self.data - len_data = len(data) - i = 0 - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - type_id = mode >> 12 - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - name = data[ns:i] - path = join_path(self.path, name) - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - - hexsha = sha_to_hex(sha) - if type_id == self.blob_id or type_id == self.symlink_id: - yield blob.Blob(self.repo, hexsha, mode, path) - elif type_id == self.tree_id: - yield Tree(self.repo, hexsha, mode, path) - elif type_id == self.commit_id: - # todo - yield None - else: - raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) - # END for each byte in data stream - - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> - >>> Repo('/path/to/python-git').tree/'README.txt' - <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> - - Returns - ``git.Blob`` or ``git.Tree`` - - Raise - KeyError if given file or tree does not exist in tree - """ - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for obj in self._cache: - if obj.name == file: - return obj - # END for each obj - raise KeyError( msg % file ) - # END handle long paths - - - def __repr__(self): - return '<git.Tree "%s">' % self.sha - - @property - def trees(self): - """ - Returns - list(Tree, ...) list of trees directly below this tree - """ - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """ - Returns - list(Blob, ...) 
list of blobs directly below this tree - """ - return [ i for i in self if i.type == "blob" ] - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see utils.Traversable.traverse - - Trees are set to visist_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - - # List protocol - def __getslice__(self,i,j): - return self._cache[i:j] - - def __iter__(self): - return iter(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self,item): - if isinstance(item, int): - return self._cache[item] - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self,item): - if isinstance(item, base.IndexObject): - return item in self._cache - - # compatability - for obj in self._cache: - if item == obj.path: - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._cache) + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ + + type = "tree" + __slots__ = "_cache" + + # using ascii codes for comparison + commit_id = 016 + blob_id = 010 + symlink_id = 012 + tree_id = 004 + + + def __init__(self, repo, sha, mode=0, path=None): + super(Tree, self).__init__(repo, sha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return index_object._cache + return tuple() + + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() + else: + super(Tree, self)._set_cache_(attr) + + def _get_tree_cache(self): + """ + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for obj in self._iter_from_data(): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + + Returns + list(IndexObject, ...) 
+ """ + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + type_id = mode >> 12 + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + path = join_path(self.path, name) + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.blob_id or type_id == self.symlink_id: + yield blob.Blob(self.repo, hexsha, mode, path) + elif type_id == self.tree_id: + yield Tree(self.repo, hexsha, mode, path) + elif type_id == self.commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) + # END for each byte in data stream + + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> + >>> Repo('/path/to/python-git').tree/'README.txt' + <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> + + Returns + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree + """ + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for obj in self._cache: + if obj.name == file: + return obj + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + def __repr__(self): + return '<git.Tree "%s">' % self.sha + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) 
list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see utils.Traversable.traverse + + Trees are set to visist_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + + def __iter__(self): + return iter(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index ada34cc0..ec5453f1 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -11,157 +11,157 @@ from collections import deque as Deque from git.actor import Actor def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + # precompiled regex _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') def parse_actor_and_date(line): - """ - Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - Returns - [Actor, int_seconds_since_epoch] - """ - m = _re_actor_epoch.search(line) - actor, epoch = m.groups() - return (Actor._from_string(actor), int(epoch)) - - - + """ + Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + Returns + [Actor, int_seconds_since_epoch] + """ + m = _re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor._from_string(actor), int(epoch)) + + + class ProcessStreamAdapter(object): - """ - Class wireing all calls to the contained Process instance. 
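
Pulling together the list and path protocols Tree defines above (repository
path and shas mirror the __div__ docstring and are illustrative):

    >>> t = Repo('/path/to/python-git').tree
    >>> t['lib']                                # same lookup as t / 'lib'
    <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e">
    >>> t / 'README.txt'
    <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df">
    >>> len(t) == len(t.trees) + len(t.blobs)   # children are subtrees or blobs
    True
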
- - Use this type to hide the underlying process to provide access only to a specified - stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope. - """ - __slots__ = ("_proc", "_stream") - def __init__(self, process, stream_name): - self._proc = process - self._stream = getattr(process, stream_name) - - def __getattr__(self, attr): - return getattr(self._stream, attr) - - + """ + Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. + """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + class Traversable(object): - """Simple interface to perforam depth-first or breadth-first traversals - into one direction. - Subclasses only need to implement one function. - Instances of the Subclass must be hashable""" - __slots__ = tuple() - - @classmethod - def _get_intermediate_items(cls, item): - """ - Returns: - List of items connected to the given item. - Must be implemented in subclass - """ - raise NotImplementedError("To be implemented in subclass") - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = True, ignore_self=1, as_edge = False ): - """ - ``Returns`` - iterator yieling of items found when traversing self - - ``predicate`` - f(i,d) returns False if item i at depth d should not be included in the result - - ``prune`` - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. - - ``depth`` - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predessessors/successors - - ``branch_first`` - if True, items will be returned branch first, otherwise depth first - - ``visit_once`` - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. - - ``ignore_self`` - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None - - ``as_edge`` - if True, return a pair of items, first being the source, second the - destinatination, i.e. 
tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item == self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. + Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. + Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """ + ``Returns`` + iterator yieling of items found when traversing self + + ``predicate`` + f(i,d) returns False if item i at depth d should not be included in the result + + ``prune`` + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + ``depth`` + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + ``branch_first`` + if True, items will be returned branch first, otherwise depth first + + ``visit_once`` + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + ``ignore_self`` + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + ``as_edge`` + if True, return a pair of items, first being the source, second the + destinatination, i.e. 
tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item == self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! + nd = d + 1 + if depth > -1 and nd > depth: + continue + + addToStack( stack, item, branch_first, nd ) + # END for each item on work stack diff --git a/lib/git/refs.py b/lib/git/refs.py index 4941e0a6..ddf358fe 100644 --- a/lib/git/refs.py +++ b/lib/git/refs.py @@ -12,940 +12,940 @@ from utils import LazyMixin, Iterable, join_path, join_path_native, to_native_pa class SymbolicReference(object): - """ - Represents a special case of a reference such that this reference is symbolic. - It does not point to a specific commit, but to another Head, which itself - specifies a commit. - - A typical example for a symbolic reference is HEAD. - """ - __slots__ = ("repo", "path") - _common_path_default = "" - _id_attribute_ = "name" - - def __init__(self, repo, path): - self.repo = repo - self.path = path - - def __str__(self): - return self.path - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.path) - - def __eq__(self, other): - return self.path == other.path - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.path) - - @property - def name(self): - """ - Returns - In case of symbolic references, the shortest assumable name - is the path itself. - """ - return self.path - - def _get_path(self): - return join_path_native(self.repo.git_dir, self.path) - - @classmethod - def _get_packed_refs_path(cls, repo): - return os.path.join(repo.git_dir, 'packed-refs') - - @classmethod - def _iter_packed_refs(cls, repo): - """Returns an iterator yielding pairs of sha1/path pairs for the corresponding - refs. - NOTE: The packed refs file will be kept open as long as we iterate""" - try: - fp = open(cls._get_packed_refs_path(repo), 'r') - for line in fp: - line = line.strip() - if not line: - continue - if line.startswith('#'): - if line.startswith('# pack-refs with:') and not line.endswith('peeled'): - raise TypeError("PackingType of packed-Refs not understood: %r" % line) - # END abort if we do not understand the packing scheme - continue - # END parse comment - - # skip dereferenced tag object entries - previous line was actual - # tag reference for it - if line[0] == '^': - continue - - yield tuple(line.split(' ', 1)) - # END for each line - except (OSError,IOError): - raise StopIteration - # END no packed-refs file handling - # NOTE: Had try-finally block around here to close the fp, - # but some python version woudn't allow yields within that. - # I believe files are closing themselves on destruction, so it is - # alright. 
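
A sketch of the traversal switches documented above, applied to a tree and to
the commit graph (repo is a hypothetical Repo instance):

    >>> tree = repo.tree
    >>> blobs = list(tree.traverse(predicate=lambda i, d: i.type == 'blob'))
    >>> direct = list(tree.traverse(depth=1))                  # direct children only
    >>> ancestors = list(repo.head.commit.traverse(depth=2))   # parents and grandparents
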
- - def _get_ref_info(self): - """Return: (sha, target_ref_path) if available, the sha the file at - rela_path points to, or None. target_ref_path is the reference we - point to, or None""" - tokens = None - try: - fp = open(self._get_path(), 'r') - value = fp.read().rstrip() - fp.close() - tokens = value.split(" ") - except (OSError,IOError): - # Probably we are just packed, find our entry in the packed refs file - # NOTE: We are not a symbolic ref if we are in a packed file, as these - # are excluded explictly - for sha, path in self._iter_packed_refs(self.repo): - if path != self.path: continue - tokens = (sha, path) - break - # END for each packed ref - # END handle packed refs - - if tokens is None: - raise ValueError("Reference at %r does not exist" % self.path) - - # is it a reference ? - if tokens[0] == 'ref:': - return (None, tokens[1]) - - # its a commit - if self.repo.re_hexsha_only.match(tokens[0]): - return (tokens[0], None) - - raise ValueError("Failed to parse reference information from %r" % self.path) - - def _get_commit(self): - """ - Returns: - Commit object we point to, works for detached and non-detached - SymbolicReferences - """ - # we partially reimplement it to prevent unnecessary file access - sha, target_ref_path = self._get_ref_info() - - # it is a detached reference - if sha: - return Commit(self.repo, sha) - - return Reference.from_path(self.repo, target_ref_path).commit - - def _set_commit(self, commit): - """ - Set our commit, possibly dereference our symbolic reference first. - """ - if self.is_detached: - return self._set_reference(commit) - - # set the commit on our reference - self._get_reference().commit = commit - - commit = property(_get_commit, _set_commit, doc="Query or set commits directly") - - def _get_reference(self): - """ - Returns - Reference Object we point to - """ - sha, target_ref_path = self._get_ref_info() - if target_ref_path is None: - raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) - return Reference.from_path(self.repo, target_ref_path) - - def _set_reference(self, ref): - """ - Set ourselves to the given ref. It will stay a symbol if the ref is a Reference. - Otherwise we try to get a commit from it using our interface. 
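
_get_ref_info above distinguishes exactly two on-disk forms of a ref file; a
sketch of both, read the way the method does (sha illustrative):

    >>> import os
    >>> open(os.path.join(repo.git_dir, 'HEAD')).read()
    'ref: refs/heads/master\n'
    >>> open(os.path.join(repo.git_dir, 'refs/heads/master')).read()
    '6cc23ee138be09ff8c28b07162720018b244e95e\n'
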
- - Strings are allowed but will be checked to be sure we have a commit - """ - write_value = None - if isinstance(ref, SymbolicReference): - write_value = "ref: %s" % ref.path - elif isinstance(ref, Commit): - write_value = ref.sha - else: - try: - write_value = ref.commit.sha - except AttributeError: - sha = str(ref) - try: - obj = Object.new(self.repo, sha) - if obj.type != "commit": - raise TypeError("Invalid object type behind sha: %s" % sha) - write_value = obj.sha - except Exception: - raise ValueError("Could not extract object from %s" % ref) - # END end try string - # END try commit attribute - - # if we are writing a ref, use symbolic ref to get the reflog and more - # checking - # Otherwise we detach it and have to do it manually - if write_value.startswith('ref:'): - self.repo.git.symbolic_ref(self.path, write_value[5:]) - return - # END non-detached handling - - path = self._get_path() - directory = os.path.dirname(path) - if not os.path.isdir(directory): - os.makedirs(directory) - - fp = open(path, "wb") - try: - fp.write(write_value) - finally: - fp.close() - # END writing - - reference = property(_get_reference, _set_reference, doc="Returns the Reference we point to") - - # alias - ref = reference - - def is_valid(self): - """ - Returns - True if the reference is valid, hence it can be read and points to - a valid object or reference. - """ - try: - self.commit - except (OSError, ValueError): - return False - else: - return True - - @property - def is_detached(self): - """ - Returns - True if we are a detached reference, hence we point to a specific commit - instead to another reference - """ - try: - self.reference - return False - except TypeError: - return True - - @classmethod - def from_path(cls, repo, path): - """ - Return - Instance of SymbolicReference or HEAD - depending on the given path - - Note - It enforces that symbolic refs in git are only found in the - root of the .git repository, never within a folder. - """ - if not path: - raise ValueError("Cannot create Symbolic Reference from %r" % path) - - if path == 'HEAD': - return HEAD(repo, path) - - if '/' not in path: - return SymbolicReference(repo, path) - - raise ValueError("Could not find symbolic reference type suitable to handle path %r" % path) + """ + Represents a special case of a reference such that this reference is symbolic. + It does not point to a specific commit, but to another Head, which itself + specifies a commit. + + A typical example for a symbolic reference is HEAD. + """ + __slots__ = ("repo", "path") + _common_path_default = "" + _id_attribute_ = "name" + + def __init__(self, repo, path): + self.repo = repo + self.path = path + + def __str__(self): + return self.path + + def __repr__(self): + return '<git.%s "%s">' % (self.__class__.__name__, self.path) + + def __eq__(self, other): + return self.path == other.path + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.path) + + @property + def name(self): + """ + Returns + In case of symbolic references, the shortest assumable name + is the path itself. + """ + return self.path + + def _get_path(self): + return join_path_native(self.repo.git_dir, self.path) + + @classmethod + def _get_packed_refs_path(cls, repo): + return os.path.join(repo.git_dir, 'packed-refs') + + @classmethod + def _iter_packed_refs(cls, repo): + """Returns an iterator yielding pairs of sha1/path pairs for the corresponding + refs. 
+ NOTE: The packed refs file will be kept open as long as we iterate""" + try: + fp = open(cls._get_packed_refs_path(repo), 'r') + for line in fp: + line = line.strip() + if not line: + continue + if line.startswith('#'): + if line.startswith('# pack-refs with:') and not line.endswith('peeled'): + raise TypeError("PackingType of packed-Refs not understood: %r" % line) + # END abort if we do not understand the packing scheme + continue + # END parse comment + + # skip dereferenced tag object entries - previous line was actual + # tag reference for it + if line[0] == '^': + continue + + yield tuple(line.split(' ', 1)) + # END for each line + except (OSError,IOError): + raise StopIteration + # END no packed-refs file handling + # NOTE: Had try-finally block around here to close the fp, + # but some python version woudn't allow yields within that. + # I believe files are closing themselves on destruction, so it is + # alright. + + def _get_ref_info(self): + """Return: (sha, target_ref_path) if available, the sha the file at + rela_path points to, or None. target_ref_path is the reference we + point to, or None""" + tokens = None + try: + fp = open(self._get_path(), 'r') + value = fp.read().rstrip() + fp.close() + tokens = value.split(" ") + except (OSError,IOError): + # Probably we are just packed, find our entry in the packed refs file + # NOTE: We are not a symbolic ref if we are in a packed file, as these + # are excluded explictly + for sha, path in self._iter_packed_refs(self.repo): + if path != self.path: continue + tokens = (sha, path) + break + # END for each packed ref + # END handle packed refs + + if tokens is None: + raise ValueError("Reference at %r does not exist" % self.path) + + # is it a reference ? + if tokens[0] == 'ref:': + return (None, tokens[1]) + + # its a commit + if self.repo.re_hexsha_only.match(tokens[0]): + return (tokens[0], None) + + raise ValueError("Failed to parse reference information from %r" % self.path) + + def _get_commit(self): + """ + Returns: + Commit object we point to, works for detached and non-detached + SymbolicReferences + """ + # we partially reimplement it to prevent unnecessary file access + sha, target_ref_path = self._get_ref_info() + + # it is a detached reference + if sha: + return Commit(self.repo, sha) + + return Reference.from_path(self.repo, target_ref_path).commit + + def _set_commit(self, commit): + """ + Set our commit, possibly dereference our symbolic reference first. + """ + if self.is_detached: + return self._set_reference(commit) + + # set the commit on our reference + self._get_reference().commit = commit + + commit = property(_get_commit, _set_commit, doc="Query or set commits directly") + + def _get_reference(self): + """ + Returns + Reference Object we point to + """ + sha, target_ref_path = self._get_ref_info() + if target_ref_path is None: + raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) + return Reference.from_path(self.repo, target_ref_path) + + def _set_reference(self, ref): + """ + Set ourselves to the given ref. It will stay a symbol if the ref is a Reference. + Otherwise we try to get a commit from it using our interface. 
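
Read access through the commit and reference properties defined above,
sketched on HEAD (values illustrative):

    >>> head = SymbolicReference(repo, 'HEAD')
    >>> head.is_detached
    False
    >>> head.reference                 # the Head we point to
    <git.Head "refs/heads/master">
    >>> head.commit                    # dereferenced all the way to a commit
    <git.Commit "6cc23ee138be09ff8c28b07162720018b244e95e">
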
+ + Strings are allowed but will be checked to be sure we have a commit + """ + write_value = None + if isinstance(ref, SymbolicReference): + write_value = "ref: %s" % ref.path + elif isinstance(ref, Commit): + write_value = ref.sha + else: + try: + write_value = ref.commit.sha + except AttributeError: + sha = str(ref) + try: + obj = Object.new(self.repo, sha) + if obj.type != "commit": + raise TypeError("Invalid object type behind sha: %s" % sha) + write_value = obj.sha + except Exception: + raise ValueError("Could not extract object from %s" % ref) + # END end try string + # END try commit attribute + + # if we are writing a ref, use symbolic ref to get the reflog and more + # checking + # Otherwise we detach it and have to do it manually + if write_value.startswith('ref:'): + self.repo.git.symbolic_ref(self.path, write_value[5:]) + return + # END non-detached handling + + path = self._get_path() + directory = os.path.dirname(path) + if not os.path.isdir(directory): + os.makedirs(directory) + + fp = open(path, "wb") + try: + fp.write(write_value) + finally: + fp.close() + # END writing + + reference = property(_get_reference, _set_reference, doc="Returns the Reference we point to") + + # alias + ref = reference + + def is_valid(self): + """ + Returns + True if the reference is valid, hence it can be read and points to + a valid object or reference. + """ + try: + self.commit + except (OSError, ValueError): + return False + else: + return True + + @property + def is_detached(self): + """ + Returns + True if we are a detached reference, hence we point to a specific commit + instead to another reference + """ + try: + self.reference + return False + except TypeError: + return True + + @classmethod + def from_path(cls, repo, path): + """ + Return + Instance of SymbolicReference or HEAD + depending on the given path + + Note + It enforces that symbolic refs in git are only found in the + root of the .git repository, never within a folder. + """ + if not path: + raise ValueError("Cannot create Symbolic Reference from %r" % path) + + if path == 'HEAD': + return HEAD(repo, path) + + if '/' not in path: + return SymbolicReference(repo, path) + + raise ValueError("Could not find symbolic reference type suitable to handle path %r" % path) - @classmethod - def _to_full_path(cls, repo, path): - if isinstance(path, SymbolicReference): - path = path.path - full_ref_path = path - if not cls._common_path_default: - return full_ref_path - if not path.startswith(cls._common_path_default+"/"): - full_ref_path = '%s/%s' % (cls._common_path_default, path) - return full_ref_path - - @classmethod - def delete(cls, repo, path): - """Delete the reference at the given path - - ``repo`` - Repository to delete the reference from - - ``path`` - Short or full path pointing to the reference, i.e. refs/myreference - or just "myreference", hence 'refs/' is implied. 
- Alternatively the symbolic reference to be deleted - """ - full_ref_path = cls._to_full_path(repo, path) - abs_path = os.path.join(repo.git_dir, full_ref_path) - if os.path.exists(abs_path): - os.remove(abs_path) - else: - # check packed refs - pack_file_path = cls._get_packed_refs_path(repo) - try: - reader = open(pack_file_path) - except (OSError,IOError): - pass # it didnt exist at all - else: - new_lines = list() - made_change = False - dropped_last_line = False - for line in reader: - # keep line if it is a comment or if the ref to delete is not - # in the line - # If we deleted the last line and this one is a tag-reference object, - # we drop it as well - if ( line.startswith('#') or full_ref_path not in line ) and \ - ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): - new_lines.append(line) - dropped_last_line = False - continue - # END skip comments and lines without our path - - # drop this line - made_change = True - dropped_last_line = True - # END for each line in packed refs - reader.close() - - # write the new lines - if made_change: - open(pack_file_path, 'w').writelines(new_lines) - # END open exception handling - # END handle deletion - - @classmethod - def _create(cls, repo, path, resolve, reference, force): - """internal method used to create a new symbolic reference. - If resolve is False,, the reference will be taken as is, creating - a proper symbolic reference. Otherwise it will be resolved to the - corresponding object and a detached symbolic reference will be created - instead""" - full_ref_path = cls._to_full_path(repo, path) - abs_ref_path = os.path.join(repo.git_dir, full_ref_path) - - # figure out target data - target = reference - if resolve: - target = Object.new(repo, reference) - - if not force and os.path.isfile(abs_ref_path): - target_data = str(target) - if isinstance(target, SymbolicReference): - target_data = target.path - if not resolve: - target_data = "ref: " + target_data - if open(abs_ref_path, 'rb').read().strip() != target_data: - raise OSError("Reference at %s does already exist" % full_ref_path) - # END no force handling - - ref = cls(repo, full_ref_path) - ref.reference = target - return ref - - @classmethod - def create(cls, repo, path, reference='HEAD', force=False ): - """ - Create a new symbolic reference, hence a reference pointing to another - reference. - ``repo`` - Repository to create the reference in - - ``path`` - full path at which the new symbolic reference is supposed to be - created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" - - ``reference`` - The reference to which the new symbolic reference should point to - - ``force`` - if True, force creation even if a symbolic reference with that name already exists. - Raise OSError otherwise - - Returns - Newly created symbolic Reference - - Raises OSError - If a (Symbolic)Reference with the same name but different contents - already exists. - Note - This does not alter the current HEAD, index or Working Tree - """ - return cls._create(repo, path, False, reference, force) - - def rename(self, new_path, force=False): - """ - Rename self to a new path - - ``new_path`` - Either a simple name or a full path, i.e. new_name or features/new_name. - The prefix refs/ is implied for references and will be set as needed. - In case this is a symbolic ref, there is no implied prefix - - ``force`` - If True, the rename will succeed even if a head with the target name - already exists. 
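
The packed-refs file that _iter_packed_refs and delete() handle above has
this layout (shas illustrative); a '^' line carries the peeled target of the
preceding tag entry, which is why delete() drops both lines together:

    # pack-refs with: peeled
    6cc23ee138be09ff8c28b07162720018b244e95e refs/heads/master
    8b1e02c0fb554eed2ce2ef737a68bb369d7527df refs/tags/v0.1
    ^6cc23ee138be09ff8c28b07162720018b244e95e
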
It will be overwritten in that case - - Returns - self - - Raises OSError: - In case a file at path but a different contents already exists - """ - new_path = self._to_full_path(self.repo, new_path) - if self.path == new_path: - return self - - new_abs_path = os.path.join(self.repo.git_dir, new_path) - cur_abs_path = os.path.join(self.repo.git_dir, self.path) - if os.path.isfile(new_abs_path): - if not force: - # if they point to the same file, its not an error - if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip(): - raise OSError("File at path %r already exists" % new_abs_path) - # else: we could remove ourselves and use the otherone, but - # but clarity we just continue as usual - # END not force handling - os.remove(new_abs_path) - # END handle existing target file - - dirname = os.path.dirname(new_abs_path) - if not os.path.isdir(dirname): - os.makedirs(dirname) - # END create directory - - os.rename(cur_abs_path, new_abs_path) - self.path = new_path - - return self - - @classmethod - def _iter_items(cls, repo, common_path = None): - if common_path is None: - common_path = cls._common_path_default - rela_paths = set() - - # walk loose refs - # Currently we do not follow links - for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)): - if 'refs/' not in root: # skip non-refs subfolders - refs_id = [ i for i,d in enumerate(dirs) if d == 'refs' ] - if refs_id: - dirs[0:] = ['refs'] - # END prune non-refs folders - - for f in files: - abs_path = to_native_path_linux(join_path(root, f)) - rela_paths.add(abs_path.replace(to_native_path_linux(repo.git_dir) + '/', "")) - # END for each file in root directory - # END for each directory to walk - - # read packed refs - for sha, rela_path in cls._iter_packed_refs(repo): - if rela_path.startswith(common_path): - rela_paths.add(rela_path) - # END relative path matches common path - # END packed refs reading - - # return paths in sorted order - for path in sorted(rela_paths): - try: - yield cls.from_path(repo, path) - except ValueError: - continue - # END for each sorted relative refpath - - @classmethod - def iter_items(cls, repo, common_path = None): - """ - Find all refs in the repository + @classmethod + def _to_full_path(cls, repo, path): + if isinstance(path, SymbolicReference): + path = path.path + full_ref_path = path + if not cls._common_path_default: + return full_ref_path + if not path.startswith(cls._common_path_default+"/"): + full_ref_path = '%s/%s' % (cls._common_path_default, path) + return full_ref_path + + @classmethod + def delete(cls, repo, path): + """Delete the reference at the given path + + ``repo`` + Repository to delete the reference from + + ``path`` + Short or full path pointing to the reference, i.e. refs/myreference + or just "myreference", hence 'refs/' is implied. 
+ Alternatively the symbolic reference to be deleted + """ + full_ref_path = cls._to_full_path(repo, path) + abs_path = os.path.join(repo.git_dir, full_ref_path) + if os.path.exists(abs_path): + os.remove(abs_path) + else: + # check packed refs + pack_file_path = cls._get_packed_refs_path(repo) + try: + reader = open(pack_file_path) + except (OSError,IOError): + pass # it didnt exist at all + else: + new_lines = list() + made_change = False + dropped_last_line = False + for line in reader: + # keep line if it is a comment or if the ref to delete is not + # in the line + # If we deleted the last line and this one is a tag-reference object, + # we drop it as well + if ( line.startswith('#') or full_ref_path not in line ) and \ + ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): + new_lines.append(line) + dropped_last_line = False + continue + # END skip comments and lines without our path + + # drop this line + made_change = True + dropped_last_line = True + # END for each line in packed refs + reader.close() + + # write the new lines + if made_change: + open(pack_file_path, 'w').writelines(new_lines) + # END open exception handling + # END handle deletion + + @classmethod + def _create(cls, repo, path, resolve, reference, force): + """internal method used to create a new symbolic reference. + If resolve is False,, the reference will be taken as is, creating + a proper symbolic reference. Otherwise it will be resolved to the + corresponding object and a detached symbolic reference will be created + instead""" + full_ref_path = cls._to_full_path(repo, path) + abs_ref_path = os.path.join(repo.git_dir, full_ref_path) + + # figure out target data + target = reference + if resolve: + target = Object.new(repo, reference) + + if not force and os.path.isfile(abs_ref_path): + target_data = str(target) + if isinstance(target, SymbolicReference): + target_data = target.path + if not resolve: + target_data = "ref: " + target_data + if open(abs_ref_path, 'rb').read().strip() != target_data: + raise OSError("Reference at %s does already exist" % full_ref_path) + # END no force handling + + ref = cls(repo, full_ref_path) + ref.reference = target + return ref + + @classmethod + def create(cls, repo, path, reference='HEAD', force=False ): + """ + Create a new symbolic reference, hence a reference pointing to another + reference. + ``repo`` + Repository to create the reference in + + ``path`` + full path at which the new symbolic reference is supposed to be + created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" + + ``reference`` + The reference to which the new symbolic reference should point to + + ``force`` + if True, force creation even if a symbolic reference with that name already exists. + Raise OSError otherwise + + Returns + Newly created symbolic Reference + + Raises OSError + If a (Symbolic)Reference with the same name but different contents + already exists. + Note + This does not alter the current HEAD, index or Working Tree + """ + return cls._create(repo, path, False, reference, force) + + def rename(self, new_path, force=False): + """ + Rename self to a new path + + ``new_path`` + Either a simple name or a full path, i.e. new_name or features/new_name. + The prefix refs/ is implied for references and will be set as needed. + In case this is a symbolic ref, there is no implied prefix + + ``force`` + If True, the rename will succeed even if a head with the target name + already exists. 
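# A minimal usage sketch for create() above; the repository path is
# hypothetical. Re-creating with an identical target succeeds, while a
# different target without force=True raises OSError, per _create() above.
from git import Repo
from git.refs import SymbolicReference

repo = Repo("/path/to/repo")
sym = SymbolicReference.create(repo, "NEW_HEAD", reference="HEAD")
sym = SymbolicReference.create(repo, "NEW_HEAD", reference="HEAD")  # same target: fine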
It will be overwritten in that case + + Returns + self + + Raises OSError: + In case a file at path but a different contents already exists + """ + new_path = self._to_full_path(self.repo, new_path) + if self.path == new_path: + return self + + new_abs_path = os.path.join(self.repo.git_dir, new_path) + cur_abs_path = os.path.join(self.repo.git_dir, self.path) + if os.path.isfile(new_abs_path): + if not force: + # if they point to the same file, its not an error + if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip(): + raise OSError("File at path %r already exists" % new_abs_path) + # else: we could remove ourselves and use the otherone, but + # but clarity we just continue as usual + # END not force handling + os.remove(new_abs_path) + # END handle existing target file + + dirname = os.path.dirname(new_abs_path) + if not os.path.isdir(dirname): + os.makedirs(dirname) + # END create directory + + os.rename(cur_abs_path, new_abs_path) + self.path = new_path + + return self + + @classmethod + def _iter_items(cls, repo, common_path = None): + if common_path is None: + common_path = cls._common_path_default + rela_paths = set() + + # walk loose refs + # Currently we do not follow links + for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)): + if 'refs/' not in root: # skip non-refs subfolders + refs_id = [ i for i,d in enumerate(dirs) if d == 'refs' ] + if refs_id: + dirs[0:] = ['refs'] + # END prune non-refs folders + + for f in files: + abs_path = to_native_path_linux(join_path(root, f)) + rela_paths.add(abs_path.replace(to_native_path_linux(repo.git_dir) + '/', "")) + # END for each file in root directory + # END for each directory to walk + + # read packed refs + for sha, rela_path in cls._iter_packed_refs(repo): + if rela_path.startswith(common_path): + rela_paths.add(rela_path) + # END relative path matches common path + # END packed refs reading + + # return paths in sorted order + for path in sorted(rela_paths): + try: + yield cls.from_path(repo, path) + except ValueError: + continue + # END for each sorted relative refpath + + @classmethod + def iter_items(cls, repo, common_path = None): + """ + Find all refs in the repository - ``repo`` - is the Repo + ``repo`` + is the Repo - ``common_path`` - Optional keyword argument to the path which is to be shared by all - returned Ref objects. - Defaults to class specific portion if None assuring that only - refs suitable for the actual class are returned. + ``common_path`` + Optional keyword argument to the path which is to be shared by all + returned Ref objects. + Defaults to class specific portion if None assuring that only + refs suitable for the actual class are returned. - Returns - git.SymbolicReference[], each of them is guaranteed to be a symbolic - ref which is not detached. 
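# A usage sketch for iter_items() above: detached symbolic refs are filtered
# out, everything else is yielded in sorted order. The repository path is
# hypothetical.
from git import Repo
from git.refs import SymbolicReference

repo = Repo("/path/to/repo")
for ref in SymbolicReference.iter_items(repo):
    print ref.path      # e.g. refs/heads/master, yielded as a Head instance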
- - List is lexigraphically sorted - The returned objects represent actual subclasses, such as Head or TagReference - """ - return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached ) - - @classmethod - def from_path(cls, repo, path): - """ - Return - Instance of type Reference, Head, or Tag - depending on the given path - """ - if not path: - raise ValueError("Cannot create Reference from %r" % path) - - for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): - try: - instance = ref_type(repo, path) - if instance.__class__ == SymbolicReference and instance.is_detached: - raise ValueError("SymbolRef was detached, we drop it") - return instance - except ValueError: - pass - # END exception handling - # END for each type to try - raise ValueError("Could not find reference type suitable to handle path %r" % path) - + Returns + git.SymbolicReference[], each of them is guaranteed to be a symbolic + ref which is not detached. + + List is lexigraphically sorted + The returned objects represent actual subclasses, such as Head or TagReference + """ + return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached ) + + @classmethod + def from_path(cls, repo, path): + """ + Return + Instance of type Reference, Head, or Tag + depending on the given path + """ + if not path: + raise ValueError("Cannot create Reference from %r" % path) + + for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference): + try: + instance = ref_type(repo, path) + if instance.__class__ == SymbolicReference and instance.is_detached: + raise ValueError("SymbolRef was detached, we drop it") + return instance + except ValueError: + pass + # END exception handling + # END for each type to try + raise ValueError("Could not find reference type suitable to handle path %r" % path) + class Reference(SymbolicReference, LazyMixin, Iterable): - """ - Represents a named reference to any object. Subclasses may apply restrictions though, - i.e. Heads can only point to commits. - """ - __slots__ = tuple() - _common_path_default = "refs" - - def __init__(self, repo, path): - """ - Initialize this instance - ``repo`` - Our parent repository - - ``path`` - Path relative to the .git/ directory pointing to the ref in question, i.e. - refs/heads/master - - """ - if not path.startswith(self._common_path_default+'/'): - raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path )) - super(Reference, self).__init__(repo, path) - + """ + Represents a named reference to any object. Subclasses may apply restrictions though, + i.e. Heads can only point to commits. + """ + __slots__ = tuple() + _common_path_default = "refs" + + def __init__(self, repo, path): + """ + Initialize this instance + ``repo`` + Our parent repository + + ``path`` + Path relative to the .git/ directory pointing to the ref in question, i.e. + refs/heads/master + + """ + if not path.startswith(self._common_path_default+'/'): + raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path )) + super(Reference, self).__init__(repo, path) + - def __str__(self): - return self.name + def __str__(self): + return self.name - def _get_object(self): - """ - Returns - The object our ref currently refers to. 
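# from_path() above tries the most specific type first and returns the first
# one whose constructor accepts the path, roughly:
#   "HEAD"                       -> HEAD
#   "refs/heads/master"          -> Head
#   "refs/remotes/origin/master" -> RemoteReference
#   "refs/tags/1.0"              -> TagReference
# Sketch (repository path is hypothetical):
from git import Repo
from git.refs import SymbolicReference

ref = SymbolicReference.from_path(Repo("/path/to/repo"), "refs/heads/master")
assert type(ref).__name__ == "Head"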
Refs can be cached, they will - always point to the actual object as it gets re-created on each query - """ - # have to be dynamic here as we may be a tag which can point to anything - # Our path will be resolved to the hexsha which will be used accordingly - return Object.new(self.repo, self.path) - - def _set_object(self, ref): - """ - Set our reference to point to the given ref. It will be converted - to a specific hexsha. - - Note: - TypeChecking is done by the git command - """ - # do it safely by specifying the old value - self.repo.git.update_ref(self.path, ref, self._get_object().sha) - - object = property(_get_object, _set_object, doc="Return the object our ref currently refers to") - - @property - def name(self): - """ - Returns - (shortest) Name of this reference - it may contain path components - """ - # first two path tokens are can be removed as they are - # refs/heads or refs/tags or refs/remotes - tokens = self.path.split('/') - if len(tokens) < 3: - return self.path # could be refs/HEAD - return '/'.join(tokens[2:]) - - - @classmethod - def create(cls, repo, path, commit='HEAD', force=False ): - """ - Create a new reference. - ``repo`` - Repository to create the reference in - - ``path`` - The relative path of the reference, i.e. 'new_branch' or - feature/feature1. The path prefix 'refs/' is implied if not - given explicitly - - ``commit`` - Commit to which the new reference should point, defaults to the - current HEAD - - ``force`` - if True, force creation even if a reference with that name already exists. - Raise OSError otherwise - - Returns - Newly created Reference - - Note - This does not alter the current HEAD, index or Working Tree - """ - return cls._create(repo, path, True, commit, force) - - @classmethod - def iter_items(cls, repo, common_path = None): - """ - Equivalent to SymbolicReference.iter_items, but will return non-detached - references as well. - """ - return cls._iter_items(repo, common_path) - - + def _get_object(self): + """ + Returns + The object our ref currently refers to. Refs can be cached, they will + always point to the actual object as it gets re-created on each query + """ + # have to be dynamic here as we may be a tag which can point to anything + # Our path will be resolved to the hexsha which will be used accordingly + return Object.new(self.repo, self.path) + + def _set_object(self, ref): + """ + Set our reference to point to the given ref. It will be converted + to a specific hexsha. + + Note: + TypeChecking is done by the git command + """ + # do it safely by specifying the old value + self.repo.git.update_ref(self.path, ref, self._get_object().sha) + + object = property(_get_object, _set_object, doc="Return the object our ref currently refers to") + + @property + def name(self): + """ + Returns + (shortest) Name of this reference - it may contain path components + """ + # first two path tokens are can be removed as they are + # refs/heads or refs/tags or refs/remotes + tokens = self.path.split('/') + if len(tokens) < 3: + return self.path # could be refs/HEAD + return '/'.join(tokens[2:]) + + + @classmethod + def create(cls, repo, path, commit='HEAD', force=False ): + """ + Create a new reference. + ``repo`` + Repository to create the reference in + + ``path`` + The relative path of the reference, i.e. 'new_branch' or + feature/feature1. 
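# A stand-alone sketch of the rule behind the name property above: the first
# two path tokens (refs/heads, refs/tags, refs/remotes) are stripped.
def short_name(path):
    tokens = path.split('/')
    if len(tokens) < 3:
        return path                 # e.g. a plain 'HEAD'
    return '/'.join(tokens[2:])

# short_name('refs/heads/feature/feature1') -> 'feature/feature1'
# short_name('refs/remotes/origin/master')  -> 'origin/master'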
The path prefix 'refs/' is implied if not + given explicitly + + ``commit`` + Commit to which the new reference should point, defaults to the + current HEAD + + ``force`` + if True, force creation even if a reference with that name already exists. + Raise OSError otherwise + + Returns + Newly created Reference + + Note + This does not alter the current HEAD, index or Working Tree + """ + return cls._create(repo, path, True, commit, force) + + @classmethod + def iter_items(cls, repo, common_path = None): + """ + Equivalent to SymbolicReference.iter_items, but will return non-detached + references as well. + """ + return cls._iter_items(repo, common_path) + + class HEAD(SymbolicReference): - """ - Special case of a Symbolic Reference as it represents the repository's - HEAD reference. - """ - _HEAD_NAME = 'HEAD' - __slots__ = tuple() - - def __init__(self, repo, path=_HEAD_NAME): - if path != self._HEAD_NAME: - raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) - super(HEAD, self).__init__(repo, path) - - - def reset(self, commit='HEAD', index=True, working_tree = False, - paths=None, **kwargs): - """ - Reset our HEAD to the given commit optionally synchronizing - the index and working tree. The reference we refer to will be set to - commit as well. - - ``commit`` - Commit object, Reference Object or string identifying a revision we - should reset HEAD to. - - ``index`` - If True, the index will be set to match the given commit. Otherwise - it will not be touched. - - ``working_tree`` - If True, the working tree will be forcefully adjusted to match the given - commit, possibly overwriting uncommitted changes without warning. - If working_tree is True, index must be true as well - - ``paths`` - Single path or list of paths relative to the git root directory - that are to be reset. This allow to partially reset individual files. - - ``kwargs`` - Additional arguments passed to git-reset. - - Returns - self - """ - mode = "--soft" - if index: - mode = "--mixed" - - if working_tree: - mode = "--hard" - if not index: - raise ValueError( "Cannot reset the working tree if the index is not reset as well") - # END working tree handling - - self.repo.git.reset(mode, commit, paths, **kwargs) - - return self - + """ + Special case of a Symbolic Reference as it represents the repository's + HEAD reference. + """ + _HEAD_NAME = 'HEAD' + __slots__ = tuple() + + def __init__(self, repo, path=_HEAD_NAME): + if path != self._HEAD_NAME: + raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) + super(HEAD, self).__init__(repo, path) + + + def reset(self, commit='HEAD', index=True, working_tree = False, + paths=None, **kwargs): + """ + Reset our HEAD to the given commit optionally synchronizing + the index and working tree. The reference we refer to will be set to + commit as well. + + ``commit`` + Commit object, Reference Object or string identifying a revision we + should reset HEAD to. + + ``index`` + If True, the index will be set to match the given commit. Otherwise + it will not be touched. + + ``working_tree`` + If True, the working tree will be forcefully adjusted to match the given + commit, possibly overwriting uncommitted changes without warning. + If working_tree is True, index must be true as well + + ``paths`` + Single path or list of paths relative to the git root directory + that are to be reset. This allow to partially reset individual files. + + ``kwargs`` + Additional arguments passed to git-reset. 
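# reset() above maps its two booleans onto the git-reset mode:
#   index=False                    -> --soft
#   index=True (the default)       -> --mixed
#   index=True, working_tree=True  -> --hard
#   index=False, working_tree=True -> ValueError
# Usage sketch; the repository path and revision are hypothetical.
from git import Repo
from git.refs import HEAD

HEAD(Repo("/path/to/repo")).reset("HEAD~1", index=True, working_tree=True)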
+ + Returns + self + """ + mode = "--soft" + if index: + mode = "--mixed" + + if working_tree: + mode = "--hard" + if not index: + raise ValueError( "Cannot reset the working tree if the index is not reset as well") + # END working tree handling + + self.repo.git.reset(mode, commit, paths, **kwargs) + + return self + class Head(Reference): - """ - A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. + """ + A Head is a named reference to a Commit. Every Head instance contains a name + and a Commit object. - Examples:: + Examples:: - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] + >>> repo = Repo("/path/to/repo") + >>> head = repo.heads[0] - >>> head.name - 'master' + >>> head.name + 'master' - >>> head.commit - <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> + >>> head.commit + <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> - >>> head.commit.sha - '1c09f116cbc2cb4100fb6935bb162daa4723f455' - """ - _common_path_default = "refs/heads" - - @classmethod - def create(cls, repo, path, commit='HEAD', force=False, **kwargs ): - """ - Create a new head. - ``repo`` - Repository to create the head in - - ``path`` - The name or path of the head, i.e. 'new_branch' or - feature/feature1. The prefix refs/heads is implied. - - ``commit`` - Commit to which the new head should point, defaults to the - current HEAD - - ``force`` - if True, force creation even if branch with that name already exists. - - ``**kwargs`` - Additional keyword arguments to be passed to git-branch, i.e. - track, no-track, l - - Returns - Newly created Head - - Note - This does not alter the current HEAD, index or Working Tree - """ - if cls is not Head: - raise TypeError("Only Heads can be created explicitly, not objects of type %s" % cls.__name__) - - args = ( path, commit ) - if force: - kwargs['f'] = True - - repo.git.branch(*args, **kwargs) - return cls(repo, "%s/%s" % ( cls._common_path_default, path)) - - - @classmethod - def delete(cls, repo, *heads, **kwargs): - """ - Delete the given heads - - ``force`` - If True, the heads will be deleted even if they are not yet merged into - the main development stream. - Default False - """ - force = kwargs.get("force", False) - flag = "-d" - if force: - flag = "-D" - repo.git.branch(flag, *heads) - - - def rename(self, new_path, force=False): - """ - Rename self to a new path - - ``new_path`` - Either a simple name or a path, i.e. new_name or features/new_name. - The prefix refs/heads is implied - - ``force`` - If True, the rename will succeed even if a head with the target name - already exists. - - Returns - self - - Note - respects the ref log as git commands are used - """ - flag = "-m" - if force: - flag = "-M" - - self.repo.git.branch(flag, self, new_path) - self.path = "%s/%s" % (self._common_path_default, new_path) - return self - - def checkout(self, force=False, **kwargs): - """ - Checkout this head by setting the HEAD to this reference, by updating the index - to reflect the tree we point to and by updating the working tree to reflect - the latest index. - - The command will fail if changed working tree files would be overwritten. - - ``force`` - If True, changes to the index and the working tree will be discarded. - If False, GitCommandError will be raised in that situation. - - ``**kwargs`` - Additional keyword arguments to be passed to git checkout, i.e. - b='new_branch' to create a new branch at the given spot. 
- - Returns - The active branch after the checkout operation, usually self unless - a new branch has been created. - - Note - By default it is only allowed to checkout heads - everything else - will leave the HEAD detached which is allowed and possible, but remains - a special state that some tools might not be able to handle. - """ - args = list() - kwargs['f'] = force - if kwargs['f'] == False: - kwargs.pop('f') - - self.repo.git.checkout(self, **kwargs) - return self.repo.active_branch - + >>> head.commit.sha + '1c09f116cbc2cb4100fb6935bb162daa4723f455' + """ + _common_path_default = "refs/heads" + + @classmethod + def create(cls, repo, path, commit='HEAD', force=False, **kwargs ): + """ + Create a new head. + ``repo`` + Repository to create the head in + + ``path`` + The name or path of the head, i.e. 'new_branch' or + feature/feature1. The prefix refs/heads is implied. + + ``commit`` + Commit to which the new head should point, defaults to the + current HEAD + + ``force`` + if True, force creation even if branch with that name already exists. + + ``**kwargs`` + Additional keyword arguments to be passed to git-branch, i.e. + track, no-track, l + + Returns + Newly created Head + + Note + This does not alter the current HEAD, index or Working Tree + """ + if cls is not Head: + raise TypeError("Only Heads can be created explicitly, not objects of type %s" % cls.__name__) + + args = ( path, commit ) + if force: + kwargs['f'] = True + + repo.git.branch(*args, **kwargs) + return cls(repo, "%s/%s" % ( cls._common_path_default, path)) + + + @classmethod + def delete(cls, repo, *heads, **kwargs): + """ + Delete the given heads + + ``force`` + If True, the heads will be deleted even if they are not yet merged into + the main development stream. + Default False + """ + force = kwargs.get("force", False) + flag = "-d" + if force: + flag = "-D" + repo.git.branch(flag, *heads) + + + def rename(self, new_path, force=False): + """ + Rename self to a new path + + ``new_path`` + Either a simple name or a path, i.e. new_name or features/new_name. + The prefix refs/heads is implied + + ``force`` + If True, the rename will succeed even if a head with the target name + already exists. + + Returns + self + + Note + respects the ref log as git commands are used + """ + flag = "-m" + if force: + flag = "-M" + + self.repo.git.branch(flag, self, new_path) + self.path = "%s/%s" % (self._common_path_default, new_path) + return self + + def checkout(self, force=False, **kwargs): + """ + Checkout this head by setting the HEAD to this reference, by updating the index + to reflect the tree we point to and by updating the working tree to reflect + the latest index. + + The command will fail if changed working tree files would be overwritten. + + ``force`` + If True, changes to the index and the working tree will be discarded. + If False, GitCommandError will be raised in that situation. + + ``**kwargs`` + Additional keyword arguments to be passed to git checkout, i.e. + b='new_branch' to create a new branch at the given spot. + + Returns + The active branch after the checkout operation, usually self unless + a new branch has been created. + + Note + By default it is only allowed to checkout heads - everything else + will leave the HEAD detached which is allowed and possible, but remains + a special state that some tools might not be able to handle. 
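# A usage sketch for checkout() above; the repository path and branch name
# are hypothetical.
from git import Repo

repo = Repo("/path/to/repo")
master = repo.heads[0]
active = master.checkout()                 # refuses to overwrite local changes
active = master.checkout(force=True)       # discards index/working tree changes
active = master.checkout(b='new_branch')   # create and switch to a new branch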
+ """ + args = list() + kwargs['f'] = force + if kwargs['f'] == False: + kwargs.pop('f') + + self.repo.git.checkout(self, **kwargs) + return self.repo.active_branch + class TagReference(Reference): - """ - Class representing a lightweight tag reference which either points to a commit - ,a tag object or any other object. In the latter case additional information, - like the signature or the tag-creator, is available. - - This tag object will always point to a commit object, but may carray additional - information in a tag object:: - - tagref = TagReference.list_items(repo)[0] - print tagref.commit.message - if tagref.tag is not None: - print tagref.tag.message - """ - - __slots__ = tuple() - _common_path_default = "refs/tags" - - @property - def commit(self): - """ - Returns - Commit object the tag ref points to - """ - if self.object.type == "commit": - return self.object - elif self.object.type == "tag": - # it is a tag object which carries the commit as an object - we can point to anything - return self.object.object - else: - raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) + """ + Class representing a lightweight tag reference which either points to a commit + ,a tag object or any other object. In the latter case additional information, + like the signature or the tag-creator, is available. + + This tag object will always point to a commit object, but may carray additional + information in a tag object:: + + tagref = TagReference.list_items(repo)[0] + print tagref.commit.message + if tagref.tag is not None: + print tagref.tag.message + """ + + __slots__ = tuple() + _common_path_default = "refs/tags" + + @property + def commit(self): + """ + Returns + Commit object the tag ref points to + """ + if self.object.type == "commit": + return self.object + elif self.object.type == "tag": + # it is a tag object which carries the commit as an object - we can point to anything + return self.object.object + else: + raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self ) - @property - def tag(self): - """ - Returns - Tag object this tag ref points to or None in case - we are a light weight tag - """ - if self.object.type == "tag": - return self.object - return None - - @classmethod - def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs): - """ - Create a new tag reference. - - ``path`` - The name of the tag, i.e. 1.0 or releases/1.0. - The prefix refs/tags is implied - - ``ref`` - A reference to the object you want to tag. It can be a commit, tree or - blob. - - ``message`` - If not None, the message will be used in your tag object. This will also - create an additional tag object that allows to obtain that information, i.e.:: - tagref.tag.message - - ``force`` - If True, to force creation of a tag even though that tag already exists. 
- - ``**kwargs`` - Additional keyword arguments to be passed to git-tag - - Returns - A new TagReference - """ - args = ( path, ref ) - if message: - kwargs['m'] = message - if force: - kwargs['f'] = True - - repo.git.tag(*args, **kwargs) - return TagReference(repo, "%s/%s" % (cls._common_path_default, path)) - - @classmethod - def delete(cls, repo, *tags): - """ - Delete the given existing tag or tags - """ - repo.git.tag("-d", *tags) - - - + @property + def tag(self): + """ + Returns + Tag object this tag ref points to or None in case + we are a light weight tag + """ + if self.object.type == "tag": + return self.object + return None + + @classmethod + def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs): + """ + Create a new tag reference. + + ``path`` + The name of the tag, i.e. 1.0 or releases/1.0. + The prefix refs/tags is implied + + ``ref`` + A reference to the object you want to tag. It can be a commit, tree or + blob. + + ``message`` + If not None, the message will be used in your tag object. This will also + create an additional tag object that allows to obtain that information, i.e.:: + tagref.tag.message + + ``force`` + If True, to force creation of a tag even though that tag already exists. + + ``**kwargs`` + Additional keyword arguments to be passed to git-tag + + Returns + A new TagReference + """ + args = ( path, ref ) + if message: + kwargs['m'] = message + if force: + kwargs['f'] = True + + repo.git.tag(*args, **kwargs) + return TagReference(repo, "%s/%s" % (cls._common_path_default, path)) + + @classmethod + def delete(cls, repo, *tags): + """ + Delete the given existing tag or tags + """ + repo.git.tag("-d", *tags) + + + - + # provide an alias Tag = TagReference class RemoteReference(Head): - """ - Represents a reference pointing to a remote head. - """ - _common_path_default = "refs/remotes" - - @property - def remote_name(self): - """ - Returns - Name of the remote we are a reference of, such as 'origin' for a reference - named 'origin/master' - """ - tokens = self.path.split('/') - # /refs/remotes/<remote name>/<branch_name> - return tokens[2] - - @property - def remote_head(self): - """ - Returns - Name of the remote head itself, i.e. master. - - NOTE: The returned name is usually not qualified enough to uniquely identify - a branch - """ - tokens = self.path.split('/') - return '/'.join(tokens[3:]) - - @classmethod - def delete(cls, repo, *refs, **kwargs): - """ - Delete the given remote references. - - Note - kwargs are given for compatability with the base class method as we - should not narrow the signature. - """ - repo.git.branch("-d", "-r", *refs) - # the official deletion method will ignore remote symbolic refs - these - # are generally ignored in the refs/ folder. We don't though - # and delete remainders manually - for ref in refs: - try: - os.remove(os.path.join(repo.git_dir, ref.path)) - except OSError: - pass - # END for each ref + """ + Represents a reference pointing to a remote head. + """ + _common_path_default = "refs/remotes" + + @property + def remote_name(self): + """ + Returns + Name of the remote we are a reference of, such as 'origin' for a reference + named 'origin/master' + """ + tokens = self.path.split('/') + # /refs/remotes/<remote name>/<branch_name> + return tokens[2] + + @property + def remote_head(self): + """ + Returns + Name of the remote head itself, i.e. master. 
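# A stand-alone sketch of the token splits behind remote_name and remote_head
# above (the sample path is made up):
path = "refs/remotes/origin/feature/x"
tokens = path.split('/')
print tokens[2]             # 'origin'      (remote_name)
print '/'.join(tokens[3:])  # 'feature/x'   (remote_head)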
+ + NOTE: The returned name is usually not qualified enough to uniquely identify + a branch + """ + tokens = self.path.split('/') + return '/'.join(tokens[3:]) + + @classmethod + def delete(cls, repo, *refs, **kwargs): + """ + Delete the given remote references. + + Note + kwargs are given for compatability with the base class method as we + should not narrow the signature. + """ + repo.git.branch("-d", "-r", *refs) + # the official deletion method will ignore remote symbolic refs - these + # are generally ignored in the refs/ folder. We don't though + # and delete remainders manually + for ref in refs: + try: + os.remove(os.path.join(repo.git_dir, ref.path)) + except OSError: + pass + # END for each ref diff --git a/lib/git/remote.py b/lib/git/remote.py index d9d61206..65e07bd3 100644 --- a/lib/git/remote.py +++ b/lib/git/remote.py @@ -14,770 +14,770 @@ import re import os class _SectionConstraint(object): - """ - Constrains a ConfigParser to only option commands which are constrained to - always use the section we have been initialized with. - - It supports all ConfigParser methods that operate on an option - """ - __slots__ = ("_config", "_section_name") - _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option") - - def __init__(self, config, section): - self._config = config - self._section_name = section - - def __getattr__(self, attr): - if attr in self._valid_attrs_: - return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) - return super(_SectionConstraint,self).__getattribute__(attr) - - def _call_config(self, method, *args, **kwargs): - """Call the configuration at the given method which must take a section name - as first argument""" - return getattr(self._config, method)(self._section_name, *args, **kwargs) - - + """ + Constrains a ConfigParser to only option commands which are constrained to + always use the section we have been initialized with. + + It supports all ConfigParser methods that operate on an option + """ + __slots__ = ("_config", "_section_name") + _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option") + + def __init__(self, config, section): + self._config = config + self._section_name = section + + def __getattr__(self, attr): + if attr in self._valid_attrs_: + return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) + return super(_SectionConstraint,self).__getattribute__(attr) + + def _call_config(self, method, *args, **kwargs): + """Call the configuration at the given method which must take a section name + as first argument""" + return getattr(self._config, method)(self._section_name, *args, **kwargs) + + class RemoteProgress(object): - """ - Handler providing an interface to parse progress information emitted by git-push - and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. - """ - BEGIN, END, COUNTING, COMPRESSING, WRITING = [ 1 << x for x in range(5) ] - STAGE_MASK = BEGIN|END - OP_MASK = COUNTING|COMPRESSING|WRITING - - __slots__ = ("_cur_line", "_seen_ops") - re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") - re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") - - def __init__(self): - self._seen_ops = list() - - def _parse_progress_line(self, line): - """ - Parse progress information from the given line as retrieved by git-push - or git-fetch - @return: list(line, ...) 
list of lines that could not be processed""" - # handle - # Counting objects: 4, done. - # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. - self._cur_line = line - sub_lines = line.split('\r') - failed_lines = list() - for sline in sub_lines: - # find esacpe characters and cut them away - regex will not work with - # them as they are non-ascii. As git might expect a tty, it will send them - last_valid_index = None - for i,c in enumerate(reversed(sline)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i-1 - # END character was non-ascii - # END for each character in sline - if last_valid_index is not None: - sline = sline[:last_valid_index] - # END cut away invalid part - sline = sline.rstrip() - - cur_count, max_count = None, None - match = self.re_op_relative.match(sline) - if match is None: - match = self.re_op_absolute.match(sline) - - if not match: - self.line_dropped(sline) - failed_lines.append(sline) - continue - # END could not get match - - op_code = 0 - remote, op_name, percent, cur_count, max_count, message = match.groups() - - # get operation id - if op_name == "Counting objects": - op_code |= self.COUNTING - elif op_name == "Compressing objects": - op_code |= self.COMPRESSING - elif op_name == "Writing objects": - op_code |= self.WRITING - else: - raise ValueError("Operation name %r unknown" % op_name) - - # figure out stage - if op_code not in self._seen_ops: - self._seen_ops.append(op_code) - op_code |= self.BEGIN - # END begin opcode - - if message is None: - message = '' - # END message handling - - message = message.strip() - done_token = ', done.' - if message.endswith(done_token): - op_code |= self.END - message = message[:-len(done_token)] - # END end message handling - - self.update(op_code, cur_count, max_count, message) - # END for each sub line - return failed_lines - - def line_dropped(self, line): - """ - Called whenever a line could not be understood and was therefore dropped. - """ - pass - - def update(self, op_code, cur_count, max_count=None, message=''): - """ - Called whenever the progress changes - - ``op_code`` - Integer allowing to be compared against Operation IDs and stage IDs. - - Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation - ID as well as END. It may be that BEGIN and END are set at once in case only - one progress message was emitted due to the speed of the operation. - Between BEGIN and END, none of these flags will be set - - Operation IDs are all held within the OP_MASK. Only one Operation ID will - be active per call. - - ``cur_count`` - Current absolute count of items - - ``max_count`` - The maximum count of items we expect. It may be None in case there is - no maximum number of items or if it is (yet) unknown. - - ``message`` - In case of the 'WRITING' operation, it contains the amount of bytes - transferred. It may possibly be used for other purposes as well. - - You may read the contents of the current line in self._cur_line - """ - pass - - + """ + Handler providing an interface to parse progress information emitted by git-push + and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. 
+ """ + BEGIN, END, COUNTING, COMPRESSING, WRITING = [ 1 << x for x in range(5) ] + STAGE_MASK = BEGIN|END + OP_MASK = COUNTING|COMPRESSING|WRITING + + __slots__ = ("_cur_line", "_seen_ops") + re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") + re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") + + def __init__(self): + self._seen_ops = list() + + def _parse_progress_line(self, line): + """ + Parse progress information from the given line as retrieved by git-push + or git-fetch + @return: list(line, ...) list of lines that could not be processed""" + # handle + # Counting objects: 4, done. + # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. + self._cur_line = line + sub_lines = line.split('\r') + failed_lines = list() + for sline in sub_lines: + # find esacpe characters and cut them away - regex will not work with + # them as they are non-ascii. As git might expect a tty, it will send them + last_valid_index = None + for i,c in enumerate(reversed(sline)): + if ord(c) < 32: + # its a slice index + last_valid_index = -i-1 + # END character was non-ascii + # END for each character in sline + if last_valid_index is not None: + sline = sline[:last_valid_index] + # END cut away invalid part + sline = sline.rstrip() + + cur_count, max_count = None, None + match = self.re_op_relative.match(sline) + if match is None: + match = self.re_op_absolute.match(sline) + + if not match: + self.line_dropped(sline) + failed_lines.append(sline) + continue + # END could not get match + + op_code = 0 + remote, op_name, percent, cur_count, max_count, message = match.groups() + + # get operation id + if op_name == "Counting objects": + op_code |= self.COUNTING + elif op_name == "Compressing objects": + op_code |= self.COMPRESSING + elif op_name == "Writing objects": + op_code |= self.WRITING + else: + raise ValueError("Operation name %r unknown" % op_name) + + # figure out stage + if op_code not in self._seen_ops: + self._seen_ops.append(op_code) + op_code |= self.BEGIN + # END begin opcode + + if message is None: + message = '' + # END message handling + + message = message.strip() + done_token = ', done.' + if message.endswith(done_token): + op_code |= self.END + message = message[:-len(done_token)] + # END end message handling + + self.update(op_code, cur_count, max_count, message) + # END for each sub line + return failed_lines + + def line_dropped(self, line): + """ + Called whenever a line could not be understood and was therefore dropped. + """ + pass + + def update(self, op_code, cur_count, max_count=None, message=''): + """ + Called whenever the progress changes + + ``op_code`` + Integer allowing to be compared against Operation IDs and stage IDs. + + Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation + ID as well as END. It may be that BEGIN and END are set at once in case only + one progress message was emitted due to the speed of the operation. + Between BEGIN and END, none of these flags will be set + + Operation IDs are all held within the OP_MASK. Only one Operation ID will + be active per call. + + ``cur_count`` + Current absolute count of items + + ``max_count`` + The maximum count of items we expect. It may be None in case there is + no maximum number of items or if it is (yet) unknown. + + ``message`` + In case of the 'WRITING' operation, it contains the amount of bytes + transferred. It may possibly be used for other purposes as well. 
+ + You may read the contents of the current line in self._cur_line + """ + pass + + class PushInfo(object): - """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit # commit at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') - - NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ - FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] + """ + Carries information about the result of a push operation of a single head:: + + info = remote.push()[0] + info.flags # bitflags providing more information about the result + info.local_ref # Reference pointing to the local reference that was pushed + # It is None if the ref was deleted. + info.remote_ref_string # path to the remote reference located on the remote side + info.remote_ref # Remote Reference on the local side corresponding to + # the remote_ref_string. It can be a TagReference as well. + info.old_commit # commit at which the remote_ref was standing before we pushed + # it to local_ref.commit. Will be None if an error was indicated + info.summary # summary line providing human readable english text about the push + + """ + __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') + + NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ + FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] - _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, - '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, - '=' : UP_TO_DATE, '!' : ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, - summary=''): - """ - Initialize a new instance - """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit = old_commit - self.summary = summary - - @property - def remote_ref(self): - """ - Returns - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance. 
- """ - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """ - Create a new PushInfo instance as parsed from line which is expected to be like - c refs/heads/master:refs/heads/master 05d2687..1d0568e - """ - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - old_commit = Commit(remote.repo, old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) - + _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, + '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, + '=' : UP_TO_DATE, '!' : ERROR } + + def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, + summary=''): + """ + Initialize a new instance + """ + self.flags = flags + self.local_ref = local_ref + self.remote_ref_string = remote_ref_string + self._remote = remote + self.old_commit = old_commit + self.summary = summary + + @property + def remote_ref(self): + """ + Returns + Remote Reference or TagReference in the local repository corresponding + to the remote_ref_string kept in this instance. 
+ """ + # translate heads to a local remote, tags stay as they are + if self.remote_ref_string.startswith("refs/tags"): + return TagReference(self._remote.repo, self.remote_ref_string) + elif self.remote_ref_string.startswith("refs/heads"): + remote_ref = Reference(self._remote.repo, self.remote_ref_string) + return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) + else: + raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) + # END + + @classmethod + def _from_line(cls, remote, line): + """ + Create a new PushInfo instance as parsed from line which is expected to be like + c refs/heads/master:refs/heads/master 05d2687..1d0568e + """ + control_character, from_to, summary = line.split('\t', 3) + flags = 0 + + # control character handling + try: + flags |= cls._flag_map[ control_character ] + except KeyError: + raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) + # END handle control character + + # from_to handling + from_ref_string, to_ref_string = from_to.split(':') + if flags & cls.DELETED: + from_ref = None + else: + from_ref = Reference.from_path(remote.repo, from_ref_string) + + # commit handling, could be message or commit info + old_commit = None + if summary.startswith('['): + if "[rejected]" in summary: + flags |= cls.REJECTED + elif "[remote rejected]" in summary: + flags |= cls.REMOTE_REJECTED + elif "[remote failure]" in summary: + flags |= cls.REMOTE_FAILURE + elif "[no match]" in summary: + flags |= cls.ERROR + elif "[new tag]" in summary: + flags |= cls.NEW_TAG + elif "[new branch]" in summary: + flags |= cls.NEW_HEAD + # uptodate encoded in control character + else: + # fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit + split_token = "..." + if control_character == " ": + split_token = ".." + old_sha, new_sha = summary.split(' ')[0].split(split_token) + old_commit = Commit(remote.repo, old_sha) + # END message handling + + return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) + class FetchInfo(object): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. info.flags & info.REJECTED - # is 0 if ref is SymbolicReference - info.note # additional notes given by git-fetch intended for the user - info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit', 'flags', 'note') - - NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ - FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] - - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\.-]+)( \(.*\)?$)?") - - _flag_map = { '!' 
: ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, - '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } - - def __init__(self, ref, flags, note = '', old_commit = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit = old_commit - - def __str__(self): - return self.name - - @property - def name(self): - """ - Returns - Name of our remote ref - """ - return self.ref.name - - @property - def commit(self): - """ - Returns - Commit of our remote ref - """ - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """ - Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo - """ - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. In that case we use a symbolic reference which is detached - ref_type = None - if remote_local_ref == "FETCH_HEAD": - ref_type = SymbolicReference - elif ref_type_name == "branch": - ref_type = RemoteReference - elif ref_type_name == "tag": - ref_type = TagReference - else: - raise TypeError("Cannot handle reference type: %r" % ref_type_name) - - # create ref instance - if ref_type is SymbolicReference: - remote_local_ref = ref_type(repo, "FETCH_HEAD") - else: - remote_local_ref = Reference.from_path(repo, os.path.join(ref_type._common_path_default, remote_local_ref.strip())) - # END create ref instance - - note = ( note and note.strip() ) or '' - - # parse flags from control_character - flags = 0 - try: - flags |= cls._flag_map[control_character] - except KeyError: - raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) - # END control char exception hanlding - - # parse operation string for more info - makes no sense for symbolic refs - old_commit = None - if isinstance(remote_local_ref, Reference): - if 'rejected' in operation: - flags |= cls.REJECTED - if 'new tag' in operation: - flags |= cls.NEW_TAG - if 'new branch' in operation: - flags |= cls.NEW_HEAD - if '...' in operation or '..' in operation: - split_token = '...' 
- if control_character == ' ': - split_token = split_token[:-1] - old_commit = Commit(repo, operation.split(split_token)[0]) - # END handle refspec - # END reference flag handling - - return cls(remote_local_ref, flags, note, old_commit) - + """ + Carries information about the results of a fetch operation of a single head:: + + info = remote.fetch()[0] + info.ref # Symbolic Reference or RemoteReference to the changed + # remote head or FETCH_HEAD + info.flags # additional flags to be & with enumeration members, + # i.e. info.flags & info.REJECTED + # is 0 if ref is SymbolicReference + info.note # additional notes given by git-fetch intended for the user + info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, + # field is set to the previous location of ref, otherwise None + """ + __slots__ = ('ref','old_commit', 'flags', 'note') + + NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ + FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] + + # %c %-*s %-*s -> %s (%s) + re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\.-]+)( \(.*\)?$)?") + + _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, + '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } + + def __init__(self, ref, flags, note = '', old_commit = None): + """ + Initialize a new instance + """ + self.ref = ref + self.flags = flags + self.note = note + self.old_commit = old_commit + + def __str__(self): + return self.name + + @property + def name(self): + """ + Returns + Name of our remote ref + """ + return self.ref.name + + @property + def commit(self): + """ + Returns + Commit of our remote ref + """ + return self.ref.commit + + @classmethod + def _from_line(cls, repo, line, fetch_line): + """ + Parse information from the given line as returned by git-fetch -v + and return a new FetchInfo object representing this information. + + We can handle a line as follows + "%c %-*s %-*s -> %s%s" + + Where c is either ' ', !, +, -, *, or = + ! means error + + means success forcing update + - means a tag was updated + * means birth of new branch or tag + = means the head was up to date ( and not moved ) + ' ' means a fast-forward + + fetch line is the corresponding line from FETCH_HEAD, like + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo + """ + match = cls.re_fetch_result.match(line) + if match is None: + raise ValueError("Failed to parse line: %r" % line) + + # parse lines + control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() + try: + new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") + ref_type_name, fetch_note = fetch_note.split(' ', 1) + except ValueError: # unpack error + raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) + + # handle FETCH_HEAD and figure out ref type + # If we do not specify a target branch like master:refs/remotes/origin/master, + # the fetch result is stored in FETCH_HEAD which destroys the rule we usually + # have. 
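# A stand-alone sketch of the split-token rule used above when parsing the
# old commit out of the operation string: fast-forwards (' ') report
# 'old..new', forced updates 'old...new'. Sample values are made up.
def old_sha(operation, control_character):
    split_token = '...'
    if control_character == ' ':
        split_token = '..'
    return operation.split(split_token)[0]

# old_sha('05d2687..1d0568e', ' ')  -> '05d2687'
# old_sha('05d2687...1d0568e', '+') -> '05d2687'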
In that case we use a symbolic reference which is detached + ref_type = None + if remote_local_ref == "FETCH_HEAD": + ref_type = SymbolicReference + elif ref_type_name == "branch": + ref_type = RemoteReference + elif ref_type_name == "tag": + ref_type = TagReference + else: + raise TypeError("Cannot handle reference type: %r" % ref_type_name) + + # create ref instance + if ref_type is SymbolicReference: + remote_local_ref = ref_type(repo, "FETCH_HEAD") + else: + remote_local_ref = Reference.from_path(repo, os.path.join(ref_type._common_path_default, remote_local_ref.strip())) + # END create ref instance + + note = ( note and note.strip() ) or '' + + # parse flags from control_character + flags = 0 + try: + flags |= cls._flag_map[control_character] + except KeyError: + raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) + # END control char exception hanlding + + # parse operation string for more info - makes no sense for symbolic refs + old_commit = None + if isinstance(remote_local_ref, Reference): + if 'rejected' in operation: + flags |= cls.REJECTED + if 'new tag' in operation: + flags |= cls.NEW_TAG + if 'new branch' in operation: + flags |= cls.NEW_HEAD + if '...' in operation or '..' in operation: + split_token = '...' + if control_character == ' ': + split_token = split_token[:-1] + old_commit = Commit(repo, operation.split(split_token)[0]) + # END handle refspec + # END reference flag handling + + return cls(remote_local_ref, flags, note, old_commit) + class Remote(LazyMixin, Iterable): - """ - Provides easy read and write access to a git remote. - - Everything not part of this interface is considered an option for the current - remote, allowing constructs like remote.pushurl to query the pushurl. - - NOTE: When querying configuration, the configuration accessor will be cached - to speed up subsequent accesses. - """ - - __slots__ = ( "repo", "name", "_config_reader" ) - _id_attribute_ = "name" - - def __init__(self, repo, name): - """ - Initialize a remote instance - - ``repo`` - The repository we are a remote of - - ``name`` - the name of the remote, i.e. 
'origin' - """ - self.repo = repo - self.name = name - - def __getattr__(self, attr): - """ - Allows to call this instance like - remote.special( *args, **kwargs) to call git-remote special self.name - """ - if attr == "_config_reader": - return super(Remote, self).__getattr__(attr) - - return self._config_reader.get(attr) - - def _config_section_name(self): - return 'remote "%s"' % self.name - - def _set_cache_(self, attr): - if attr == "_config_reader": - self._config_reader = _SectionConstraint(self.repo.config_reader(), self._config_section_name()) - else: - super(Remote, self)._set_cache_(attr) - - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.name) - - def __eq__(self, other): - return self.name == other.name - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.name) - - @classmethod - def iter_items(cls, repo): - """ - Returns - Iterator yielding Remote objects of the given repository - """ - for section in repo.config_reader("repository").sections(): - if not section.startswith('remote'): - continue - lbound = section.find('"') - rbound = section.rfind('"') - if lbound == -1 or rbound == -1: - raise ValueError("Remote-Section has invalid format: %r" % section) - yield Remote(repo, section[lbound+1:rbound]) - # END for each configuration section - - @property - def refs(self): - """ - Returns - IterableList of RemoteReference objects. It is prefixed, allowing - you to omit the remote path portion, i.e.:: - remote.refs.master # yields RemoteReference('/refs/remotes/origin/master') - """ - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - for ref in RemoteReference.list_items(self.repo): - if ref.remote_name == self.name: - out_refs.append(ref) - # END if names match - # END for each ref - assert out_refs, "Remote %s did not have any references" % self.name - return out_refs - - @property - def stale_refs(self): - """ - Returns - IterableList RemoteReference objects that do not have a corresponding - head in the remote reference anymore as they have been deleted on the - remote side, but are still available locally. - - The IterableList is prefixed, hence the 'origin' must be omitted. See - 'refs' property for an example. 
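A sketch of the read-only side of the Remote interface described above, assuming a repository with an 'origin' remote and a fetched master branch:

    from git import Repo, Remote

    repo = Repo("/path/to/repo")        # hypothetical path
    origin = Remote(repo, "origin")
    print origin.url                    # any config option resolves via __getattr__
    master = origin.refs.master         # the 'origin/' prefix is implied
    print master.commit
    print origin.stale_refs             # deleted on the remote, still present locally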
- """ - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - for line in self.repo.git.remote("prune", "--dry-run", self).splitlines()[2:]: - # expecting - # * [would prune] origin/new_branch - token = " * [would prune] " - if not line.startswith(token): - raise ValueError("Could not parse git-remote prune result: %r" % line) - fqhn = "%s/%s" % (RemoteReference._common_path_default,line.replace(token, "")) - out_refs.append(RemoteReference(self.repo, fqhn)) - # END for each line - return out_refs - - @classmethod - def create(cls, repo, name, url, **kwargs): - """ - Create a new remote to the given repository - ``repo`` - Repository instance that is to receive the new remote - - ``name`` - Desired name of the remote - - ``url`` - URL which corresponds to the remote's name - - ``**kwargs`` - Additional arguments to be passed to the git-remote add command - - Returns - New Remote instance - - Raise - GitCommandError in case an origin with that name already exists - """ - repo.git.remote( "add", name, url, **kwargs ) - return cls(repo, name) - - # add is an alias - add = create - - @classmethod - def remove(cls, repo, name ): - """ - Remove the remote with the given name - """ - repo.git.remote("rm", name) - - # alias - rm = remove - - def rename(self, new_name): - """ - Rename self to the given new_name - - Returns - self - """ - if self.name == new_name: - return self - - self.repo.git.remote("rename", self.name, new_name) - self.name = new_name - del(self._config_reader) # it contains cached values, section names are different now - return self - - def update(self, **kwargs): - """ - Fetch all changes for this remote, including new branches which will - be forced in ( in case your local remote branch is not part the new remote branches - ancestry anymore ). - - ``kwargs`` - Additional arguments passed to git-remote update - - Returns - self - """ - self.repo.git.remote("update", self.name) - return self - - def _digest_process_messages(self, fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - @return: list(line, ...) 
list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break - - if char in ('\r', '\n'): - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines - - - def _finalize_proc(self, proc): - """Wait for the process (fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling - - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in self._digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total'): - continue - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(os.path.join(self.repo.git_dir, 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info) - - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - self._finalize_proc(proc) - return output - - def _get_push_info(self, proc, progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. This is why we read the bytes manually - self._digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - self._finalize_proc(proc) - return output - - - def fetch(self, refspec=None, progress=None, **kwargs): - """ - Fetch the latest changes for this remote - - ``refspec`` - A "refspec" is used by fetch and push to describe the mapping - between remote ref and local ref. They are combined with a colon in - the format <src>:<dst>, preceded by an optional plus sign, +. - For example: git fetch $URL refs/heads/master:refs/heads/origin means - "grab the master branch head from the $URL and store it as my origin - branch head". And git push $URL refs/heads/master:refs/heads/to-upstream - means "publish my master branch head as to-upstream branch at $URL". - See also git-push(1). - - Taken from the git manual - ``progress`` - See 'push' method - - ``**kwargs`` - Additional arguments to be passed to git-fetch - - Returns - IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed - information about the fetch results - - Note - As fetch does not provide progress information to non-ttys, we cannot make - it available here unfortunately as in the 'push' method. 
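The refspec description in the fetch docstring translates directly into a call; a hedged sketch, assuming a repository whose 'origin' remote actually advertises a master branch:

    from git import Repo

    origin = Repo("/path/to/repo").remotes.origin   # hypothetical path
    # equivalent of: git fetch <url> refs/heads/master:refs/remotes/origin/master
    results = origin.fetch("refs/heads/master:refs/remotes/origin/master")
    for info in results:
        print info.name, info.note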
- """ - proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - def pull(self, refspec=None, progress=None, **kwargs): - """ - Pull changes from the given branch, being the same as a fetch followed - by a merge of branch with your local branch. - - ``refspec`` - see 'fetch' method - - ``progress`` - see 'push' method - - ``**kwargs`` - Additional arguments to be passed to git-pull - - Returns - Please see 'fetch' method - """ - proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) - return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) - - def push(self, refspec=None, progress=None, **kwargs): - """ - Push changes from source branch in refspec to target branch in refspec. - - ``refspec`` - see 'fetch' method - - ``progress`` - Instance of type RemoteProgress allowing the caller to receive - progress information until the method returns. - If None, progress information will be discarded - - ``**kwargs`` - Additional arguments to be passed to git-push - - Returns - IterableList(PushInfo, ...) iterable list of PushInfo instances, each - one informing about an individual head which had been updated on the remote - side. - If the push contains rejected heads, these will have the PushInfo.ERROR bit set - in their flags. - If the operation fails completely, the length of the returned IterableList will - be null. - """ - proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs) - return self._get_push_info(proc, progress or RemoteProgress()) - - @property - def config_reader(self): - """ - Returns - GitConfigParser compatible object able to read options for only our remote. - Hence you may simple type config.get("pushurl") to obtain the information - """ - return self._config_reader - - @property - def config_writer(self): - """ - Return - GitConfigParser compatible object able to write options for this remote. - - Note - You can only own one writer at a time - delete it to release the - configuration file and make it useable by others. - - To assure consistent results, you should only query options through the - writer. Once you are done writing, you are free to use the config reader - once again. - """ - writer = self.repo.config_writer() - - # clear our cache to assure we re-read the possibly changed configuration - del(self._config_reader) - return _SectionConstraint(writer, self._config_section_name()) + """ + Provides easy read and write access to a git remote. + + Everything not part of this interface is considered an option for the current + remote, allowing constructs like remote.pushurl to query the pushurl. + + NOTE: When querying configuration, the configuration accessor will be cached + to speed up subsequent accesses. + """ + + __slots__ = ( "repo", "name", "_config_reader" ) + _id_attribute_ = "name" + + def __init__(self, repo, name): + """ + Initialize a remote instance + + ``repo`` + The repository we are a remote of + + ``name`` + the name of the remote, i.e. 
'origin' + """ + self.repo = repo + self.name = name + + def __getattr__(self, attr): + """ + Allows to call this instance like + remote.special( *args, **kwargs) to call git-remote special self.name + """ + if attr == "_config_reader": + return super(Remote, self).__getattr__(attr) + + return self._config_reader.get(attr) + + def _config_section_name(self): + return 'remote "%s"' % self.name + + def _set_cache_(self, attr): + if attr == "_config_reader": + self._config_reader = _SectionConstraint(self.repo.config_reader(), self._config_section_name()) + else: + super(Remote, self)._set_cache_(attr) + + + def __str__(self): + return self.name + + def __repr__(self): + return '<git.%s "%s">' % (self.__class__.__name__, self.name) + + def __eq__(self, other): + return self.name == other.name + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(self.name) + + @classmethod + def iter_items(cls, repo): + """ + Returns + Iterator yielding Remote objects of the given repository + """ + for section in repo.config_reader("repository").sections(): + if not section.startswith('remote'): + continue + lbound = section.find('"') + rbound = section.rfind('"') + if lbound == -1 or rbound == -1: + raise ValueError("Remote-Section has invalid format: %r" % section) + yield Remote(repo, section[lbound+1:rbound]) + # END for each configuration section + + @property + def refs(self): + """ + Returns + IterableList of RemoteReference objects. It is prefixed, allowing + you to omit the remote path portion, i.e.:: + remote.refs.master # yields RemoteReference('/refs/remotes/origin/master') + """ + out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) + for ref in RemoteReference.list_items(self.repo): + if ref.remote_name == self.name: + out_refs.append(ref) + # END if names match + # END for each ref + assert out_refs, "Remote %s did not have any references" % self.name + return out_refs + + @property + def stale_refs(self): + """ + Returns + IterableList RemoteReference objects that do not have a corresponding + head in the remote reference anymore as they have been deleted on the + remote side, but are still available locally. + + The IterableList is prefixed, hence the 'origin' must be omitted. See + 'refs' property for an example. 
+ """ + out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) + for line in self.repo.git.remote("prune", "--dry-run", self).splitlines()[2:]: + # expecting + # * [would prune] origin/new_branch + token = " * [would prune] " + if not line.startswith(token): + raise ValueError("Could not parse git-remote prune result: %r" % line) + fqhn = "%s/%s" % (RemoteReference._common_path_default,line.replace(token, "")) + out_refs.append(RemoteReference(self.repo, fqhn)) + # END for each line + return out_refs + + @classmethod + def create(cls, repo, name, url, **kwargs): + """ + Create a new remote to the given repository + ``repo`` + Repository instance that is to receive the new remote + + ``name`` + Desired name of the remote + + ``url`` + URL which corresponds to the remote's name + + ``**kwargs`` + Additional arguments to be passed to the git-remote add command + + Returns + New Remote instance + + Raise + GitCommandError in case an origin with that name already exists + """ + repo.git.remote( "add", name, url, **kwargs ) + return cls(repo, name) + + # add is an alias + add = create + + @classmethod + def remove(cls, repo, name ): + """ + Remove the remote with the given name + """ + repo.git.remote("rm", name) + + # alias + rm = remove + + def rename(self, new_name): + """ + Rename self to the given new_name + + Returns + self + """ + if self.name == new_name: + return self + + self.repo.git.remote("rename", self.name, new_name) + self.name = new_name + del(self._config_reader) # it contains cached values, section names are different now + return self + + def update(self, **kwargs): + """ + Fetch all changes for this remote, including new branches which will + be forced in ( in case your local remote branch is not part the new remote branches + ancestry anymore ). + + ``kwargs`` + Additional arguments passed to git-remote update + + Returns + self + """ + self.repo.git.remote("update", self.name) + return self + + def _digest_process_messages(self, fh, progress): + """Read progress messages from file-like object fh, supplying the respective + progress messages to the progress instance. + @return: list(line, ...) 
list of lines without linebreaks that did + not contain progress information""" + line_so_far = '' + dropped_lines = list() + while True: + char = fh.read(1) + if not char: + break + + if char in ('\r', '\n'): + dropped_lines.extend(progress._parse_progress_line(line_so_far)) + line_so_far = '' + else: + line_so_far += char + # END process parsed line + # END while file is not done reading + return dropped_lines + + + def _finalize_proc(self, proc): + """Wait for the process (fetch, pull or push) and handle its errors accordingly""" + try: + proc.wait() + except GitCommandError,e: + # if a push has rejected items, the command has non-zero return status + # a return status of 128 indicates a connection error - reraise the previous one + if proc.poll() == 128: + raise + pass + # END exception handling + + + def _get_fetch_info_from_stderr(self, proc, progress): + # skip first line as it is some remote info we are not interested in + output = IterableList('name') + + + # lines which are no progress are fetch info lines + # this also waits for the command to finish + # Skip some progress lines that don't provide relevant information + fetch_info_lines = list() + for line in self._digest_process_messages(proc.stderr, progress): + if line.startswith('From') or line.startswith('remote: Total'): + continue + fetch_info_lines.append(line) + # END for each line + + # read head information + fp = open(os.path.join(self.repo.git_dir, 'FETCH_HEAD'),'r') + fetch_head_info = fp.readlines() + fp.close() + + assert len(fetch_info_lines) == len(fetch_head_info) + + output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) + for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) + + self._finalize_proc(proc) + return output + + def _get_push_info(self, proc, progress): + # read progress information from stderr + # we hope stdout can hold all the data, it should ... + # read the lines manually as it will use carriage returns between the messages + # to override the previous one. This is why we read the bytes manually + self._digest_process_messages(proc.stderr, progress) + + output = IterableList('name') + for line in proc.stdout.readlines(): + try: + output.append(PushInfo._from_line(self, line)) + except ValueError: + # if an error happens, additional info is given which we cannot parse + pass + # END exception handling + # END for each line + + self._finalize_proc(proc) + return output + + + def fetch(self, refspec=None, progress=None, **kwargs): + """ + Fetch the latest changes for this remote + + ``refspec`` + A "refspec" is used by fetch and push to describe the mapping + between remote ref and local ref. They are combined with a colon in + the format <src>:<dst>, preceded by an optional plus sign, +. + For example: git fetch $URL refs/heads/master:refs/heads/origin means + "grab the master branch head from the $URL and store it as my origin + branch head". And git push $URL refs/heads/master:refs/heads/to-upstream + means "publish my master branch head as to-upstream branch at $URL". + See also git-push(1). + + Taken from the git manual + ``progress`` + See 'push' method + + ``**kwargs`` + Additional arguments to be passed to git-fetch + + Returns + IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed + information about the fetch results + + Note + As fetch does not provide progress information to non-ttys, we cannot make + it available here unfortunately as in the 'push' method. 
+ """ + proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + def pull(self, refspec=None, progress=None, **kwargs): + """ + Pull changes from the given branch, being the same as a fetch followed + by a merge of branch with your local branch. + + ``refspec`` + see 'fetch' method + + ``progress`` + see 'push' method + + ``**kwargs`` + Additional arguments to be passed to git-pull + + Returns + Please see 'fetch' method + """ + proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs) + return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress()) + + def push(self, refspec=None, progress=None, **kwargs): + """ + Push changes from source branch in refspec to target branch in refspec. + + ``refspec`` + see 'fetch' method + + ``progress`` + Instance of type RemoteProgress allowing the caller to receive + progress information until the method returns. + If None, progress information will be discarded + + ``**kwargs`` + Additional arguments to be passed to git-push + + Returns + IterableList(PushInfo, ...) iterable list of PushInfo instances, each + one informing about an individual head which had been updated on the remote + side. + If the push contains rejected heads, these will have the PushInfo.ERROR bit set + in their flags. + If the operation fails completely, the length of the returned IterableList will + be null. + """ + proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs) + return self._get_push_info(proc, progress or RemoteProgress()) + + @property + def config_reader(self): + """ + Returns + GitConfigParser compatible object able to read options for only our remote. + Hence you may simple type config.get("pushurl") to obtain the information + """ + return self._config_reader + + @property + def config_writer(self): + """ + Return + GitConfigParser compatible object able to write options for this remote. + + Note + You can only own one writer at a time - delete it to release the + configuration file and make it useable by others. + + To assure consistent results, you should only query options through the + writer. Once you are done writing, you are free to use the config reader + once again. 
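A short sketch of the writer protocol just described; the URL is a placeholder, and it assumes the section-constrained writer exposes the ConfigParser-style 'set' method:

    from git import Repo

    origin = Repo("/path/to/repo").remotes.origin   # hypothetical path
    writer = origin.config_writer
    writer.set("pushurl", "git@example.com:user/repo.git")  # placeholder URL
    del writer                          # release the configuration file lock
    print origin.config_reader.get("pushurl")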
+ """ + writer = self.repo.config_writer() + + # clear our cache to assure we re-read the possibly changed configuration + del(self._config_reader) + return _SectionConstraint(writer, self._config_section_name()) diff --git a/lib/git/repo.py b/lib/git/repo.py index 0c8ac9e9..58eb6ba1 100644 --- a/lib/git/repo.py +++ b/lib/git/repo.py @@ -20,768 +20,768 @@ from config import GitConfigParser from remote import Remote def touch(filename): - fp = open(filename, "a") - fp.close() + fp = open(filename, "a") + fp.close() def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" - - if os.path.isdir(d) and \ - os.path.isdir(os.path.join(d, 'objects')) and \ - os.path.isdir(os.path.join(d, 'refs')): - headref = os.path.join(d, 'HEAD') - return os.path.isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False + """ This is taken from the git setup.c:is_git_directory + function.""" + + if os.path.isdir(d) and \ + os.path.isdir(os.path.join(d, 'objects')) and \ + os.path.isdir(os.path.join(d, 'refs')): + headref = os.path.join(d, 'HEAD') + return os.path.isfile(headref) or \ + (os.path.islink(headref) and + os.readlink(headref).startswith('refs')) + return False class Repo(object): - """ - Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. - - The following attributes are worth using: - - 'working_dir' is the working directory of the git command, wich is the working tree - directory if available or the .git directory in case of bare repositories - - 'working_tree_dir' is the working tree directory, but will raise AssertionError - if we are a bare repository. - - 'git_dir' is the .git repository directoy, which is always set. 
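The three directory attributes mentioned in the Repo docstring behave differently for bare repositories; a sketch with hypothetical paths:

    from git import Repo

    repo = Repo("/path/to/checkout")        # hypothetical non-bare repository
    print repo.git_dir                      # .../checkout/.git - always set
    print repo.working_tree_dir             # .../checkout

    bare = Repo("/path/to/bare.git")        # hypothetical bare repository
    print bare.bare                         # True
    # bare.working_tree_dir would raise AssertionError - no working tree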
- """ - DAEMON_EXPORT_FILE = 'git-daemon-export-ok' - __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" ) - - # precompiled regex - re_whitespace = re.compile(r'\s+') - re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') - re_author_committer_start = re.compile(r'^(author|committer)') - re_tab_full_line = re.compile(r'^\t(.*)$') - - # invariants - # represents the configuration level of a configuration file - config_level = ("system", "global", "repository") - - def __init__(self, path=None): - """ - Create a new Repo instance - - ``path`` - is the path to either the root git directory or the bare git repo - - Examples:: - - repo = Repo("/Users/mtrier/Development/git-python") - repo = Repo("/Users/mtrier/Development/git-python.git") - repo = Repo("~/Development/git-python.git") - repo = Repo("$REPOSITORIES/Development/git-python.git") - - Raises - InvalidGitRepositoryError or NoSuchPathError - - Returns - ``git.Repo`` - """ - - epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) - - if not os.path.exists(epath): - raise NoSuchPathError(epath) - - self.working_dir = None - self._working_tree_dir = None - self.git_dir = None - curpath = epath - - # walk up the path to find the .git dir - while curpath: - if is_git_dir(curpath): - self.git_dir = curpath - self._working_tree_dir = os.path.dirname(curpath) - break - gitpath = os.path.join(curpath, '.git') - if is_git_dir(gitpath): - self.git_dir = gitpath - self._working_tree_dir = curpath - break - curpath, dummy = os.path.split(curpath) - if not dummy: - break - # END while curpath - - if self.git_dir is None: - raise InvalidGitRepositoryError(epath) - - self._bare = False - try: - self._bare = self.config_reader("repository").getboolean('core','bare') - except Exception: - # lets not assume the option exists, although it should - pass - - # adjust the wd in case we are actually bare - we didn't know that - # in the first place - if self._bare: - self._working_tree_dir = None - # END working dir handling - - self.working_dir = self._working_tree_dir or self.git_dir - self.git = Git(self.working_dir) - - # Description property - def _get_description(self): - filename = os.path.join(self.git_dir, 'description') - return file(filename).read().rstrip() - - def _set_description(self, descr): - filename = os.path.join(self.git_dir, 'description') - file(filename, 'w').write(descr+'\n') - - description = property(_get_description, _set_description, - doc="the project's description") - del _get_description - del _set_description - - @property - def working_tree_dir(self): - """ - Returns - The working tree directory of our git repository - - Raises AssertionError - If we are a bare repository - """ - if self._working_tree_dir is None: - raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) - return self._working_tree_dir - - @property - def bare(self): - """ - Returns - True if the repository is bare - """ - return self._bare - - @property - def heads(self): - """ - A list of ``Head`` objects representing the branch heads in - this repo - - Returns - ``git.IterableList(Head, ...)`` - """ - return Head.list_items(self) - - @property - def references(self): - """ - A list of Reference objects representing tags, heads and remote references. - - Returns - IterableList(Reference, ...) 
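Since heads, tags and references all return prefixed IterableList instances, members can be addressed by name; a sketch assuming a repository with a branch called master:

    from git import Repo

    repo = Repo("/path/to/repo")        # hypothetical path
    print repo.heads                    # local branches, also available as repo.branches
    print repo.tags
    master = repo.heads.master          # attribute access by reference name
    print repo.refs                     # heads, tags and remote references combined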
- """ - return Reference.list_items(self) - - # alias for references - refs = references - - # alias for heads - branches = heads - - @property - def index(self): - """ - Returns - IndexFile representing this repository's index. - """ - return IndexFile(self) - - @property - def head(self): - """ - Return - HEAD Object pointing to the current head reference - """ - return HEAD(self,'HEAD') - - @property - def remotes(self): - """ - A list of Remote objects allowing to access and manipulate remotes - - Returns - ``git.IterableList(Remote, ...)`` - """ - return Remote.list_items(self) - - def remote(self, name='origin'): - """ - Return - Remote with the specified name - - Raise - ValueError if no remote with such a name exists - """ - return Remote(self, name) - - @property - def tags(self): - """ - A list of ``Tag`` objects that are available in this repo - - Returns - ``git.IterableList(TagReference, ...)`` - """ - return TagReference.list_items(self) - - def tag(self,path): - """ - Return - TagReference Object, reference pointing to a Commit or Tag - - ``path`` - path to the tag reference, i.e. 0.1.5 or tags/0.1.5 - """ - return TagReference(self, path) - - def create_head(self, path, commit='HEAD', force=False, **kwargs ): - """ - Create a new head within the repository. - - For more documentation, please see the Head.create method. - - Returns - newly created Head Reference - """ - return Head.create(self, path, commit, force, **kwargs) - - def delete_head(self, *heads, **kwargs): - """ - Delete the given heads - - ``kwargs`` - Additional keyword arguments to be passed to git-branch - """ - return Head.delete(self, *heads, **kwargs) - - def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): - """ - Create a new tag reference. - - For more documentation, please see the TagReference.create method. - - Returns - TagReference object - """ - return TagReference.create(self, path, ref, message, force, **kwargs) - - def delete_tag(self, *tags): - """ - Delete the given tag references - """ - return TagReference.delete(self, *tags) - - def create_remote(self, name, url, **kwargs): - """ - Create a new remote. - - For more information, please see the documentation of the Remote.create - methods - - Returns - Remote reference - """ - return Remote.create(self, name, url, **kwargs) - - def delete_remote(self, remote): - """ - Delete the given remote. - """ - return Remote.remove(self, remote) - - def _get_config_path(self, config_level ): - # we do not support an absolute path of the gitconfig on windows , - # use the global config instead - if sys.platform == "win32" and config_level == "system": - config_level = "global" - - if config_level == "system": - return "/etc/gitconfig" - elif config_level == "global": - return os.path.expanduser("~/.gitconfig") - elif config_level == "repository": - return "%s/config" % self.git_dir - - raise ValueError( "Invalid configuration level: %r" % config_level ) - - def config_reader(self, config_level=None): - """ - Returns - GitConfigParser allowing to read the full git configuration, but not to write it - - The configuration will include values from the system, user and repository - configuration files. - - NOTE: On windows, system configuration cannot currently be read as the path is - unknown, instead the global path will be used. - - ``config_level`` - For possible values, see config_writer method - If None, all applicable levels will be used. 
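A sketch of reading configuration at the levels listed above; it assumes a [user] section is present and that the returned parser supports plain ConfigParser access:

    from git import Repo

    repo = Repo("/path/to/repo")        # hypothetical path
    # merged view of system, global and repository configuration
    reader = repo.config_reader()
    print reader.get("user", "name")

    # restrict to the repository file only
    repo_only = repo.config_reader("repository")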
Specify a level in case - you know which exact file you whish to read to prevent reading multiple files for - instance - """ - files = None - if config_level is None: - files = [ self._get_config_path(f) for f in self.config_level ] - else: - files = [ self._get_config_path(config_level) ] - return GitConfigParser(files, read_only=True) - - def config_writer(self, config_level="repository"): - """ - Returns - GitConfigParser allowing to write values of the specified configuration file level. - Config writers should be retrieved, used to change the configuration ,and written - right away as they will lock the configuration file in question and prevent other's - to write it. - - ``config_level`` - One of the following values - system = sytem wide configuration file - global = user level configuration file - repository = configuration file for this repostory only - """ - return GitConfigParser(self._get_config_path(config_level), read_only = False) - - def commit(self, rev=None): - """ - The Commit object for the specified revision - - ``rev`` - revision specifier, see git-rev-parse for viable options. - - Returns - ``git.Commit`` - """ - if rev is None: - rev = self.active_branch - - c = Object.new(self, rev) - assert c.type == "commit", "Revision %s did not point to a commit, but to %s" % (rev, c) - return c - - def iter_trees(self, *args, **kwargs): - """ - Returns - Iterator yielding Tree objects - - Note: Takes all arguments known to iter_commits method - """ - return ( c.tree for c in self.iter_commits(*args, **kwargs) ) - - def tree(self, rev=None): - """ - The Tree object for the given treeish revision - - ``rev`` - is a revision pointing to a Treeish ( being a commit or tree ) - - Examples:: - - repo.tree(repo.heads[0]) - - Returns - ``git.Tree`` - - NOTE - If you need a non-root level tree, find it by iterating the root tree. Otherwise - it cannot know about its path relative to the repository root and subsequent - operations might have unexpected results. - """ - if rev is None: - rev = self.active_branch - - c = Object.new(self, rev) - if c.type == "commit": - return c.tree - elif c.type == "tree": - return c - raise ValueError( "Revision %s did not point to a treeish, but to %s" % (rev, c)) - - def iter_commits(self, rev=None, paths='', **kwargs): - """ - A list of Commit objects representing the history of a given ref/commit - - ``rev`` - revision specifier, see git-rev-parse for viable options. - If None, the active branch will be used. - - ``paths`` - is an optional path or a list of paths to limit the returned commits to - Commits that do not contain that path or the paths will not be returned. 
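A sketch of the history queries described above; the branch names are hypothetical:

    from git import Repo

    repo = Repo("/path/to/repo")        # hypothetical path
    # ten most recent commits on the active branch touching lib/git
    for commit in repo.iter_commits(paths='lib/git', max_count=10):
        print commit.message.splitlines()[0]

    # commits reachable from 'feature' but not from 'master'
    commits = list(repo.iter_commits('master..feature'))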
- - ``kwargs`` - Arguments to be passed to git-rev-parse - common ones are - max_count and skip - - Note: to receive only commits between two named revisions, use the - "revA..revB" revision specifier - - Returns - ``git.Commit[]`` - """ - if rev is None: - rev = self.active_branch - - return Commit.iter_items(self, rev, paths, **kwargs) - - def _get_daemon_export(self): - filename = os.path.join(self.git_dir, self.DAEMON_EXPORT_FILE) - return os.path.exists(filename) - - def _set_daemon_export(self, value): - filename = os.path.join(self.git_dir, self.DAEMON_EXPORT_FILE) - fileexists = os.path.exists(filename) - if value and not fileexists: - touch(filename) - elif not value and fileexists: - os.unlink(filename) - - daemon_export = property(_get_daemon_export, _set_daemon_export, - doc="If True, git-daemon may export this repository") - del _get_daemon_export - del _set_daemon_export - - def _get_alternates(self): - """ - The list of alternates for this repo from which objects can be retrieved - - Returns - list of strings being pathnames of alternates - """ - alternates_path = os.path.join(self.git_dir, 'objects', 'info', 'alternates') - - if os.path.exists(alternates_path): - try: - f = open(alternates_path) - alts = f.read() - finally: - f.close() - return alts.strip().splitlines() - else: - return [] - - def _set_alternates(self, alts): - """ - Sets the alternates - - ``alts`` - is the array of string paths representing the alternates at which - git should look for objects, i.e. /home/user/repo/.git/objects - - Raises - NoSuchPathError - - Note - The method does not check for the existance of the paths in alts - as the caller is responsible. - - Returns - None - """ - alternates_path = os.path.join(self.git_dir, 'objects', 'info', 'alternates') - if not alts: - if os.path.isfile(alternates_path): - os.remove(alternates_path) - else: - try: - f = open(alternates_path, 'w') - f.write("\n".join(alts)) - finally: - f.close() - # END file handling - # END alts handling - - alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") - - def is_dirty(self, index=True, working_tree=True, untracked_files=False): - """ - Returns - ``True``, the repository is considered dirty. By default it will react - like a git-status without untracked files, hence it is dirty if the - index or the working copy have changes. - """ - if self._bare: - # Bare repositories with no associated working directory are - # always consired to be clean. - return False - - # start from the one which is fastest to evaluate - default_args = ('--abbrev=40', '--full-index', '--raw') - if index: - # diff index against HEAD - if len(self.git.diff('HEAD', '--cached', *default_args)): - return True - # END index handling - if working_tree: - # diff index against working tree - if len(self.git.diff(*default_args)): - return True - # END working tree handling - if untracked_files: - if len(self.untracked_files): - return True - # END untracked files - return False - - @property - def untracked_files(self): - """ - Returns - list(str,...) - - Files currently untracked as they have not been staged yet. Paths - are relative to the current working directory of the git command. - - Note - ignored files will not appear here, i.e. 
files mentioned in .gitignore - """ - # make sure we get all files, no only untracked directores - proc = self.git.status(untracked_files=True, as_process=True) - stream = iter(proc.stdout) - untracked_files = list() - for line in stream: - if not line.startswith("# Untracked files:"): - continue - # skip two lines - stream.next() - stream.next() - - for untracked_info in stream: - if not untracked_info.startswith("#\t"): - break - untracked_files.append(untracked_info.replace("#\t", "").rstrip()) - # END for each utracked info line - # END for each line - return untracked_files - - @property - def active_branch(self): - """ - The name of the currently active branch. - - Returns - Head to the active branch - """ - return self.head.reference - - def blame(self, rev, file): - """ - The blame information for the given file at the given revision. - - ``rev`` - revision specifier, see git-rev-parse for viable options. - - Returns - list: [git.Commit, list: [<line>]] - A list of tuples associating a Commit object with a list of lines that - changed within the given commit. The Commit objects will be given in order - of appearance. - """ - data = self.git.blame(rev, '--', file, p=True) - commits = {} - blames = [] - info = None - - for line in data.splitlines(False): - parts = self.re_whitespace.split(line, 1) - firstpart = parts[0] - if self.re_hexsha_only.search(firstpart): - # handles - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start - # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - digits = parts[-1].split(" ") - if len(digits) == 3: - info = {'id': firstpart} - blames.append([None, []]) - # END blame data initialization - else: - m = self.re_author_committer_start.search(firstpart) - if m: - # handles: - # author Tom Preston-Werner - # author-mail <tom@mojombo.com> - # author-time 1192271832 - # author-tz -0700 - # committer Tom Preston-Werner - # committer-mail <tom@mojombo.com> - # committer-time 1192271832 - # committer-tz -0700 - IGNORED BY US - role = m.group(0) - if firstpart.endswith('-mail'): - info["%s_email" % role] = parts[-1] - elif firstpart.endswith('-time'): - info["%s_date" % role] = int(parts[-1]) - elif role == firstpart: - info[role] = parts[-1] - # END distinguish mail,time,name - else: - # handle - # filename lib/grit.rb - # summary add Blob - # <and rest> - if firstpart.startswith('filename'): - info['filename'] = parts[-1] - elif firstpart.startswith('summary'): - info['summary'] = parts[-1] - elif firstpart == '': - if info: - sha = info['id'] - c = commits.get(sha) - if c is None: - c = Commit( self, sha, - author=Actor._from_string(info['author'] + ' ' + info['author_email']), - authored_date=info['author_date'], - committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), - committed_date=info['committer_date'], - message=info['summary']) - commits[sha] = c - # END if commit objects needs initial creation - m = self.re_tab_full_line.search(line) - text, = m.groups() - blames[-1][0] = c - blames[-1][1].append( text ) - info = None - # END if we collected commit info - # END distinguish filename,summary,rest - # END distinguish author|committer vs filename,summary,rest - # END distinguish hexsha vs other information - return blames - - @classmethod - def init(cls, path=None, mkdir=True, **kwargs): - """ - Initialize a git repository at the given path if specified - - ``path`` - is the full path to the repo (traditionally ends with /<name>.git) - or None in which case the repository will be created in the current - 
working directory - - ``mkdir`` - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - Only effective if a path is explicitly given - - ``kwargs`` - keyword arguments serving as additional options to the git-init command - - Examples:: - - git.Repo.init('/var/git/myrepo.git',bare=True) - - Returns - ``git.Repo`` (the newly created repo) - """ - - if mkdir and path and not os.path.exists(path): - os.makedirs(path, 0755) - - # git command automatically chdir into the directory - git = Git(path) - output = git.init(**kwargs) - return Repo(path) - - def clone(self, path, **kwargs): - """ - Create a clone from this repository. - - ``path`` - is the full path of the new repo (traditionally ends with ./<name>.git). - - ``kwargs`` - keyword arguments to be given to the git-clone command - - Returns - ``git.Repo`` (the newly cloned repo) - """ - # special handling for windows for path at which the clone should be - # created. - # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence - # we at least give a proper error instead of letting git fail - prev_cwd = None - prev_path = None - if os.name == 'nt': - if '~' in path: - raise OSError("Git cannot handle the ~ character in path %r correctly" % path) - - # on windows, git will think paths like c: are relative and prepend the - # current working dir ( before it fails ). We temporarily adjust the working - # dir to make this actually work - match = re.match("(\w:[/\\\])(.*)", path) - if match: - prev_cwd = os.getcwd() - prev_path = path - drive, rest_of_path = match.groups() - os.chdir(drive) - path = rest_of_path - kwargs['with_keep_cwd'] = True - # END cwd preparation - # END windows handling - - try: - self.git.clone(self.git_dir, path, **kwargs) - finally: - if prev_cwd is not None: - os.chdir(prev_cwd) - path = prev_path - # END reset previous working dir - # END bad windows handling - return Repo(path) - - - def archive(self, ostream, treeish=None, prefix=None, **kwargs): - """ - Archive the tree at the given revision. - ``ostream`` - file compatible stream object to which the archive will be written - - ``treeish`` - is the treeish name/id, defaults to active branch - - ``prefix`` - is the optional prefix to prepend to each filename in the archive - - ``kwargs`` - Additional arguments passed to git-archive - NOTE: Use the 'format' argument to define the kind of format. Use - specialized ostreams to write any format supported by python - - Examples:: - - >>> repo.archive(open("archive")) - <String containing tar.gz archive> - - Raise - GitCommandError in case something went wrong - - Returns - self - """ - if treeish is None: - treeish = self.active_branch - if prefix and 'prefix' not in kwargs: - kwargs['prefix'] = prefix - kwargs['output_stream'] = ostream - - self.git.archive(treeish, **kwargs) - return self - - def __repr__(self): - return '<git.Repo "%s">' % self.git_dir + """ + Represents a git repository and allows you to query references, + gather commit information, generate diffs, create and clone repositories query + the log. + + The following attributes are worth using: + + 'working_dir' is the working directory of the git command, wich is the working tree + directory if available or the .git directory in case of bare repositories + + 'working_tree_dir' is the working tree directory, but will raise AssertionError + if we are a bare repository. + + 'git_dir' is the .git repository directoy, which is always set. 
+ """ + DAEMON_EXPORT_FILE = 'git-daemon-export-ok' + __slots__ = ( "working_dir", "_working_tree_dir", "git_dir", "_bare", "git" ) + + # precompiled regex + re_whitespace = re.compile(r'\s+') + re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$') + re_author_committer_start = re.compile(r'^(author|committer)') + re_tab_full_line = re.compile(r'^\t(.*)$') + + # invariants + # represents the configuration level of a configuration file + config_level = ("system", "global", "repository") + + def __init__(self, path=None): + """ + Create a new Repo instance + + ``path`` + is the path to either the root git directory or the bare git repo + + Examples:: + + repo = Repo("/Users/mtrier/Development/git-python") + repo = Repo("/Users/mtrier/Development/git-python.git") + repo = Repo("~/Development/git-python.git") + repo = Repo("$REPOSITORIES/Development/git-python.git") + + Raises + InvalidGitRepositoryError or NoSuchPathError + + Returns + ``git.Repo`` + """ + + epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd()))) + + if not os.path.exists(epath): + raise NoSuchPathError(epath) + + self.working_dir = None + self._working_tree_dir = None + self.git_dir = None + curpath = epath + + # walk up the path to find the .git dir + while curpath: + if is_git_dir(curpath): + self.git_dir = curpath + self._working_tree_dir = os.path.dirname(curpath) + break + gitpath = os.path.join(curpath, '.git') + if is_git_dir(gitpath): + self.git_dir = gitpath + self._working_tree_dir = curpath + break + curpath, dummy = os.path.split(curpath) + if not dummy: + break + # END while curpath + + if self.git_dir is None: + raise InvalidGitRepositoryError(epath) + + self._bare = False + try: + self._bare = self.config_reader("repository").getboolean('core','bare') + except Exception: + # lets not assume the option exists, although it should + pass + + # adjust the wd in case we are actually bare - we didn't know that + # in the first place + if self._bare: + self._working_tree_dir = None + # END working dir handling + + self.working_dir = self._working_tree_dir or self.git_dir + self.git = Git(self.working_dir) + + # Description property + def _get_description(self): + filename = os.path.join(self.git_dir, 'description') + return file(filename).read().rstrip() + + def _set_description(self, descr): + filename = os.path.join(self.git_dir, 'description') + file(filename, 'w').write(descr+'\n') + + description = property(_get_description, _set_description, + doc="the project's description") + del _get_description + del _set_description + + @property + def working_tree_dir(self): + """ + Returns + The working tree directory of our git repository + + Raises AssertionError + If we are a bare repository + """ + if self._working_tree_dir is None: + raise AssertionError( "Repository at %r is bare and does not have a working tree directory" % self.git_dir ) + return self._working_tree_dir + + @property + def bare(self): + """ + Returns + True if the repository is bare + """ + return self._bare + + @property + def heads(self): + """ + A list of ``Head`` objects representing the branch heads in + this repo + + Returns + ``git.IterableList(Head, ...)`` + """ + return Head.list_items(self) + + @property + def references(self): + """ + A list of Reference objects representing tags, heads and remote references. + + Returns + IterableList(Reference, ...) 
+ """ + return Reference.list_items(self) + + # alias for references + refs = references + + # alias for heads + branches = heads + + @property + def index(self): + """ + Returns + IndexFile representing this repository's index. + """ + return IndexFile(self) + + @property + def head(self): + """ + Return + HEAD Object pointing to the current head reference + """ + return HEAD(self,'HEAD') + + @property + def remotes(self): + """ + A list of Remote objects allowing to access and manipulate remotes + + Returns + ``git.IterableList(Remote, ...)`` + """ + return Remote.list_items(self) + + def remote(self, name='origin'): + """ + Return + Remote with the specified name + + Raise + ValueError if no remote with such a name exists + """ + return Remote(self, name) + + @property + def tags(self): + """ + A list of ``Tag`` objects that are available in this repo + + Returns + ``git.IterableList(TagReference, ...)`` + """ + return TagReference.list_items(self) + + def tag(self,path): + """ + Return + TagReference Object, reference pointing to a Commit or Tag + + ``path`` + path to the tag reference, i.e. 0.1.5 or tags/0.1.5 + """ + return TagReference(self, path) + + def create_head(self, path, commit='HEAD', force=False, **kwargs ): + """ + Create a new head within the repository. + + For more documentation, please see the Head.create method. + + Returns + newly created Head Reference + """ + return Head.create(self, path, commit, force, **kwargs) + + def delete_head(self, *heads, **kwargs): + """ + Delete the given heads + + ``kwargs`` + Additional keyword arguments to be passed to git-branch + """ + return Head.delete(self, *heads, **kwargs) + + def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs): + """ + Create a new tag reference. + + For more documentation, please see the TagReference.create method. + + Returns + TagReference object + """ + return TagReference.create(self, path, ref, message, force, **kwargs) + + def delete_tag(self, *tags): + """ + Delete the given tag references + """ + return TagReference.delete(self, *tags) + + def create_remote(self, name, url, **kwargs): + """ + Create a new remote. + + For more information, please see the documentation of the Remote.create + methods + + Returns + Remote reference + """ + return Remote.create(self, name, url, **kwargs) + + def delete_remote(self, remote): + """ + Delete the given remote. + """ + return Remote.remove(self, remote) + + def _get_config_path(self, config_level ): + # we do not support an absolute path of the gitconfig on windows , + # use the global config instead + if sys.platform == "win32" and config_level == "system": + config_level = "global" + + if config_level == "system": + return "/etc/gitconfig" + elif config_level == "global": + return os.path.expanduser("~/.gitconfig") + elif config_level == "repository": + return "%s/config" % self.git_dir + + raise ValueError( "Invalid configuration level: %r" % config_level ) + + def config_reader(self, config_level=None): + """ + Returns + GitConfigParser allowing to read the full git configuration, but not to write it + + The configuration will include values from the system, user and repository + configuration files. + + NOTE: On windows, system configuration cannot currently be read as the path is + unknown, instead the global path will be used. + + ``config_level`` + For possible values, see config_writer method + If None, all applicable levels will be used. 
Specify a level in case + you know which exact file you whish to read to prevent reading multiple files for + instance + """ + files = None + if config_level is None: + files = [ self._get_config_path(f) for f in self.config_level ] + else: + files = [ self._get_config_path(config_level) ] + return GitConfigParser(files, read_only=True) + + def config_writer(self, config_level="repository"): + """ + Returns + GitConfigParser allowing to write values of the specified configuration file level. + Config writers should be retrieved, used to change the configuration ,and written + right away as they will lock the configuration file in question and prevent other's + to write it. + + ``config_level`` + One of the following values + system = sytem wide configuration file + global = user level configuration file + repository = configuration file for this repostory only + """ + return GitConfigParser(self._get_config_path(config_level), read_only = False) + + def commit(self, rev=None): + """ + The Commit object for the specified revision + + ``rev`` + revision specifier, see git-rev-parse for viable options. + + Returns + ``git.Commit`` + """ + if rev is None: + rev = self.active_branch + + c = Object.new(self, rev) + assert c.type == "commit", "Revision %s did not point to a commit, but to %s" % (rev, c) + return c + + def iter_trees(self, *args, **kwargs): + """ + Returns + Iterator yielding Tree objects + + Note: Takes all arguments known to iter_commits method + """ + return ( c.tree for c in self.iter_commits(*args, **kwargs) ) + + def tree(self, rev=None): + """ + The Tree object for the given treeish revision + + ``rev`` + is a revision pointing to a Treeish ( being a commit or tree ) + + Examples:: + + repo.tree(repo.heads[0]) + + Returns + ``git.Tree`` + + NOTE + If you need a non-root level tree, find it by iterating the root tree. Otherwise + it cannot know about its path relative to the repository root and subsequent + operations might have unexpected results. + """ + if rev is None: + rev = self.active_branch + + c = Object.new(self, rev) + if c.type == "commit": + return c.tree + elif c.type == "tree": + return c + raise ValueError( "Revision %s did not point to a treeish, but to %s" % (rev, c)) + + def iter_commits(self, rev=None, paths='', **kwargs): + """ + A list of Commit objects representing the history of a given ref/commit + + ``rev`` + revision specifier, see git-rev-parse for viable options. + If None, the active branch will be used. + + ``paths`` + is an optional path or a list of paths to limit the returned commits to + Commits that do not contain that path or the paths will not be returned. 
+ + ``kwargs`` + Arguments to be passed to git-rev-parse - common ones are + max_count and skip + + Note: to receive only commits between two named revisions, use the + "revA..revB" revision specifier + + Returns + ``git.Commit[]`` + """ + if rev is None: + rev = self.active_branch + + return Commit.iter_items(self, rev, paths, **kwargs) + + def _get_daemon_export(self): + filename = os.path.join(self.git_dir, self.DAEMON_EXPORT_FILE) + return os.path.exists(filename) + + def _set_daemon_export(self, value): + filename = os.path.join(self.git_dir, self.DAEMON_EXPORT_FILE) + fileexists = os.path.exists(filename) + if value and not fileexists: + touch(filename) + elif not value and fileexists: + os.unlink(filename) + + daemon_export = property(_get_daemon_export, _set_daemon_export, + doc="If True, git-daemon may export this repository") + del _get_daemon_export + del _set_daemon_export + + def _get_alternates(self): + """ + The list of alternates for this repo from which objects can be retrieved + + Returns + list of strings being pathnames of alternates + """ + alternates_path = os.path.join(self.git_dir, 'objects', 'info', 'alternates') + + if os.path.exists(alternates_path): + try: + f = open(alternates_path) + alts = f.read() + finally: + f.close() + return alts.strip().splitlines() + else: + return [] + + def _set_alternates(self, alts): + """ + Sets the alternates + + ``alts`` + is the array of string paths representing the alternates at which + git should look for objects, i.e. /home/user/repo/.git/objects + + Raises + NoSuchPathError + + Note + The method does not check for the existance of the paths in alts + as the caller is responsible. + + Returns + None + """ + alternates_path = os.path.join(self.git_dir, 'objects', 'info', 'alternates') + if not alts: + if os.path.isfile(alternates_path): + os.remove(alternates_path) + else: + try: + f = open(alternates_path, 'w') + f.write("\n".join(alts)) + finally: + f.close() + # END file handling + # END alts handling + + alternates = property(_get_alternates, _set_alternates, doc="Retrieve a list of alternates paths or set a list paths to be used as alternates") + + def is_dirty(self, index=True, working_tree=True, untracked_files=False): + """ + Returns + ``True``, the repository is considered dirty. By default it will react + like a git-status without untracked files, hence it is dirty if the + index or the working copy have changes. + """ + if self._bare: + # Bare repositories with no associated working directory are + # always consired to be clean. + return False + + # start from the one which is fastest to evaluate + default_args = ('--abbrev=40', '--full-index', '--raw') + if index: + # diff index against HEAD + if len(self.git.diff('HEAD', '--cached', *default_args)): + return True + # END index handling + if working_tree: + # diff index against working tree + if len(self.git.diff(*default_args)): + return True + # END working tree handling + if untracked_files: + if len(self.untracked_files): + return True + # END untracked files + return False + + @property + def untracked_files(self): + """ + Returns + list(str,...) + + Files currently untracked as they have not been staged yet. Paths + are relative to the current working directory of the git command. + + Note + ignored files will not appear here, i.e. 
files mentioned in .gitignore + """ + # make sure we get all files, no only untracked directores + proc = self.git.status(untracked_files=True, as_process=True) + stream = iter(proc.stdout) + untracked_files = list() + for line in stream: + if not line.startswith("# Untracked files:"): + continue + # skip two lines + stream.next() + stream.next() + + for untracked_info in stream: + if not untracked_info.startswith("#\t"): + break + untracked_files.append(untracked_info.replace("#\t", "").rstrip()) + # END for each utracked info line + # END for each line + return untracked_files + + @property + def active_branch(self): + """ + The name of the currently active branch. + + Returns + Head to the active branch + """ + return self.head.reference + + def blame(self, rev, file): + """ + The blame information for the given file at the given revision. + + ``rev`` + revision specifier, see git-rev-parse for viable options. + + Returns + list: [git.Commit, list: [<line>]] + A list of tuples associating a Commit object with a list of lines that + changed within the given commit. The Commit objects will be given in order + of appearance. + """ + data = self.git.blame(rev, '--', file, p=True) + commits = {} + blames = [] + info = None + + for line in data.splitlines(False): + parts = self.re_whitespace.split(line, 1) + firstpart = parts[0] + if self.re_hexsha_only.search(firstpart): + # handles + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start + # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 + digits = parts[-1].split(" ") + if len(digits) == 3: + info = {'id': firstpart} + blames.append([None, []]) + # END blame data initialization + else: + m = self.re_author_committer_start.search(firstpart) + if m: + # handles: + # author Tom Preston-Werner + # author-mail <tom@mojombo.com> + # author-time 1192271832 + # author-tz -0700 + # committer Tom Preston-Werner + # committer-mail <tom@mojombo.com> + # committer-time 1192271832 + # committer-tz -0700 - IGNORED BY US + role = m.group(0) + if firstpart.endswith('-mail'): + info["%s_email" % role] = parts[-1] + elif firstpart.endswith('-time'): + info["%s_date" % role] = int(parts[-1]) + elif role == firstpart: + info[role] = parts[-1] + # END distinguish mail,time,name + else: + # handle + # filename lib/grit.rb + # summary add Blob + # <and rest> + if firstpart.startswith('filename'): + info['filename'] = parts[-1] + elif firstpart.startswith('summary'): + info['summary'] = parts[-1] + elif firstpart == '': + if info: + sha = info['id'] + c = commits.get(sha) + if c is None: + c = Commit( self, sha, + author=Actor._from_string(info['author'] + ' ' + info['author_email']), + authored_date=info['author_date'], + committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), + committed_date=info['committer_date'], + message=info['summary']) + commits[sha] = c + # END if commit objects needs initial creation + m = self.re_tab_full_line.search(line) + text, = m.groups() + blames[-1][0] = c + blames[-1][1].append( text ) + info = None + # END if we collected commit info + # END distinguish filename,summary,rest + # END distinguish author|committer vs filename,summary,rest + # END distinguish hexsha vs other information + return blames + + @classmethod + def init(cls, path=None, mkdir=True, **kwargs): + """ + Initialize a git repository at the given path if specified + + ``path`` + is the full path to the repo (traditionally ends with /<name>.git) + or None in which case the repository will be created in the current + 
+    @classmethod
+    def init(cls, path=None, mkdir=True, **kwargs):
+        """
+        Initialize a git repository at the given path if specified
+
+        ``path``
+            is the full path to the repo (traditionally ends with /<name>.git)
+            or None in which case the repository will be created in the current
+            working directory
+
+        ``mkdir``
+            if specified will create the repository directory if it doesn't
+            already exist. Creates the directory with mode 0755.
+            Only effective if a path is explicitly given
+
+        ``kwargs``
+            keyword arguments serving as additional options to the git-init command
+
+        Examples::
+
+            git.Repo.init('/var/git/myrepo.git', bare=True)
+
+        Returns
+            ``git.Repo`` (the newly created repo)
+        """
+
+        if mkdir and path and not os.path.exists(path):
+            os.makedirs(path, 0755)
+
+        # git command automatically chdir into the directory
+        git = Git(path)
+        output = git.init(**kwargs)
+        return Repo(path)
+
+    def clone(self, path, **kwargs):
+        """
+        Create a clone from this repository.
+
+        ``path``
+            is the full path of the new repo (traditionally ends with /<name>.git).
+
+        ``kwargs``
+            keyword arguments to be given to the git-clone command
+
+        Returns
+            ``git.Repo`` (the newly cloned repo)
+        """
+        # special handling for windows for path at which the clone should be
+        # created.
+        # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
+        # we at least give a proper error instead of letting git fail
+        prev_cwd = None
+        prev_path = None
+        if os.name == 'nt':
+            if '~' in path:
+                raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
+
+            # on windows, git will think paths like c: are relative and prepend the
+            # current working dir ( before it fails ). We temporarily adjust the working
+            # dir to make this actually work
+            match = re.match("(\w:[/\\\])(.*)", path)
+            if match:
+                prev_cwd = os.getcwd()
+                prev_path = path
+                drive, rest_of_path = match.groups()
+                os.chdir(drive)
+                path = rest_of_path
+                kwargs['with_keep_cwd'] = True
+            # END cwd preparation
+        # END windows handling
+
+        try:
+            self.git.clone(self.git_dir, path, **kwargs)
+        finally:
+            if prev_cwd is not None:
+                os.chdir(prev_cwd)
+                path = prev_path
+            # END reset previous working dir
+        # END bad windows handling
+        return Repo(path)
+
+
+    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
+        """
+        Archive the tree at the given revision.
+
+        ``ostream``
+            file compatible stream object to which the archive will be written
+
+        ``treeish``
+            is the treeish name/id, defaults to active branch
+
+        ``prefix``
+            is the optional prefix to prepend to each filename in the archive
+
+        ``kwargs``
+            Additional arguments passed to git-archive
+            NOTE: Use the 'format' argument to define the kind of format. Use
+            specialized ostreams to write any format supported by python
+
+        Examples::
+
+            repo.archive(open("repo.tar", "wb"))    # writes a tar archive to the stream
+
+        Raise
+            GitCommandError in case something went wrong
+
+        Returns
+            self
+        """
+        if treeish is None:
+            treeish = self.active_branch
+        if prefix and 'prefix' not in kwargs:
+            kwargs['prefix'] = prefix
+        kwargs['output_stream'] = ostream
+
+        self.git.archive(treeish, **kwargs)
+        return self
+
+    def __repr__(self):
+        return '<git.Repo "%s">' % self.git_dir
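A minimal sketch tying init and clone together; paths are illustrative, and bare=True is simply forwarded to git-init as a keyword option::

    from git import Repo

    bare = Repo.init("/tmp/example.git", bare=True)     # runs: git init --bare
    clone = bare.clone("/tmp/example-clone")            # clone of the freshly created repo
    print clone.git_dir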
diff --git a/lib/git/stats.py b/lib/git/stats.py
index bda4e539..6b759b7f 100644
--- a/lib/git/stats.py
+++ b/lib/git/stats.py
@@ -5,56 +5,56 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
class Stats(object):
-    """
-    Represents stat information as presented by git at the end of a merge. It is
-    created from the output of a diff operation.
-
-    ``Example``::
-
-        c = Commit( sha1 )
-        s = c.stats
-        s.total         # full-stat-dict
-        s.files         # dict( filepath : stat-dict )
-
-    ``stat-dict``
-
-    A dictionary with the following keys and values::
-
-        deletions = number of deleted lines as int
-        insertions = number of inserted lines as int
-        lines = total number of lines changed as int, or deletions + insertions
-
-    ``full-stat-dict``
-
-    In addition to the items in the stat-dict, it features additional information::
-
-        files = number of changed files as int
-
-    """
-    __slots__ = ("total", "files")
-
-    def __init__(self, total, files):
-        self.total = total
-        self.files = files
+    """
+    Represents stat information as presented by git at the end of a merge. It is
+    created from the output of a diff operation.
+
+    ``Example``::
+
+        c = Commit( sha1 )
+        s = c.stats
+        s.total         # full-stat-dict
+        s.files         # dict( filepath : stat-dict )
+
+    ``stat-dict``
+
+    A dictionary with the following keys and values::
+
+        deletions = number of deleted lines as int
+        insertions = number of inserted lines as int
+        lines = total number of lines changed as int, or deletions + insertions
+
+    ``full-stat-dict``
+
+    In addition to the items in the stat-dict, it features additional information::
+
+        files = number of changed files as int
+
+    """
+    __slots__ = ("total", "files")
+
+    def __init__(self, total, files):
+        self.total = total
+        self.files = files
-    @classmethod
-    def _list_from_string(cls, repo, text):
-        """
-        Create a Stats object from output retrieved by git-diff.
-
-        Returns
-            git.Stats
-        """
-        hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}}
-        for line in text.splitlines():
-            (raw_insertions, raw_deletions, filename) = line.split("\t")
-            insertions = raw_insertions != '-' and int(raw_insertions) or 0
-            deletions = raw_deletions != '-' and int(raw_deletions) or 0
-            hsh['total']['insertions'] += insertions
-            hsh['total']['deletions'] += deletions
-            hsh['total']['lines'] += insertions + deletions
-            hsh['total']['files'] += 1
-            hsh['files'][filename.strip()] = {'insertions': insertions,
-                                              'deletions': deletions,
-                                              'lines': insertions + deletions}
-        return Stats(hsh['total'], hsh['files'])
+    @classmethod
+    def _list_from_string(cls, repo, text):
+        """
+        Create a Stats object from output retrieved by git-diff.
+
+        Returns
+            git.Stats
+        """
+        hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': {}}
+        for line in text.splitlines():
+            (raw_insertions, raw_deletions, filename) = line.split("\t")
+            insertions = raw_insertions != '-' and int(raw_insertions) or 0
+            deletions = raw_deletions != '-' and int(raw_deletions) or 0
+            hsh['total']['insertions'] += insertions
+            hsh['total']['deletions'] += deletions
+            hsh['total']['lines'] += insertions + deletions
+            hsh['total']['files'] += 1
+            hsh['files'][filename.strip()] = {'insertions': insertions,
+                                              'deletions': deletions,
+                                              'lines': insertions + deletions}
+        return Stats(hsh['total'], hsh['files'])
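A sketch of what _list_from_string produces for two git-diff numstat lines; the '-' columns are what git emits for binary files, and the repo argument is unused by the parser so None suffices here::

    from git.stats import Stats

    text = "10\t2\tlib/git/repo.py\n-\t-\tassets/logo.png"
    stats = Stats._list_from_string(None, text)
    assert stats.total == {'insertions': 10, 'deletions': 2, 'lines': 12, 'files': 2}
    assert stats.files['assets/logo.png']['lines'] == 0    # binary files count as zero lines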
diff --git a/lib/git/utils.py b/lib/git/utils.py
index 5dc576be..15102fec 100644
--- a/lib/git/utils.py
+++ b/lib/git/utils.py
@@ -10,396 +10,396 @@
import time
import tempfile
try:
-    import hashlib
+    import hashlib
except ImportError:
-    import sha
+    import sha
def make_sha(source=''):
-    """
-    A python2.4 workaround for the sha/hashlib module fiasco
-
-    Note
-        From the dulwich project
-    """
-    try:
-        return hashlib.sha1(source)
-    except NameError:
-        sha1 = sha.sha(source)
-        return sha1
+    """
+    A python2.4 workaround for the sha/hashlib module fiasco
+
+    Note
+        From the dulwich project
+    """
+    try:
+        return hashlib.sha1(source)
+    except NameError:
+        sha1 = sha.sha(source)
+        return sha1
def join_path(a, *p):
-    """Join path tokens together similar to os.path.join, but always use
-    '/' instead of possibly '\' on windows."""
-    path = a
-    for b in p:
-        if b.startswith('/'):
-            path += b[1:]
-        elif path == '' or path.endswith('/'):
-            path += b
-        else:
-            path += '/' + b
-    return path
-
+    """Join path tokens together similar to os.path.join, but always use
+    '/' instead of possibly '\' on windows."""
+    path = a
+    for b in p:
+        if b.startswith('/'):
+            path += b[1:]
+        elif path == '' or path.endswith('/'):
+            path += b
+        else:
+            path += '/' + b
+    return path
+
def to_native_path_windows(path):
-    return path.replace('/','\\')
-
+    return path.replace('/','\\')
+
def to_native_path_linux(path):
-    return path.replace('\\','/')
+    return path.replace('\\','/')
if sys.platform.startswith('win'):
-    to_native_path = to_native_path_windows
+    to_native_path = to_native_path_windows
else:
-    # no need for any work on linux
-    def to_native_path_linux(path):
-        return path
-    to_native_path = to_native_path_linux
+    # no need for any work on linux
+    def to_native_path_linux(path):
+        return path
+    to_native_path = to_native_path_linux
def join_path_native(a, *p):
-    """As join_path, but makes sure an OS native path is returned. This is only
-    needed to play it safe on my dear windows and to assure nice paths that only
-    use '\'"""
-    return to_native_path(join_path(a, *p))
+    """As join_path, but makes sure an OS native path is returned. This is only
+    needed to play it safe on my dear windows and to assure nice paths that only
+    use '\'"""
+    return to_native_path(join_path(a, *p))
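The helpers above in action; the import path assumes the module is available as git.utils, and the comments show what the code computes::

    from git.utils import make_sha, join_path, join_path_native

    print make_sha("hello").hexdigest()             # sha1 hexdigest of 'hello'
    print join_path("refs", "heads", "master")      # 'refs/heads/master'
    print join_path_native("refs", "heads")         # 'refs\\heads' on windows, 'refs/heads' elsewhere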
class SHA1Writer(object):
-    """
-    Wrapper around a file-like object that remembers the SHA1 of
-    the data written to it. It will write a sha when the stream is closed
-    or if asked for explicitly using write_sha.
-
-    Note:
-        Based on the dulwich project
-    """
-    __slots__ = ("f", "sha1")
-
-    def __init__(self, f):
-        self.f = f
-        self.sha1 = make_sha("")
+    """
+    Wrapper around a file-like object that remembers the SHA1 of
+    the data written to it. It will write a sha when the stream is closed
+    or if asked for explicitly using write_sha.
+
+    Note:
+        Based on the dulwich project
+    """
+    __slots__ = ("f", "sha1")
+
+    def __init__(self, f):
+        self.f = f
+        self.sha1 = make_sha("")
-    def write(self, data):
-        self.sha1.update(data)
-        self.f.write(data)
+    def write(self, data):
+        self.sha1.update(data)
+        self.f.write(data)
-    def write_sha(self):
-        sha = self.sha1.digest()
-        self.f.write(sha)
-        return sha
+    def write_sha(self):
+        sha = self.sha1.digest()
+        self.f.write(sha)
+        return sha
-    def close(self):
-        sha = self.write_sha()
-        self.f.close()
-        return sha
+    def close(self):
+        sha = self.write_sha()
+        self.f.close()
+        return sha
-    def tell(self):
-        return self.f.tell()
+    def tell(self):
+        return self.f.tell()
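A brief sketch of the writer; the file name is illustrative::

    from git.utils import SHA1Writer

    writer = SHA1Writer(open("/tmp/packlike.dat", "wb"))
    writer.write("some payload")
    sha = writer.close()            # appends the 20-byte digest to the file, then closes it
    print sha.encode("hex")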
class LockFile(object):
-    """
-    Provides methods to obtain, check for, and release a file based lock which
-    should be used to handle concurrent access to the same file.
-
-    As we are a utility class to be derived from, we only use protected methods.
-
-    Locks will automatically be released on destruction
-    """
-    __slots__ = ("_file_path", "_owns_lock")
-
-    def __init__(self, file_path):
-        self._file_path = file_path
-        self._owns_lock = False
-
-    def __del__(self):
-        self._release_lock()
-
-    def _lock_file_path(self):
-        """
-        Return
-            Path to lockfile
-        """
-        return "%s.lock" % (self._file_path)
-
-    def _has_lock(self):
-        """
-        Return
-            True if we own the lock
-        """
-        if not self._owns_lock:
-            return False
-
-        return True
-
-    def _obtain_lock_or_raise(self):
-        """
-        Create a lock file as flag for other instances, mark our instance as lock-holder
-
-        Raise
-            IOError if a lock was already present or a lock file could not be written
-        """
-        if self._has_lock():
-            return
-        lock_file = self._lock_file_path()
-        if os.path.isfile(lock_file):
-            raise IOError("Lock for file %r already exists, delete %r in case the lock is illegal" % (self._file_path, lock_file))
-
-        try:
-            fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
-            os.close(fd)
-        except OSError, e:
-            raise IOError(str(e))
-
-        self._owns_lock = True
-
-    def _obtain_lock(self):
-        """
-        The default implementation will raise if a lock cannot be obtained.
-        Subclasses may override this method to provide a different implementation
-        """
-        return self._obtain_lock_or_raise()
-
-    def _release_lock(self):
-        """
-        Release our lock if we have one
-        """
-        if not self._has_lock():
-            return
-
-        # if someone removed our file beforehand, let's just flag this issue
-        # instead of failing, to make it more usable.
-        lfp = self._lock_file_path()
-        if os.path.isfile(lfp):
-            os.remove(lfp)
-        self._owns_lock = False
+    """
+    Provides methods to obtain, check for, and release a file based lock which
+    should be used to handle concurrent access to the same file.
+
+    As we are a utility class to be derived from, we only use protected methods.
+
+    Locks will automatically be released on destruction
+    """
+    __slots__ = ("_file_path", "_owns_lock")
+
+    def __init__(self, file_path):
+        self._file_path = file_path
+        self._owns_lock = False
+
+    def __del__(self):
+        self._release_lock()
+
+    def _lock_file_path(self):
+        """
+        Return
+            Path to lockfile
+        """
+        return "%s.lock" % (self._file_path)
+
+    def _has_lock(self):
+        """
+        Return
+            True if we own the lock
+        """
+        if not self._owns_lock:
+            return False
+
+        return True
+
+    def _obtain_lock_or_raise(self):
+        """
+        Create a lock file as flag for other instances, mark our instance as lock-holder
+
+        Raise
+            IOError if a lock was already present or a lock file could not be written
+        """
+        if self._has_lock():
+            return
+        lock_file = self._lock_file_path()
+        if os.path.isfile(lock_file):
+            raise IOError("Lock for file %r already exists, delete %r in case the lock is illegal" % (self._file_path, lock_file))
+
+        try:
+            fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
+            os.close(fd)
+        except OSError, e:
+            raise IOError(str(e))
+
+        self._owns_lock = True
+
+    def _obtain_lock(self):
+        """
+        The default implementation will raise if a lock cannot be obtained.
+        Subclasses may override this method to provide a different implementation
+        """
+        return self._obtain_lock_or_raise()
+
+    def _release_lock(self):
+        """
+        Release our lock if we have one
+        """
+        if not self._has_lock():
+            return
+
+        # if someone removed our file beforehand, let's just flag this issue
+        # instead of failing, to make it more usable.
+        lfp = self._lock_file_path()
+        if os.path.isfile(lfp):
+            os.remove(lfp)
+        self._owns_lock = False
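Since the interface is protected, usage goes through a small derived type; a sketch with an illustrative file path::

    from git.utils import LockFile

    class ExplicitLock(LockFile):
        # expose the protected interface for this illustration
        def lock(self):
            self._obtain_lock_or_raise()
        def unlock(self):
            self._release_lock()

    l = ExplicitLock("/tmp/some.cfg")
    l.lock()        # creates /tmp/some.cfg.lock, raises IOError if it already exists
    l.unlock()      # removes the lock file again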
class BlockingLockFile(LockFile):
-    """The lock file will block until a lock could be obtained, or fail after
-    a specified timeout"""
-    __slots__ = ("_check_interval", "_max_block_time")
-    def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
-        """Configure the instance
-
-        ``check_interval_s``
-            Period of time to sleep until the lock is checked the next time
-
-        ``max_block_time_s``
-            Maximum amount of seconds we may block; by default, it waits a nearly
-            unlimited time
-        """
-        super(BlockingLockFile, self).__init__(file_path)
-        self._check_interval = check_interval_s
-        self._max_block_time = max_block_time_s
-
-    def _obtain_lock(self):
-        """This method blocks until it obtained the lock, or raises IOError if
-        it ran out of time.
-        If this method returns, you are guaranteed to own the lock"""
-        starttime = time.time()
-        maxtime = starttime + float(self._max_block_time)
-        while True:
-            try:
-                super(BlockingLockFile, self)._obtain_lock()
-            except IOError:
-                curtime = time.time()
-                if curtime >= maxtime:
-                    msg = "Waited %f seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
-                    raise IOError(msg)
-                # END abort if we wait too long
-                time.sleep(self._check_interval)
-            else:
-                break
-        # END endless loop
+    """The lock file will block until a lock could be obtained, or fail after
+    a specified timeout"""
+    __slots__ = ("_check_interval", "_max_block_time")
+    def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
+        """Configure the instance
+
+        ``check_interval_s``
+            Period of time to sleep until the lock is checked the next time
+
+        ``max_block_time_s``
+            Maximum amount of seconds we may block; by default, it waits a nearly
+            unlimited time
+        """
+        super(BlockingLockFile, self).__init__(file_path)
+        self._check_interval = check_interval_s
+        self._max_block_time = max_block_time_s
+
+    def _obtain_lock(self):
+        """This method blocks until it obtained the lock, or raises IOError if
+        it ran out of time.
+        If this method returns, you are guaranteed to own the lock"""
+        starttime = time.time()
+        maxtime = starttime + float(self._max_block_time)
+        while True:
+            try:
+                super(BlockingLockFile, self)._obtain_lock()
+            except IOError:
+                curtime = time.time()
+                if curtime >= maxtime:
+                    msg = "Waited %f seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
+                    raise IOError(msg)
+                # END abort if we wait too long
+                time.sleep(self._check_interval)
+            else:
+                break
+        # END endless loop
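A sketch of the blocking behaviour with two competing instances; the second polls every 0.1 seconds and gives up after roughly one second::

    from git.utils import BlockingLockFile

    class Lock(BlockingLockFile):
        def lock(self):
            self._obtain_lock()
        def unlock(self):
            self._release_lock()

    first = Lock("/tmp/shared.dat")
    first.lock()
    second = Lock("/tmp/shared.dat", check_interval_s=0.1, max_block_time_s=1)
    try:
        second.lock()
    except IOError, e:
        print e         # e.g. Waited 1.000000 seconds for lock at '/tmp/shared.dat.lock'
    first.unlock()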
class ConcurrentWriteOperation(LockFile):
-    """
-    This class facilitates a safe write operation to a file on disk such that we:
-
-    - lock the original file
-    - write to a temporary file
-    - rename temporary file back to the original one on close
-    - unlock the original file
-
-    This type handles errors correctly in that it will assure a consistent state
-    on destruction
-    """
-    __slots__ = "_temp_write_fp"
-
-    def __init__(self, file_path):
-        """
-        Initialize an instance able to write the given file_path
-        """
-        super(ConcurrentWriteOperation, self).__init__(file_path)
-        self._temp_write_fp = None
-
-    def __del__(self):
-        self._end_writing(successful=False)
-
-    def _begin_writing(self):
-        """
-        Begin writing our file, hence we get a lock and start writing
-        a temporary file in the same directory.
-
-        Returns
-            File Object to write to. It is still maintained by this instance
-            and you do not need to manually close it
-        """
-        # already writing ?
-        if self._temp_write_fp is not None:
-            return self._temp_write_fp
-
-        self._obtain_lock_or_raise()
-        dirname, basename = os.path.split(self._file_path)
-        self._temp_write_fp = open(tempfile.mktemp(basename, '', dirname), "wb")
-        return self._temp_write_fp
-
-    def _is_writing(self):
-        """
-        Returns
-            True if we are currently writing a file
-        """
-        return self._temp_write_fp is not None
-
-    def _end_writing(self, successful=True):
-        """
-        Indicate that you finished writing the file, which will:
-
-        - close the underlying stream
-        - rename the temporary file to the original one
-        - release our lock
-        """
-        # did we start a write operation ?
-        if self._temp_write_fp is None:
-            return
-
-        if not self._temp_write_fp.closed:
-            self._temp_write_fp.close()
-
-        if successful:
-            # on windows, rename does not silently overwrite the existing one
-            if sys.platform == "win32":
-                if os.path.isfile(self._file_path):
-                    os.remove(self._file_path)
-                # END remove if exists
-            # END win32 special handling
-            os.rename(self._temp_write_fp.name, self._file_path)
-        else:
-            # just delete the file so far, we failed
-            os.remove(self._temp_write_fp.name)
-        # END successful handling
-
-        # finally reset our handle
-        self._release_lock()
-        self._temp_write_fp = None
+    """
+    This class facilitates a safe write operation to a file on disk such that we:
+
+    - lock the original file
+    - write to a temporary file
+    - rename temporary file back to the original one on close
+    - unlock the original file
+
+    This type handles errors correctly in that it will assure a consistent state
+    on destruction
+    """
+    __slots__ = "_temp_write_fp"
+
+    def __init__(self, file_path):
+        """
+        Initialize an instance able to write the given file_path
+        """
+        super(ConcurrentWriteOperation, self).__init__(file_path)
+        self._temp_write_fp = None
+
+    def __del__(self):
+        self._end_writing(successful=False)
+
+    def _begin_writing(self):
+        """
+        Begin writing our file, hence we get a lock and start writing
+        a temporary file in the same directory.
+
+        Returns
+            File Object to write to. It is still maintained by this instance
+            and you do not need to manually close it
+        """
+        # already writing ?
+        if self._temp_write_fp is not None:
+            return self._temp_write_fp
+
+        self._obtain_lock_or_raise()
+        dirname, basename = os.path.split(self._file_path)
+        self._temp_write_fp = open(tempfile.mktemp(basename, '', dirname), "wb")
+        return self._temp_write_fp
+
+    def _is_writing(self):
+        """
+        Returns
+            True if we are currently writing a file
+        """
+        return self._temp_write_fp is not None
+
+    def _end_writing(self, successful=True):
+        """
+        Indicate that you finished writing the file, which will:
+
+        - close the underlying stream
+        - rename the temporary file to the original one
+        - release our lock
+        """
+        # did we start a write operation ?
+        if self._temp_write_fp is None:
+            return
+
+        if not self._temp_write_fp.closed:
+            self._temp_write_fp.close()
+
+        if successful:
+            # on windows, rename does not silently overwrite the existing one
+            if sys.platform == "win32":
+                if os.path.isfile(self._file_path):
+                    os.remove(self._file_path)
+                # END remove if exists
+            # END win32 special handling
+            os.rename(self._temp_write_fp.name, self._file_path)
+        else:
+            # just delete the file so far, we failed
+            os.remove(self._temp_write_fp.name)
+        # END successful handling
+
+        # finally reset our handle
+        self._release_lock()
+        self._temp_write_fp = None
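A sketch of a derived writer that replaces the target atomically on success; the target path is illustrative::

    from git.utils import ConcurrentWriteOperation

    class SafeWriter(ConcurrentWriteOperation):
        def write_all(self, data):
            fp = self._begin_writing()              # locks the target, opens a temp file
            fp.write(data)
            self._end_writing(successful=True)      # rename replaces the target, lock released

    SafeWriter("/tmp/app.cfg").write_all("[core]\n")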
class LazyMixin(object):
-    """
-    Base class providing an interface to lazily retrieve attribute values upon
-    first access. If slots are used, memory will only be reserved once the attribute
-    is actually accessed and retrieved the first time. All future accesses will
-    return the cached value as stored in the instance's dict or slot.
-    """
-    __slots__ = tuple()
-
-    def __getattr__(self, attr):
-        """
-        Whenever an attribute is requested that we do not know, we allow it
-        to be created and set. Next time the same attribute is requested, it is simply
-        returned from our dict/slots.
-        """
-        self._set_cache_(attr)
-        # will raise in case the cache was not created
-        return object.__getattribute__(self, attr)
+    """
+    Base class providing an interface to lazily retrieve attribute values upon
+    first access. If slots are used, memory will only be reserved once the attribute
+    is actually accessed and retrieved the first time. All future accesses will
+    return the cached value as stored in the instance's dict or slot.
+    """
+    __slots__ = tuple()
+
+    def __getattr__(self, attr):
+        """
+        Whenever an attribute is requested that we do not know, we allow it
+        to be created and set. Next time the same attribute is requested, it is simply
+        returned from our dict/slots.
+        """
+        self._set_cache_(attr)
+        # will raise in case the cache was not created
+        return object.__getattribute__(self, attr)
-    def _set_cache_(self, attr):
-        """ This method should be overridden in the derived class.
-        It should check whether the attribute named by attr can be created
-        and cached. Do nothing if you do not know the attribute, or call your superclass.
-
-        The derived class may create as many additional attributes as it deems
-        necessary in case a git command returns more information than represented
-        in the single attribute."""
-        pass
+    def _set_cache_(self, attr):
+        """ This method should be overridden in the derived class.
+        It should check whether the attribute named by attr can be created
+        and cached. Do nothing if you do not know the attribute, or call your superclass.
+
+        The derived class may create as many additional attributes as it deems
+        necessary in case a git command returns more information than represented
+        in the single attribute."""
+        pass
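A sketch of a lazily computed attribute; _set_cache_ runs only on first access, and the file path is illustrative::

    from git.utils import LazyMixin

    class FileInfo(LazyMixin):
        def __init__(self, path):
            self.path = path

        def _set_cache_(self, attr):
            if attr == 'size':
                # computed once, cached in the instance dict afterwards
                self.size = len(open(self.path, 'rb').read())
            else:
                super(FileInfo, self)._set_cache_(attr)

    info = FileInfo("/etc/hostname")
    print info.size     # triggers _set_cache_ here; later accesses hit the cache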
class IterableList(list):
-    """
-    List of iterable objects allowing to query an object by id or by named index::
-
-        heads = repo.heads
-        heads.master
-        heads['master']
-        heads[0]
-
-    It requires an id_attribute name to be set which will be queried from its
-    contained items to have a means for comparison.
-
-    A prefix can be specified which is to be used in case the id returned by the
-    items always contains a prefix that does not matter to the user, so it
-    can be left out.
-    """
-    __slots__ = ('_id_attr', '_prefix')
-
-    def __new__(cls, id_attr, prefix=''):
-        return super(IterableList, cls).__new__(cls)
-
-    def __init__(self, id_attr, prefix=''):
-        self._id_attr = id_attr
-        self._prefix = prefix
-
-    def __getattr__(self, attr):
-        attr = self._prefix + attr
-        for item in self:
-            if getattr(item, self._id_attr) == attr:
-                return item
-        # END for each item
-        return list.__getattribute__(self, attr)
-
-    def __getitem__(self, index):
-        if isinstance(index, int):
-            return list.__getitem__(self, index)
-
-        try:
-            return getattr(self, index)
-        except AttributeError:
-            raise IndexError( "No item found with id %r" % (self._prefix + index) )
+    """
+    List of iterable objects allowing to query an object by id or by named index::
+
+        heads = repo.heads
+        heads.master
+        heads['master']
+        heads[0]
+
+    It requires an id_attribute name to be set which will be queried from its
+    contained items to have a means for comparison.
+
+    A prefix can be specified which is to be used in case the id returned by the
+    items always contains a prefix that does not matter to the user, so it
+    can be left out.
+    """
+    __slots__ = ('_id_attr', '_prefix')
+
+    def __new__(cls, id_attr, prefix=''):
+        return super(IterableList, cls).__new__(cls)
+
+    def __init__(self, id_attr, prefix=''):
+        self._id_attr = id_attr
+        self._prefix = prefix
+
+    def __getattr__(self, attr):
+        attr = self._prefix + attr
+        for item in self:
+            if getattr(item, self._id_attr) == attr:
+                return item
+        # END for each item
+        return list.__getattribute__(self, attr)
+
+    def __getitem__(self, index):
+        if isinstance(index, int):
+            return list.__getitem__(self, index)
+
+        try:
+            return getattr(self, index)
+        except AttributeError:
+            raise IndexError( "No item found with id %r" % (self._prefix + index) )
class Iterable(object):
-    """
-    Defines an interface for iterable items which is to assure a uniform
-    way to retrieve and iterate items within the git repository
-    """
-    __slots__ = tuple()
-    _id_attribute_ = "attribute that most suitably identifies your instance"
-
-    @classmethod
-    def list_items(cls, repo, *args, **kwargs):
-        """
-        Find all items of this type - subclasses can specify args and kwargs differently.
-        If no args are given, subclasses are obliged to return all items.
-
-        Note: Favor the iter_items method as it will avoid the creation of a full
-        list of all items.
-
-        Returns:
-            list(Item,...) list of item instances
-        """
-        out_list = IterableList( cls._id_attribute_ )
-        out_list.extend(cls.iter_items(repo, *args, **kwargs))
-        return out_list
-
-
-    @classmethod
-    def iter_items(cls, repo, *args, **kwargs):
-        """
-        For more information about the arguments, see list_items
-        Return:
-            iterator yielding Items
-        """
-        raise NotImplementedError("To be implemented by Subclass")
+    """
+    Defines an interface for iterable items which is to assure a uniform
+    way to retrieve and iterate items within the git repository
+    """
+    __slots__ = tuple()
+    _id_attribute_ = "attribute that most suitably identifies your instance"
+
+    @classmethod
+    def list_items(cls, repo, *args, **kwargs):
+        """
+        Find all items of this type - subclasses can specify args and kwargs differently.
+        If no args are given, subclasses are obliged to return all items.
+
+        Note: Favor the iter_items method as it will avoid the creation of a full
+        list of all items.
+
+        Returns:
+            list(Item,...) list of item instances
+        """
+        out_list = IterableList( cls._id_attribute_ )
+        out_list.extend(cls.iter_items(repo, *args, **kwargs))
+        return out_list
+
+
+    @classmethod
+    def iter_items(cls, repo, *args, **kwargs):
+        """
+        For more information about the arguments, see list_items
+        Return:
+            iterator yielding Items
+        """
+        raise NotImplementedError("To be implemented by Subclass")
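A sketch of the list type in isolation; Item stands in for any object exposing the configured id attribute::

    from git.utils import IterableList

    class Item(object):
        def __init__(self, name):
            self.name = name
        def __repr__(self):
            return self.name

    heads = IterableList('name', prefix='refs/heads/')
    heads.extend([Item('refs/heads/master'), Item('refs/heads/next')])
    print heads.master      # attribute access, prefix prepended automatically
    print heads['next']     # named index
    print heads[0]          # plain list indexing still works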