Diffstat (limited to 'lib/git')
31 files changed, 0 insertions, 10524 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py deleted file mode 100644 index 7f275b44..00000000 --- a/lib/git/__init__.py +++ /dev/null @@ -1,47 +0,0 @@ -# __init__.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os -import sys -import inspect - -__version__ = 'git' - - -#{ Initialization -def _init_externals(): - """Initialize external projects by putting them into the path""" - sys.path.append(os.path.join(os.path.dirname(__file__), 'ext')) - -#} END initialization - -################# -_init_externals() -################# - -#{ Imports - -from git.config import GitConfigParser -from git.objects import * -from git.refs import * -from git.diff import * -from git.exc import * -from git.db import * -from git.cmd import Git -from git.repo import Repo -from git.remote import * -from git.index import * -from git.util import ( - LockFile, - BlockingLockFile, - Stats - ) - -#} END imports - -__all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ] - diff --git a/lib/git/cmd.py b/lib/git/cmd.py deleted file mode 100644 index 60887f5d..00000000 --- a/lib/git/cmd.py +++ /dev/null @@ -1,515 +0,0 @@ -# cmd.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import os, sys -from util import * -from exc import GitCommandError - -from subprocess import ( - call, - Popen, - PIPE - ) - -# Enables debugging of GitPython's git commands -GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) - -execute_kwargs = ('istream', 'with_keep_cwd', 'with_extended_output', - 'with_exceptions', 'as_process', - 'output_stream' ) - -__all__ = ('Git', ) - -def dashify(string): - return string.replace('_', '-') - -class Git(object): - """ - The Git class manages communication with the Git binary. - - It provides a convenient interface to calling the Git binary, such as in:: - - g = Git( git_dir ) - g.init() # calls 'git init' program - rval = g.ls_files() # calls 'git ls-files' program - - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. - """ - __slots__ = ("_working_dir", "cat_file_all", "cat_file_header") - - # CONFIGURATION - # The size in bytes read from stdout when copying git's output to another stream - max_chunk_size = 1024*64 - - class AutoInterrupt(object): - """Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. - - The wait method was overridden to perform automatic status code checking - and possibly raise.""" - __slots__= ("proc", "args") - - def __init__(self, proc, args ): - self.proc = proc - self.args = args - - def __del__(self): - # did the process finish already so we have a return code ? - if self.proc.poll() is not None: - return - - # can be that nothing really exists anymore ... 
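The AutoInterrupt class above ties the lifetime of a child process to a Python wrapper object so that abandoned iterators cannot leak processes. A minimal self-contained sketch of the same pattern (names are illustrative, not GitPython API):

    import os
    import signal
    import subprocess

    class AutoKill(object):
        """Interrupt the wrapped process once this object is collected."""
        def __init__(self, proc):
            self.proc = proc

        def __del__(self):
            if self.proc.poll() is None:            # still running
                try:
                    os.kill(self.proc.pid, signal.SIGINT)
                except OSError:
                    pass                            # process already gone

    p = AutoKill(subprocess.Popen(["sleep", "60"]))
    del p    # the child receives SIGINT here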
- if os is None: - return - - # try to kill it - try: - os.kill(self.proc.pid, 2) # interrupt signal - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - call(("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(self.proc.pid)), shell=True) - # END exception handling - - def __getattr__(self, attr): - return getattr(self.proc, attr) - - def wait(self): - """Wait for the process and return its status code. - - :raise GitCommandError: if the return status is not 0""" - status = self.proc.wait() - if status != 0: - raise GitCommandError(self.args, status, self.proc.stderr.read()) - # END status handling - return status - # END auto interrupt - - class CatFileContentStream(object): - """Object representing a sized read-only stream returning the contents of - an object. - It behaves like a stream, but counts the data read and simulates an empty - stream once our sized content region is empty. - If not all data is read to the end of the objects's lifetime, we read the - rest to assure the underlying stream continues to work""" - - __slots__ = ('_stream', '_nbr', '_size') - - def __init__(self, size, stream): - self._stream = stream - self._size = size - self._nbr = 0 # num bytes read - - # special case: if the object is empty, has null bytes, get the - # final newline right away. - if size == 0: - stream.read(1) - # END handle empty streams - - def read(self, size=-1): - bytes_left = self._size - self._nbr - if bytes_left == 0: - return '' - if size > -1: - # assure we don't try to read past our limit - size = min(bytes_left, size) - else: - # they try to read all, make sure its not more than what remains - size = bytes_left - # END check early depletion - data = self._stream.read(size) - self._nbr += len(data) - - # check for depletion, read our final byte to make the stream usable by others - if self._size - self._nbr == 0: - self._stream.read(1) # final newline - # END finish reading - return data - - def readline(self, size=-1): - if self._nbr == self._size: - return '' - - # clamp size to lowest allowed value - bytes_left = self._size - self._nbr - if size > -1: - size = min(bytes_left, size) - else: - size = bytes_left - # END handle size - - data = self._stream.readline(size) - self._nbr += len(data) - - # handle final byte - if self._size - self._nbr == 0: - self._stream.read(1) - # END finish reading - - return data - - def readlines(self, size=-1): - if self._nbr == self._size: - return list() - - # leave all additional logic to our readline method, we just check the size - out = list() - nbr = 0 - while True: - line = self.readline() - if not line: - break - out.append(line) - if size > -1: - nbr += len(line) - if nbr > size: - break - # END handle size constraint - # END readline loop - return out - - def __iter__(self): - return self - - def next(self): - line = self.readline() - if not line: - raise StopIteration - return line - - def __del__(self): - bytes_left = self._size - self._nbr - if bytes_left: - # read and discard - seeking is impossible within a stream - # includes terminating newline - self._stream.read(bytes_left + 1) - # END handle incomplete read - - - def __init__(self, working_dir=None): - """Initialize this instance with: - - :param working_dir: - Git directory we should work in. 
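CatFileContentStream above serves a sized window of the stdout of a persistent `git cat-file --batch` process: it hands out exactly `size` bytes and then drains the record-terminating newline, so the pipe remains usable for the next query. The core discipline, stripped down (illustrative only, not the shipped class):

    class SizedStream(object):
        """Expose exactly `size` bytes of `stream`, then consume the
        single newline byte that terminates the record."""
        def __init__(self, stream, size):
            self._stream = stream
            self._left = size

        def read(self, n=-1):
            if self._left == 0:
                return ''
            n = self._left if n < 0 else min(n, self._left)
            data = self._stream.read(n)
            self._left -= len(data)
            if self._left == 0:
                self._stream.read(1)    # drain the trailing newline
            return data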
If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories.""" - super(Git, self).__init__() - self._working_dir = working_dir - - # cached command slots - self.cat_file_header = None - self.cat_file_all = None - - def __getattr__(self, name): - """A convenience method as it allows to call the command as if it was - an object. - :return: Callable object that will execute call _call_process with your arguments.""" - if name[:1] == '_': - raise AttributeError(name) - return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) - - @property - def working_dir(self): - """:return: Git directory we are working on""" - return self._working_dir - - def execute(self, command, - istream=None, - with_keep_cwd=False, - with_extended_output=False, - with_exceptions=True, - as_process=False, - output_stream=None, - **subprocess_kwargs - ): - """Handles executing the command on the shell and consumes and returns - the returned information (stdout) - - :param command: - The command argument list to execute. - It should be a string, or a sequence of program arguments. The - program to execute is the first item in the args sequence or string. - - :param istream: - Standard input filehandle passed to subprocess.Popen. - - :param with_keep_cwd: - Whether to use the current working directory from os.getcwd(). - The cmd otherwise uses its own working_dir that it has been initialized - with if possible. - - :param with_extended_output: - Whether to return a (status, stdout, stderr) tuple. - - :param with_exceptions: - Whether to raise an exception when git returns a non-zero status. - - :param as_process: - Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. - - :param output_stream: - If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. - - :param subprocess_kwargs: - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you - specify may not be the same ones. - - :return: - * str(output) if extended_output = False (Default) - * tuple(int(status), str(stdout), str(stderr)) if extended_output = True - - if ouput_stream is True, the stdout value will be your output stream: - * output_stream if extended_output = False - * tuple(int(status), output_stream, str(stderr)) if extended_output = True - - :raise GitCommandError: - - :note: - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module.""" - if GIT_PYTHON_TRACE and not GIT_PYTHON_TRACE == 'full': - print ' '.join(command) - - # Allow the user to have the command executed in their working dir. 
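Based on the parameter documentation above, typical calls look like this (repository path is hypothetical):

    from git.cmd import Git

    g = Git("/path/to/repo")
    # default: stripped stdout as a string; GitCommandError on non-zero status
    sha = g.execute(["git", "rev-parse", "HEAD"])
    # extended form: a (status, stdout, stderr) tuple instead
    status, out, err = g.execute(["git", "status"], with_extended_output=True)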
- if with_keep_cwd or self._working_dir is None: - cwd = os.getcwd() - else: - cwd=self._working_dir - - # Start the process - proc = Popen(command, - cwd=cwd, - stdin=istream, - stderr=PIPE, - stdout=PIPE, - close_fds=(os.name=='posix'),# unsupported on linux - **subprocess_kwargs - ) - if as_process: - return self.AutoInterrupt(proc, command) - - # Wait for the process to return - status = 0 - stdout_value = '' - stderr_value = '' - try: - if output_stream is None: - stdout_value, stderr_value = proc.communicate() - # strip trailing "\n" - if stdout_value.endswith("\n"): - stdout_value = stdout_value[:-1] - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.returncode - else: - stream_copy(proc.stdout, output_stream, self.max_chunk_size) - stdout_value = output_stream - stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith("\n"): - stderr_value = stderr_value[:-1] - status = proc.wait() - # END stdout handling - finally: - proc.stdout.close() - proc.stderr.close() - - if GIT_PYTHON_TRACE == 'full': - cmdstr = " ".join(command) - if stderr_value: - print "%s -> %d; stdout: '%s'; stderr: '%s'" % (cmdstr, status, stdout_value, stderr_value) - elif stdout_value: - print "%s -> %d; stdout: '%s'" % (cmdstr, status, stdout_value) - else: - print "%s -> %d" % (cmdstr, status) - # END handle debug printing - - if with_exceptions and status != 0: - raise GitCommandError(command, status, stderr_value) - - # Allow access to the command's status code - if with_extended_output: - return (status, stdout_value, stderr_value) - else: - return stdout_value - - def transform_kwargs(self, **kwargs): - """Transforms Python style kwargs into git command line options.""" - args = list() - for k, v in kwargs.items(): - if len(k) == 1: - if v is True: - args.append("-%s" % k) - elif type(v) is not bool: - args.append("-%s%s" % (k, v)) - else: - if v is True: - args.append("--%s" % dashify(k)) - elif type(v) is not bool: - args.append("--%s=%s" % (dashify(k), v)) - return args - - @classmethod - def __unpack_args(cls, arg_list): - if not isinstance(arg_list, (list,tuple)): - return [ str(arg_list) ] - - outlist = list() - for arg in arg_list: - if isinstance(arg_list, (list, tuple)): - outlist.extend(cls.__unpack_args( arg )) - # END recursion - else: - outlist.append(str(arg)) - # END for each arg - return outlist - - def _call_process(self, method, *args, **kwargs): - """Run the given git command with the specified arguments and return - the result as a String - - :param method: - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. - - :param args: - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent - - :param kwargs: - is a dict of keyword arguments. - This function accepts the same optional keyword arguments - as execute(). - - ``Examples``:: - git.rev_list('master', max_count=10, header=True) - - :return: Same as ``execute``""" - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. 
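transform_kwargs above is what powers the dynamic `git.<command>` attribute interface; roughly (option order may vary, since keyword arguments are unordered):

    g = Git()
    g.rev_list("master", max_count=10, header=True)
    # runs: git rev-list --max-count=10 --header master
    g.log(n=1)
    # runs: git log -n1    (single-letter kwargs become short options)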
- _kwargs = dict() - for kwarg in execute_kwargs: - try: - _kwargs[kwarg] = kwargs.pop(kwarg) - except KeyError: - pass - - # Prepare the argument list - opt_args = self.transform_kwargs(**kwargs) - - ext_args = self.__unpack_args([a for a in args if a is not None]) - args = opt_args + ext_args - - call = ["git", dashify(method)] - call.extend(args) - - return self.execute(call, **_kwargs) - - def _parse_object_header(self, header_line): - """ - :param header_line: - <hex_sha> type_string size_as_int - - :return: (hex_sha, type_string, size_as_int) - - :raise ValueError: if the header contains indication for an error due to - incorrect input sha""" - tokens = header_line.split() - if len(tokens) != 3: - if not tokens: - raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) - else: - raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) - # END handle actual return value - # END error handling - - if len(tokens[0]) != 40: - raise ValueError("Failed to parse header: %r" % header_line) - return (tokens[0], tokens[1], int(tokens[2])) - - def __prepare_ref(self, ref): - # required for command to separate refs on stdin - refstr = str(ref) # could be ref-object - if refstr.endswith("\n"): - return refstr - return refstr + "\n" - - def __get_persistent_cmd(self, attr_name, cmd_name, *args,**kwargs): - cur_val = getattr(self, attr_name) - if cur_val is not None: - return cur_val - - options = { "istream" : PIPE, "as_process" : True } - options.update( kwargs ) - - cmd = self._call_process( cmd_name, *args, **options ) - setattr(self, attr_name, cmd ) - return cmd - - def __get_object_header(self, cmd, ref): - cmd.stdin.write(self.__prepare_ref(ref)) - cmd.stdin.flush() - return self._parse_object_header(cmd.stdout.readline()) - - def get_object_header(self, ref): - """ Use this method to quickly examine the type and size of the object behind - the given ref. - - :note: The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. - - :return: (hexsha, type_string, size_as_int)""" - cmd = self.__get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) - return self.__get_object_header(cmd, ref) - - def get_object_data(self, ref): - """ As get_object_header, but returns object data as well - :return: (hexsha, type_string, size_as_int,data_string) - :note: not threadsafe""" - hexsha, typename, size, stream = self.stream_object_data(ref) - data = stream.read(size) - del(stream) - return (hexsha, typename, size, data) - - def stream_object_data(self, ref): - """As get_object_header, but returns the data as a stream - :return: (hexsha, type_string, size_as_int, stream) - :note: This method is not threadsafe, you need one independent Command instance - per thread to be safe !""" - cmd = self.__get_persistent_cmd("cat_file_all", "cat_file", batch=True) - hexsha, typename, size = self.__get_object_header(cmd, ref) - return (hexsha, typename, size, self.CatFileContentStream(size, cmd.stdout)) - - def clear_cache(self): - """Clear all kinds of internal caches to release resources. - - Currently persistent commands will be interrupted. 
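The persistent cat-file commands pay the process start-up cost once and reuse the pipes for all later queries; hypothetical usage:

    g = Git("/path/to/repo")
    hexsha, typename, size = g.get_object_header("HEAD")
    hexsha, typename, size, data = g.get_object_data("HEAD")       # whole object
    hexsha, typename, size, stream = g.stream_object_data("HEAD")  # streamed
    g.clear_cache()    # interrupts both cached cat-file processes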
- - :return: self""" - self.cat_file_all = None - self.cat_file_header = None - return self diff --git a/lib/git/config.py b/lib/git/config.py deleted file mode 100644 index f1a8832e..00000000 --- a/lib/git/config.py +++ /dev/null @@ -1,420 +0,0 @@ -# config.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -"""Module containing module parser implementation able to properly read and write -configuration files""" - -import re -import os -import ConfigParser as cp -import inspect -import cStringIO - -from git.odict import OrderedDict -from git.util import LockFile - -__all__ = ('GitConfigParser', 'SectionConstraint') - -class MetaParserBuilder(type): - """Utlity class wrapping base-class methods into decorators that assure read-only properties""" - def __new__(metacls, name, bases, clsdict): - """ - Equip all base-class methods with a needs_values decorator, and all non-const methods - with a set_dirty_and_flush_changes decorator in addition to that.""" - kmm = '_mutating_methods_' - if kmm in clsdict: - mutating_methods = clsdict[kmm] - for base in bases: - methods = ( t for t in inspect.getmembers(base, inspect.ismethod) if not t[0].startswith("_") ) - for name, method in methods: - if name in clsdict: - continue - method_with_values = needs_values(method) - if name in mutating_methods: - method_with_values = set_dirty_and_flush_changes(method_with_values) - # END mutating methods handling - - clsdict[name] = method_with_values - # END for each name/method pair - # END for each base - # END if mutating methods configuration is set - - new_type = super(MetaParserBuilder, metacls).__new__(metacls, name, bases, clsdict) - return new_type - - - -def needs_values(func): - """Returns method assuring we read values (on demand) before we try to access them""" - def assure_data_present(self, *args, **kwargs): - self.read() - return func(self, *args, **kwargs) - # END wrapper method - assure_data_present.__name__ = func.__name__ - return assure_data_present - -def set_dirty_and_flush_changes(non_const_func): - """Return method that checks whether given non constant function may be called. - If so, the instance will be set dirty. - Additionally, we flush the changes right to disk""" - def flush_changes(self, *args, **kwargs): - rval = non_const_func(self, *args, **kwargs) - self.write() - return rval - # END wrapper method - flush_changes.__name__ = non_const_func.__name__ - return flush_changes - - -class SectionConstraint(object): - """Constrains a ConfigParser to only option commands which are constrained to - always use the section we have been initialized with. 
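With MetaParserBuilder in place, a subclass only declares its mutating methods; every public method inherited from the base class is then republished wrapped in needs_values, and the mutators additionally in set_dirty_and_flush_changes. The net effect, as a hedged sketch (path hypothetical):

    cfg = GitConfigParser("/tmp/example.cfg", read_only=False)
    cfg.add_section("core")            # inherited method, but now: read() runs
    cfg.set("core", "bare", "false")   # first, then write() flushes to disk
    del cfg                            # releases the lock file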
- - It supports all ConfigParser methods that operate on an option""" - __slots__ = ("_config", "_section_name") - _valid_attrs_ = ("get_value", "set_value", "get", "set", "getint", "getfloat", "getboolean", "has_option", - "remove_section", "remove_option", "options") - - def __init__(self, config, section): - self._config = config - self._section_name = section - - def __getattr__(self, attr): - if attr in self._valid_attrs_: - return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) - return super(SectionConstraint,self).__getattribute__(attr) - - def _call_config(self, method, *args, **kwargs): - """Call the configuration at the given method which must take a section name - as first argument""" - return getattr(self._config, method)(self._section_name, *args, **kwargs) - - @property - def config(self): - """return: Configparser instance we constrain""" - return self._config - - -class GitConfigParser(cp.RawConfigParser, object): - """Implements specifics required to read git style configuration files. - - This variation behaves much like the git.config command such that the configuration - will be read on demand based on the filepath given during initialization. - - The changes will automatically be written once the instance goes out of scope, but - can be triggered manually as well. - - The configuration file will be locked if you intend to change values preventing other - instances to write concurrently. - - :note: - The config is case-sensitive even when queried, hence section and option names - must match perfectly.""" - __metaclass__ = MetaParserBuilder - - - #{ Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile - t_lock = LockFile - - #} END configuration - - OPTCRE = re.compile( - r'\s?(?P<option>[^:=\s][^:=]*)' # very permissive, incuding leading whitespace - r'\s*(?P<vi>[:=])\s*' # any number of space/tab, - # followed by separator - # (either : or =), followed - # by any # space/tab - r'(?P<value>.*)$' # everything up to eol - ) - - # list of RawConfigParser methods able to change the instance - _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") - __slots__ = ("_sections", "_defaults", "_file_or_files", "_read_only","_is_initialized", '_lock') - - def __init__(self, file_or_files, read_only=True): - """Initialize a configuration reader to read the given file_or_files and to - possibly allow changes to it by setting read_only False - - :param file_or_files: - A single file path or file objects or multiple of these - - :param read_only: - If True, the ConfigParser may only read the data , but not change it. 
- If False, only a single file path or file object may be given.""" - super(GitConfigParser, self).__init__() - # initialize base with ordered dictionaries to be sure we write the same - # file back - self._sections = OrderedDict() - self._defaults = OrderedDict() - - self._file_or_files = file_or_files - self._read_only = read_only - self._is_initialized = False - self._lock = None - - if not read_only: - if isinstance(file_or_files, (tuple, list)): - raise ValueError("Write-ConfigParsers can operate on a single file only, multiple files have been passed") - # END single file check - - if not isinstance(file_or_files, basestring): - file_or_files = file_or_files.name - # END get filename from handle/stream - # initialize lock base - we want to write - self._lock = self.t_lock(file_or_files) - - self._lock._obtain_lock() - # END read-only check - - - def __del__(self): - """Write pending changes if required and release locks""" - # checking for the lock here makes sure we do not raise during write() - # in case an invalid parser was created who could not get a lock - if self.read_only or not self._lock._has_lock(): - return - - try: - try: - self.write() - except IOError,e: - print "Exception during destruction of GitConfigParser: %s" % str(e) - finally: - self._lock._release_lock() - - def optionxform(self, optionstr): - """Do not transform options in any way when writing""" - return optionstr - - def _read(self, fp, fpname): - """A direct copy of the py2.4 version of the super class's _read method - to assure it uses ordered dicts. Had to change one line to make it work. - - Future versions have this fixed, but in fact its quite embarassing for the - guys not to have done it right in the first place ! - - Removed big comments to make it more compact. - - Made sure it ignores initial whitespace as git uses tabs""" - cursect = None # None, or a dictionary - optname = None - lineno = 0 - e = None # None, or an exception - while True: - line = fp.readline() - if not line: - break - lineno = lineno + 1 - # comment or blank line? - if line.strip() == '' or line[0] in '#;': - continue - if line.split(None, 1)[0].lower() == 'rem' and line[0] in "rR": - # no leading whitespace - continue - else: - # is it a section header? - mo = self.SECTCRE.match(line) - if mo: - sectname = mo.group('header') - if sectname in self._sections: - cursect = self._sections[sectname] - elif sectname == cp.DEFAULTSECT: - cursect = self._defaults - else: - # THE ONLY LINE WE CHANGED ! - cursect = OrderedDict((('__name__', sectname),)) - self._sections[sectname] = cursect - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise cp.MissingSectionHeaderError(fpname, lineno, line) - # an option line? - else: - mo = self.OPTCRE.match(line) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if vi in ('=', ':') and ';' in optval: - pos = optval.find(';') - if pos != -1 and optval[pos-1].isspace(): - optval = optval[:pos] - optval = optval.strip() - if optval == '""': - optval = '' - optname = self.optionxform(optname.rstrip()) - cursect[optname] = optval - else: - if not e: - e = cp.ParsingError(fpname) - e.append(lineno, repr(line)) - # END - # END ? - # END ? - # END while reading - # if any parsing errors occurred, raise an exception - if e: - raise e - - - def read(self): - """Reads the data stored in the files we have been initialized with. 
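A read-only parser may receive several paths and overlays them in order, much as git itself cascades system, global and repository configuration (paths hypothetical):

    import os
    files = ["/etc/gitconfig",
             os.path.expanduser("~/.gitconfig"),
             "/path/to/repo/.git/config"]
    cr = GitConfigParser(files, read_only=True)
    email = cr.get_value("user", "email", default="")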
It will - ignore files that cannot be read, possibly leaving an empty configuration - - :return: Nothing - :raise IOError: if a file cannot be handled""" - if self._is_initialized: - return - - files_to_read = self._file_or_files - if not isinstance(files_to_read, (tuple, list)): - files_to_read = [ files_to_read ] - - for file_object in files_to_read: - fp = file_object - close_fp = False - # assume a path if it is not a file-object - if not hasattr(file_object, "seek"): - try: - fp = open(file_object) - close_fp = True - except IOError,e: - continue - # END fp handling - - try: - self._read(fp, fp.name) - finally: - if close_fp: - fp.close() - # END read-handling - # END for each file object to read - self._is_initialized = True - - def _write(self, fp): - """Write an .ini-format representation of the configuration state in - git compatible format""" - def write_section(name, section_dict): - fp.write("[%s]\n" % name) - for (key, value) in section_dict.items(): - if key != "__name__": - fp.write("\t%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) - # END if key is not __name__ - # END section writing - - if self._defaults: - write_section(cp.DEFAULTSECT, self._defaults) - map(lambda t: write_section(t[0],t[1]), self._sections.items()) - - - @needs_values - def write(self): - """Write changes to our file, if there are changes at all - - :raise IOError: if this is a read-only writer instance or if we could not obtain - a file lock""" - self._assure_writable("write") - - fp = self._file_or_files - close_fp = False - - # we have a physical file on disk, so get a lock - if isinstance(fp, (basestring, file)): - self._lock._obtain_lock() - # END get lock for physical files - - if not hasattr(fp, "seek"): - fp = open(self._file_or_files, "w") - close_fp = True - else: - fp.seek(0) - # END handle stream or file - - # WRITE DATA - try: - self._write(fp) - finally: - if close_fp: - fp.close() - # END data writing - - # we do not release the lock - it will be done automatically once the - # instance vanishes - - def _assure_writable(self, method_name): - if self.read_only: - raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) - - @needs_values - @set_dirty_and_flush_changes - def add_section(self, section): - """Assures added options will stay in order""" - super(GitConfigParser, self).add_section(section) - self._sections[section] = OrderedDict() - - @property - def read_only(self): - """:return: True if this instance may change the configuration file""" - return self._read_only - - def get_value(self, section, option, default = None): - """ - :param default: - If not None, the given default value will be returned in case - the option did not exist - :return: a properly typed value, either int, float or string - - :raise TypeError: in case the value could not be understood - Otherwise the exceptions known to the ConfigParser will be raised.""" - try: - valuestr = self.get(section, option) - except Exception: - if default is not None: - return default - raise - - types = ( long, float ) - for numtype in types: - try: - val = numtype( valuestr ) - - # truncated value ? 
- if val != float( valuestr ): - continue - - return val - except (ValueError,TypeError): - continue - # END for each numeric type - - # try boolean values as git uses them - vl = valuestr.lower() - if vl == 'false': - return False - if vl == 'true': - return True - - if not isinstance( valuestr, basestring ): - raise TypeError( "Invalid value type: only int, long, float and str are allowed", valuestr ) - - return valuestr - - @needs_values - @set_dirty_and_flush_changes - def set_value(self, section, option, value): - """Sets the given option in section to the given value. - It will create the section if required, and will not throw as opposed to the default - ConfigParser 'set' method. - - :param section: Name of the section in which the option resides or should reside - :param option: Name of the options whose value to set - - :param value: Value to set the option to. It must be a string or convertible - to a string""" - if not self.has_section(section): - self.add_section(section) - self.set(section, option, str(value)) diff --git a/lib/git/db.py b/lib/git/db.py deleted file mode 100644 index b1c65377..00000000 --- a/lib/git/db.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Module with our own gitdb implementation - it uses the git command""" -from exc import ( - GitCommandError, - BadObject - ) - -from gitdb.base import ( - OInfo, - OStream - ) - -from gitdb.util import ( - bin_to_hex, - hex_to_bin - ) -from gitdb.db import GitDB -from gitdb.db import LooseObjectDB - - -__all__ = ('GitCmdObjectDB', 'GitDB' ) - -#class GitCmdObjectDB(CompoundDB, ObjectDBW): -class GitCmdObjectDB(LooseObjectDB): - """A database representing the default git object store, which includes loose - objects, pack files and an alternates file - - It will create objects only in the loose object database. 
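The coercion chain in get_value above mirrors git's own typing rules: integral and floating values come back as numbers, 'true'/'false' as booleans, everything else as the raw string. For example (path hypothetical):

    cr = GitConfigParser("/path/to/repo/.git/config", read_only=True)
    cr.get_value("core", "repositoryformatversion")   # -> 0 (a number)
    cr.get_value("core", "bare")                      # -> False (a bool)
    cr.get_value("user", "name")                      # -> the raw string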
- :note: for now, we use the git command to do all the lookup, just until he - have packs and the other implementations - """ - def __init__(self, root_path, git): - """Initialize this instance with the root and a git command""" - super(GitCmdObjectDB, self).__init__(root_path) - self._git = git - - def info(self, sha): - hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha)) - return OInfo(hex_to_bin(hexsha), typename, size) - - def stream(self, sha): - """For now, all lookup is done by git itself""" - hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha)) - return OStream(hex_to_bin(hexsha), typename, size, stream) - - - # { Interface - - def partial_to_complete_sha_hex(self, partial_hexsha): - """:return: Full binary 20 byte sha from the given partial hexsha - :raise AmbiguousObjectName: - :raise BadObject: - :note: currently we only raise BadObject as git does not communicate - AmbiguousObjects separately""" - try: - hexsha, typename, size = self._git.get_object_header(partial_hexsha) - return hex_to_bin(hexsha) - except (GitCommandError, ValueError): - raise BadObject(partial_hexsha) - # END handle exceptions - - #} END interface diff --git a/lib/git/diff.py b/lib/git/diff.py deleted file mode 100644 index 48253c42..00000000 --- a/lib/git/diff.py +++ /dev/null @@ -1,346 +0,0 @@ -# diff.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -import re -from objects.blob import Blob -from objects.util import mode_str_to_int -from exc import GitCommandError - -from gitdb.util import hex_to_bin - -__all__ = ('Diffable', 'DiffIndex', 'Diff') - -class Diffable(object): - """Common interface for all object that can be diffed against another object of compatible type. - - :note: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object.""" - __slots__ = tuple() - - # standin indicating you want to diff against the index - class Index(object): - pass - - def _process_diff_args(self, args): - """ - :return: - possibly altered version of the given args list. - Method is called right before git command execution. - Subclasses can use it to alter the behaviour of the superclass""" - return args - - def diff(self, other=Index, paths=None, create_patch=False, **kwargs): - """Creates diffs between two items being trees, trees and index or an - index and the working tree. - - :param other: - Is the item to compare us with. - If None, we will be compared to the working tree. - If Treeish, it will be compared against the respective tree - If Index ( type ), it will be compared against the index. - It defaults to Index to assure the method will not by-default fail - on bare repositories. - - :param paths: - is a list of paths or a single path to limit the diff to. - It will only include at least one of the givne path or paths. - - :param create_patch: - If True, the returned Diff contains a detailed patch that if applied - makes the self to other. Patches are somwhat costly as blobs have to be read - and diffed. - - :param kwargs: - Additional arguments passed to git-diff, such as - R=True to swap both sides of the diff. - - :return: git.DiffIndex - - :note: - Rename detection will only work if create_patch is True. 
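Typical calls against the interface documented above, assuming `repo` is an existing Repo instance:

    hcommit = repo.head.commit
    idx_diff = hcommit.diff()                 # commit vs. index
    wt_diff = hcommit.diff(None)              # commit vs. working tree
    patch = hcommit.diff("HEAD~1", create_patch=True, paths="lib")
    for d in wt_diff.iter_change_type("M"):   # only modified paths
        print(d.a_blob.path)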
- - On a bare repository, 'other' needs to be provided as Index or as - as Tree/Commit, or a git command error will occour""" - args = list() - args.append( "--abbrev=40" ) # we need full shas - args.append( "--full-index" ) # get full index paths, not only filenames - - if create_patch: - args.append("-p") - args.append("-M") # check for renames - else: - args.append("--raw") - - if paths is not None and not isinstance(paths, (tuple,list)): - paths = [ paths ] - - if other is not None and other is not self.Index: - args.insert(0, other) - if other is self.Index: - args.insert(0, "--cached") - - args.insert(0,self) - - # paths is list here or None - if paths: - args.append("--") - args.extend(paths) - # END paths handling - - kwargs['as_process'] = True - proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs) - - diff_method = Diff._index_from_raw_format - if create_patch: - diff_method = Diff._index_from_patch_format - index = diff_method(self.repo, proc.stdout) - - status = proc.wait() - return index - - -class DiffIndex(list): - """Implements an Index for diffs, allowing a list of Diffs to be queried by - the diff properties. - - The class improves the diff handling convenience""" - # change type invariant identifying possible ways a blob can have changed - # A = Added - # D = Deleted - # R = Renamed - # M = modified - change_type = ("A", "D", "R", "M") - - - def iter_change_type(self, change_type): - """ - :return: - iterator yieling Diff instances that match the given change_type - - :param change_type: - Member of DiffIndex.change_type, namely: - - * 'A' for added paths - * 'D' for deleted paths - * 'R' for renamed paths - * 'M' for paths with modified data""" - if change_type not in self.change_type: - raise ValueError( "Invalid change type: %s" % change_type ) - - for diff in self: - if change_type == "A" and diff.new_file: - yield diff - elif change_type == "D" and diff.deleted_file: - yield diff - elif change_type == "R" and diff.renamed: - yield diff - elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: - yield diff - # END for each diff - - -class Diff(object): - """A Diff contains diff information between two Trees. - - It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. - - Diffs keep information about the changed blob objects, the file mode, renames, - deletions and new files. - - There are a few cases where None has to be expected as member variable value: - - ``New File``:: - - a_mode is None - a_blob is None - - ``Deleted File``:: - - b_mode is None - b_blob is None - - ``Working Tree Blobs`` - - When comparing to working trees, the working tree blob will have a null hexsha - as a corresponding object does not yet exist. The mode will be null as well. - But the path will be available though. - If it is listed in a diff the working tree version of the file must - be different to the version in the index or tree, and hence has been modified.""" - - # precompiled regex - re_header = re.compile(r""" - #^diff[ ]--git - [ ]a/(?P<a_path>.+?)[ ]b/(?P<b_path>.+?)\n - (?:^similarity[ ]index[ ](?P<similarity_index>\d+)%\n - ^rename[ ]from[ ](?P<rename_from>\S+)\n - ^rename[ ]to[ ](?P<rename_to>\S+)(?:\n|$))? - (?:^old[ ]mode[ ](?P<old_mode>\d+)\n - ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? - (?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))? - (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? 
- (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) - \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? - """, re.VERBOSE | re.MULTILINE) - # can be used for comparisons - NULL_HEX_SHA = "0"*40 - NULL_BIN_SHA = "\0"*20 - - __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", - "rename_from", "rename_to", "diff") - - def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, - b_mode, new_file, deleted_file, rename_from, - rename_to, diff): - - self.a_mode = a_mode - self.b_mode = b_mode - - if self.a_mode: - self.a_mode = mode_str_to_int(self.a_mode) - if self.b_mode: - self.b_mode = mode_str_to_int(self.b_mode) - - if a_blob_id is None: - self.a_blob = None - else: - self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path) - if b_blob_id is None: - self.b_blob = None - else: - self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path) - - self.new_file = new_file - self.deleted_file = deleted_file - - # be clear and use None instead of empty strings - self.rename_from = rename_from or None - self.rename_to = rename_to or None - - self.diff = diff - - - def __eq__(self, other): - for name in self.__slots__: - if getattr(self, name) != getattr(other, name): - return False - # END for each name - return True - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(tuple(getattr(self,n) for n in self.__slots__)) - - def __str__(self): - h = "%s" - if self.a_blob: - h %= self.a_blob.path - elif self.b_blob: - h %= self.b_blob.path - - msg = '' - l = None # temp line - ll = 0 # line length - for b,n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')): - if b: - l = "\n%s: %o | %s" % (n, b.mode, b.hexsha) - else: - l = "\n%s: None" % n - # END if blob is not None - ll = max(len(l), ll) - msg += l - # END for each blob - - # add headline - h += '\n' + '='*ll - - if self.deleted_file: - msg += '\nfile deleted in rhs' - if self.new_file: - msg += '\nfile added in rhs' - if self.rename_from: - msg += '\nfile renamed from %r' % self.rename_from - if self.rename_to: - msg += '\nfile renamed to %r' % self.rename_to - if self.diff: - msg += '\n---' - msg += self.diff - msg += '\n---' - # END diff info - - return h + msg - - @property - def renamed(self): - """:returns: True if the blob of our diff has been renamed""" - return self.rename_from != self.rename_to - - @classmethod - def _index_from_patch_format(cls, repo, stream): - """Create a new DiffIndex from the given text which must be in patch format - :param repo: is the repository we are operating on - it is required - :param stream: result of 'git diff' as a stream (supporting file protocol) - :return: git.DiffIndex """ - # for now, we have to bake the stream - text = stream.read() - index = DiffIndex() - - diff_header = cls.re_header.match - for diff in ('\n' + text).split('\ndiff --git')[1:]: - header = diff_header(diff) - - a_path, b_path, similarity_index, rename_from, rename_to, \ - old_mode, new_mode, new_file_mode, deleted_file_mode, \ - a_blob_id, b_blob_id, b_mode = header.groups() - new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - - index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, - old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, - new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - - return index - - @classmethod - def _index_from_raw_format(cls, repo, stream): - """Create a new DiffIndex from the given stream which must be in raw format. 
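The patch parser splits the complete `git diff -p` output on '\ndiff --git' and lets re_header pull the per-file metadata out of each chunk; roughly (repo hypothetical):

    text = repo.git.diff("HEAD~1", p=True, M=True)
    for chunk in ('\n' + text).split('\ndiff --git')[1:]:
        m = Diff.re_header.match(chunk)
        print(m.group('a_path'), m.group('b_path'))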
- :note: - This format is inherently incapable of detecting renames, hence we only - modify, delete and add files - :return: git.DiffIndex""" - # handles - # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore - index = DiffIndex() - for line in stream: - if not line.startswith(":"): - continue - # END its not a valid diff line - old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split(None, 5) - path = path.strip() - a_path = path - b_path = path - deleted_file = False - new_file = False - - # NOTE: We cannot conclude from the existance of a blob to change type - # as diffs with the working do not have blobs yet - if change_type == 'D': - b_blob_id = None - deleted_file = True - elif change_type == 'A': - a_blob_id = None - new_file = True - # END add/remove handling - - diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, - new_file, deleted_file, None, None, '') - index.append(diff) - # END for each line - - return index - diff --git a/lib/git/exc.py b/lib/git/exc.py deleted file mode 100644 index d2cb8d7e..00000000 --- a/lib/git/exc.py +++ /dev/null @@ -1,58 +0,0 @@ -# exc.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" Module containing all exceptions thrown througout the git package, """ - -from gitdb.exc import * - -class InvalidGitRepositoryError(Exception): - """ Thrown if the given repository appears to have an invalid format. """ - - -class NoSuchPathError(OSError): - """ Thrown if a path could not be access by the system. """ - - -class GitCommandError(Exception): - """ Thrown if execution of the git command fails with non-zero status code. """ - def __init__(self, command, status, stderr=None): - self.stderr = stderr - self.status = status - self.command = command - - def __str__(self): - return ("'%s' returned exit status %i: %s" % - (' '.join(str(i) for i in self.command), self.status, self.stderr)) - - -class CheckoutError( Exception ): - """Thrown if a file could not be checked out from the index as it contained - changes. - - The .failed_files attribute contains a list of relative paths that failed - to be checked out as they contained changes that did not exist in the index. - - The .failed_reasons attribute contains a string informing about the actual - cause of the issue. 
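Client code is expected to catch these per the attribute documentation above; a hedged sketch, with a hypothetical `repo` and index.checkout as the typical raiser of CheckoutError:

    from git.exc import GitCommandError, CheckoutError

    try:
        repo.git.rebase("origin/master")
    except GitCommandError as err:
        print("git exited with %d: %s" % (err.status, err.stderr))

    try:
        repo.index.checkout()
    except CheckoutError as err:
        print("failed paths: %s" % ", ".join(err.failed_files))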
- - The .valid_files attribute contains a list of relative paths to files that - were checked out successfully and hence match the version stored in the - index""" - def __init__(self, message, failed_files, valid_files, failed_reasons): - Exception.__init__(self, message) - self.failed_files = failed_files - self.failed_reasons = failed_reasons - self.valid_files = valid_files - - def __str__(self): - return Exception.__str__(self) + ":%s" % self.failed_files - - -class CacheError(Exception): - """Base for all errors related to the git index, which is called cache internally""" - -class UnmergedEntriesError(CacheError): - """Thrown if an operation cannot proceed as there are still unmerged - entries in the cache""" diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb deleted file mode 160000 -Subproject 1bc281d31b8d31fd4dcbcd9b441b5c7b2c1b0bb diff --git a/lib/git/index/__init__.py b/lib/git/index/__init__.py deleted file mode 100644 index fe4a7f59..00000000 --- a/lib/git/index/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Initialize the index package""" - -from base import * -from typ import *
\ No newline at end of file diff --git a/lib/git/index/base.py b/lib/git/index/base.py deleted file mode 100644 index 05caa06d..00000000 --- a/lib/git/index/base.py +++ /dev/null @@ -1,1143 +0,0 @@ -# index.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -"""Module containing Index implementation, allowing to perform all kinds of index -manipulations such as querying and merging.""" -import tempfile -import os -import sys -import subprocess -import glob -from cStringIO import StringIO - -from stat import S_ISLNK - -from typ import ( - BaseIndexEntry, - IndexEntry, - ) - -from util import ( - TemporaryFileSwap, - post_clear_cache, - default_index, - git_working_dir - ) - -import git.objects -import git.diff as diff - -from git.exc import ( - GitCommandError, - CheckoutError - ) - -from git.objects import ( - Blob, - Submodule, - Tree, - Object, - Commit, - ) - -from git.objects.util import Serializable - -from git.util import ( - IndexFileSHA1Writer, - LazyMixin, - LockedFD, - join_path_native, - file_contents_ro, - to_native_path_linux, - to_native_path - ) - -from fun import ( - entry_key, - write_cache, - read_cache, - aggressive_tree_merge, - write_tree_from_cache, - stat_mode_to_index_mode, - S_IFGITLINK - ) - -from gitdb.base import IStream -from gitdb.db import MemoryDB -from gitdb.util import to_bin_sha -from itertools import izip - -__all__ = ( 'IndexFile', 'CheckoutError' ) - - -class IndexFile(LazyMixin, diff.Diffable, Serializable): - """ - Implements an Index that can be manipulated using a native implementation in - order to save git command function calls wherever possible. - - It provides custom merging facilities allowing to merge without actually changing - your index or your working tree. This way you can perform own test-merges based - on the index only without having to deal with the working copy. This is useful - in case of partial working trees. - - ``Entries`` - - The index contains an entries dict whose keys are tuples of type IndexEntry - to facilitate access. - - You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: - - index.entries[index.entry_key(index_entry_instance)] = index_entry_instance - - Make sure you use index.write() once you are done manipulating the index directly - before operating on it using the git command""" - __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") - _VERSION = 2 # latest version we support - S_IFGITLINK = S_IFGITLINK # a submodule - - def __init__(self, repo, file_path=None): - """Initialize this Index instance, optionally from the given ``file_path``. - If no file_path is given, we will be created from the current index file. 
- - If a stream is not given, the stream will be initialized from the current - repository's index on demand.""" - self.repo = repo - self.version = self._VERSION - self._extension_data = '' - self._file_path = file_path or self._index_path() - - def _set_cache_(self, attr): - if attr == "entries": - # read the current index - # try memory map for speed - lfd = LockedFD(self._file_path) - try: - fd = lfd.open(write=False, stream=False) - except OSError: - lfd.rollback() - # in new repositories, there may be no index, which means we are empty - self.entries = dict() - return - # END exception handling - - # Here it comes: on windows in python 2.5, memory maps aren't closed properly - # Hence we are in trouble if we try to delete a file that is memory mapped, - # which happens during read-tree. - # In this case, we will just read the memory in directly. - # Its insanely bad ... I am disappointed ! - allow_mmap = (os.name != 'nt' or sys.version_info[1] > 5) - stream = file_contents_ro(fd, stream=True, allow_mmap=allow_mmap) - - try: - self._deserialize(stream) - finally: - lfd.rollback() - # The handles will be closed on desctruction - # END read from default index on demand - else: - super(IndexFile, self)._set_cache_(attr) - - def _index_path(self): - return join_path_native(self.repo.git_dir, "index") - - @property - def path(self): - """ :return: Path to the index file we are representing """ - return self._file_path - - def _delete_entries_cache(self): - """Safely clear the entries cache so it can be recreated""" - try: - del(self.entries) - except AttributeError: - # fails in python 2.6.5 with this exception - pass - # END exception handling - - #{ Serializable Interface - - def _deserialize(self, stream): - """Initialize this instance with index values read from the given stream""" - self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) - return self - - def _entries_sorted(self): - """:return: list of entries, in a sorted fashion, first by path, then by stage""" - entries_sorted = self.entries.values() - entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key - return entries_sorted - - def _serialize(self, stream, ignore_tree_extension_data=False): - entries = self._entries_sorted() - write_cache(entries, - stream, - (ignore_tree_extension_data and None) or self._extension_data) - return self - - - #} END serializable interface - - def write(self, file_path = None, ignore_tree_extension_data=False): - """Write the current state to our file path or to the given one - - :param file_path: - If None, we will write to our stored file path from which we have - been initialized. Otherwise we write to the given file path. - Please note that this will change the file_path of this index to - the one you gave. - - :param ignore_tree_extension_data: - If True, the TREE type extension data read in the index will not - be written to disk. Use this if you have altered the index and - would like to use git-write-tree afterwards to create a tree - representing your written changes. - If this data is present in the written index, git-write-tree - will instead write the stored/cached tree. - Alternatively, use IndexFile.write_tree() to handle this case - automatically - - :return: self""" - # make sure we have our entries read before getting a write lock - # else it would be done when streaming. 
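The write() contract above in practice (paths hypothetical):

    index = repo.index
    index.write()                  # rewrite .git/index in place
    # write elsewhere, dropping the cached TREE extension so that a later
    # git-write-tree reflects the altered entries; this instance's file_path
    # now points at the copy
    index.write("/tmp/alt_index", ignore_tree_extension_data=True)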
This can happen - # if one doesn't change the index, but writes it right away - self.entries - lfd = LockedFD(file_path or self._file_path) - stream = lfd.open(write=True, stream=True) - - self._serialize(stream, ignore_tree_extension_data) - - lfd.commit() - - # make sure we represent what we have written - if file_path is not None: - self._file_path = file_path - - @post_clear_cache - @default_index - def merge_tree(self, rhs, base=None): - """Merge the given rhs treeish into the current index, possibly taking - a common base treeish into account. - - As opposed to the from_tree_ method, this allows you to use an already - existing tree as the left side of the merge - - :param rhs: - treeish reference pointing to the 'other' side of the merge. - - :param base: - optional treeish reference pointing to the common base of 'rhs' and - this index which equals lhs - - :return: - self ( containing the merge and possibly unmerged entries in case of - conflicts ) - - :raise GitCommandError: - If there is a merge conflict. The error will - be raised at the first conflicting path. If you want to have proper - merge resolution to be done by yourself, you have to commit the changed - index ( or make a valid tree from it ) and retry with a three-way - index.from_tree call. """ - # -i : ignore working tree status - # --aggressive : handle more merge cases - # -m : do an actual merge - args = ["--aggressive", "-i", "-m"] - if base is not None: - args.append(base) - args.append(rhs) - - self.repo.git.read_tree(args) - return self - - @classmethod - def new(cls, repo, *tree_sha): - """ Merge the given treeish revisions into a new index which is returned. - This method behaves like git-read-tree --aggressive when doing the merge. - - :param repo: The repository treeish are located in. - - :param tree_sha: - 20 byte or 40 byte tree sha or tree objects - - :return: - New IndexFile instance. Its path will be undefined. - If you intend to write such a merged Index, supply an alternate file_path - to its 'write' method.""" - base_entries = aggressive_tree_merge(repo.odb, [to_bin_sha(str(t)) for t in tree_sha]) - - inst = cls(repo) - # convert to entries dict - entries = dict(izip(((e.path, e.stage) for e in base_entries), - (IndexEntry.from_base(e) for e in base_entries))) - - inst.entries = entries - return inst - - - @classmethod - def from_tree(cls, repo, *treeish, **kwargs): - """Merge the given treeish revisions into a new index which is returned. - The original index will remain unaltered - - :param repo: - The repository treeish are located in. - - :param treeish: - One, two or three Tree Objects, Commits or 40 byte hexshas. The result - changes according to the amount of trees. - If 1 Tree is given, it will just be read into a new index - If 2 Trees are given, they will be merged into a new index using a - two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' - one. It behaves like a fast-forward. - If 3 Trees are given, a 3-way merge will be performed with the first tree - being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, - tree 3 is the 'other' one - - :param kwargs: - Additional arguments passed to git-read-tree - - :return: - New IndexFile instance. It will point to a temporary index location which - does not exist anymore. If you intend to write such a merged Index, supply - an alternate file_path to its 'write' method. 
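The one/two/three-tree semantics above give a cheap test-merge that never touches the real index or working tree; a hedged sketch with hypothetical branch names:

    base = repo.git.merge_base("master", "feature")
    merged = IndexFile.from_tree(repo, base, "master", "feature")
    if merged.unmerged_blobs():
        print("merge would conflict")
    else:
        merged.write("/tmp/merged_index")   # its temporary path is gone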
- - :note: - In the three-way merge case, --aggressive will be specified to automatically - resolve more cases in a commonly correct manner. Specify trivial=True as kwarg - to override that. - - As the underlying git-read-tree command takes into account the current index, - it will be temporarily moved out of the way to assure there are no unsuspected - interferences.""" - if len(treeish) == 0 or len(treeish) > 3: - raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) - - arg_list = list() - # ignore that working tree and index possibly are out of date - if len(treeish)>1: - # drop unmerged entries when reading our index and merging - arg_list.append("--reset") - # handle non-trivial cases the way a real merge does - arg_list.append("--aggressive") - # END merge handling - - # tmp file created in git home directory to be sure renaming - # works - /tmp/ dirs could be on another device - tmp_index = tempfile.mktemp('','',repo.git_dir) - arg_list.append("--index-output=%s" % tmp_index) - arg_list.extend(treeish) - - # move current index out of the way - otherwise the merge may fail - # as it considers existing entries. moving it essentially clears the index. - # Unfortunately there is no 'soft' way to do it. - # The TemporaryFileSwap assure the original file get put back - index_handler = TemporaryFileSwap(join_path_native(repo.git_dir, 'index')) - try: - repo.git.read_tree(*arg_list, **kwargs) - index = cls(repo, tmp_index) - index.entries # force it to read the file as we will delete the temp-file - del(index_handler) # release as soon as possible - finally: - if os.path.exists(tmp_index): - os.remove(tmp_index) - # END index merge handling - - return index - - # UTILITIES - def _iter_expand_paths(self, paths): - """Expand the directories in list of paths to the corresponding paths accordingly, - - Note: git will add items multiple times even if a glob overlapped - with manually specified paths or if paths where specified multiple - times - we respect that and do not prune""" - def raise_exc(e): - raise e - r = self.repo.working_tree_dir - rs = r + os.sep - for path in paths: - abs_path = path - if not os.path.isabs(abs_path): - abs_path = os.path.join(r, path) - # END make absolute path - - # resolve globs if possible - if '?' in path or '*' in path or '[' in path: - for f in self._iter_expand_paths(glob.glob(abs_path)): - yield f.replace(rs, '') - continue - # END glob handling - try: - for root, dirs, files in os.walk(abs_path, onerror=raise_exc): - for rela_file in files: - # add relative paths only - yield os.path.join(root.replace(rs, ''), rela_file) - # END for each file in subdir - # END for each subdirectory - except OSError: - # was a file or something that could not be iterated - yield path.replace(rs, '') - # END path exception handling - # END for each path - - def _write_path_to_stdin(self, proc, filepath, item, fmakeexc, fprogress, - read_from_stdout=True): - """Write path to proc.stdin and make sure it processes the item, including progress. - - :return: stdout string - :param read_from_stdout: if True, proc.stdout will be read after the item - was sent to stdin. In that case, it will return None - :note: There is a bug in git-update-index that prevents it from sending - reports just in time. This is why we have a version that tries to - read stdout and one which doesn't. 
In fact, the stdout is not - important as the piped-in files are processed anyway and just in time - :note: Newlines are essential here, gits behaviour is somewhat inconsistent - on this depending on the version, hence we try our best to deal with - newlines carefully. Usually the last newline will not be sent, instead - we will close stdin to break the pipe.""" - - fprogress(filepath, False, item) - rval = None - try: - proc.stdin.write("%s\n" % filepath) - except IOError: - # pipe broke, usually because some error happend - raise fmakeexc() - # END write exception handling - proc.stdin.flush() - if read_from_stdout: - rval = proc.stdout.readline().strip() - fprogress(filepath, True, item) - return rval - - def iter_blobs(self, predicate = lambda t: True): - """ - :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) - - :param predicate: - Function(t) returning True if tuple(stage, Blob) should be yielded by the - iterator. A default filter, the BlobFilter, allows you to yield blobs - only if they match a given list of paths. """ - for entry in self.entries.itervalues(): - # TODO: is it necessary to convert the mode ? We did that when adding - # it to the index, right ? - mode = stat_mode_to_index_mode(entry.mode) - blob = entry.to_blob(self.repo) - blob.size = entry.size - output = (entry.stage, blob) - if predicate(output): - yield output - # END for each entry - - def unmerged_blobs(self): - """ - :return: - Iterator yielding dict(path : list( tuple( stage, Blob, ...))), being - a dictionary associating a path in the index with a list containing - sorted stage/blob pairs - - :note: - Blobs that have been removed in one side simply do not exist in the - given stage. I.e. a file removed on the 'other' branch whose entries - are at stage 3 will not have a stage 3 entry. - """ - is_unmerged_blob = lambda t: t[0] != 0 - path_map = dict() - for stage, blob in self.iter_blobs(is_unmerged_blob): - path_map.setdefault(blob.path, list()).append((stage, blob)) - # END for each unmerged blob - for l in path_map.itervalues(): - l.sort() - return path_map - - @classmethod - def entry_key(cls, *entry): - return entry_key(*entry) - - def resolve_blobs(self, iter_blobs): - """Resolve the blobs given in blob iterator. This will effectively remove the - index entries of the respective path at all non-null stages and add the given - blob as new stage null blob. - - For each path there may only be one blob, otherwise a ValueError will be raised - claiming the path is already at stage 0. - - :raise ValueError: if one of the blobs already existed at stage 0 - :return: self - - :note: - You will have to write the index manually once you are done, i.e. - index.resolve_blobs(blobs).write() - """ - for blob in iter_blobs: - stage_null_key = (blob.path, 0) - if stage_null_key in self.entries: - raise ValueError( "Path %r already exists at stage 0" % blob.path ) - # END assert blob is not stage 0 already - - # delete all possible stages - for stage in (1, 2, 3): - try: - del( self.entries[(blob.path, stage)]) - except KeyError: - pass - # END ignore key errors - # END for each possible stage - - self.entries[stage_null_key] = IndexEntry.from_blob(blob) - # END for each blob - - return self - - def update(self): - """Reread the contents of our index file, discarding all cached information - we might have. 
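-
-        For example, assuming 'repo' is an existing git.Repo ( a sketch only )::
-
-         index = repo.index
-         index.entries        # entries are read once and cached
-         index.update()       # discard the cache - reread from disk on next access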
-
-        :note: This is a possibly dangerous operation as it will discard your changes
-            to index.entries
-        :return: self"""
-        self._delete_entries_cache()
-        # allows the entries to be lazily reread on demand
-        return self
-
-    def write_tree(self):
-        """Writes this index to a corresponding Tree object into the repository's
-        object database and returns it.
-
-        :return: Tree object representing this index
-        :note: The tree will be written even if one or more objects the tree refers to
-            do not yet exist in the object database. This could happen if you added
-            Entries to the index directly.
-        :raise ValueError: if there are no entries in the cache
-        :raise UnmergedEntriesError: """
-        # we obtain no lock as we just flush our contents to disk as tree
-        # If we are a new index, the entries access will load our data accordingly
-        mdb = MemoryDB()
-        entries = self._entries_sorted()
-        binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
-
-        # copy changed trees only
-        mdb.stream_copy(mdb.sha_iter(), self.repo.odb)
-
-
-        # note: additional deserialization could be saved if write_tree_from_cache
-        # would return sorted tree entries
-        root_tree = Tree(self.repo, binsha, path='')
-        root_tree._cache = tree_items
-        return root_tree
-
-    def _process_diff_args(self, args):
-        try:
-            args.pop(args.index(self))
-        except IndexError:
-            pass
-        # END remove self
-        return args
-
-    def _to_relative_path(self, path):
-        """:return: Version of path relative to our git directory, or raise ValueError
-        if it is not within our git directory"""
-        if not os.path.isabs(path):
-            return path
-        relative_path = path.replace(self.repo.working_tree_dir+os.sep, "")
-        if relative_path == path:
-            raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir))
-        return relative_path
-
-    def _preprocess_add_items(self, items):
-        """ Split the items into two lists of path strings and BaseEntries. """
-        paths = list()
-        entries = list()
-
-        for item in items:
-            if isinstance(item, basestring):
-                paths.append(self._to_relative_path(item))
-            elif isinstance(item, (Blob, Submodule)):
-                entries.append(BaseIndexEntry.from_blob(item))
-            elif isinstance(item, BaseIndexEntry):
-                entries.append(item)
-            else:
-                raise TypeError("Invalid Type: %r" % item)
-        # END for each item
-        return (paths, entries)
-
-    @git_working_dir
-    def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=None,
-            write=True):
-        """Add files from the working tree, specific blobs or BaseIndexEntries
-        to the index.
-
-        :param items:
-            Multiple types of items are supported, and types can be mixed within one call.
-            Different types imply a different handling. File paths may generally be
-            relative or absolute.
-
-            - path string
-                strings denote a relative or absolute path into the repository pointing to
-                an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'.
-
-                Paths provided like this must exist. When added, they will be written
-                into the object database.
-
-                PathStrings may contain globs, such as 'lib/__init__*', or can be directories
-                like 'lib'; the latter ones will add all the files within the directory and
-                its subdirectories.
-
-                This equals a straight git-add.
-
-                They are added at stage 0
-
-            - Blob or Submodule object
-                Blobs are added as they are, assuming a valid mode is set.
-                The file they refer to may or may not exist in the file system, but
-                must be a path relative to our repository.
- - If their sha is null ( 40*0 ), their path must exist in the file system - relative to the git repository as an object will be created from - the data at the path. - The handling now very much equals the way string paths are processed, except that - the mode you have set will be kept. This allows you to create symlinks - by settings the mode respectively and writing the target of the symlink - directly into the file. This equals a default Linux-Symlink which - is not dereferenced automatically, except that it can be created on - filesystems not supporting it as well. - - Please note that globs or directories are not allowed in Blob objects. - - They are added at stage 0 - - - BaseIndexEntry or type - Handling equals the one of Blob objects, but the stage may be - explicitly set. Please note that Index Entries require binary sha's. - - :param force: - **CURRENTLY INEFFECTIVE** - If True, otherwise ignored or excluded files will be - added anyway. - As opposed to the git-add command, we enable this flag by default - as the API user usually wants the item to be added even though - they might be excluded. - - :param fprogress: - Function with signature f(path, done=False, item=item) called for each - path to be added, one time once it is about to be added where done==False - and once after it was added where done=True. - item is set to the actual item we handle, either a Path or a BaseIndexEntry - Please note that the processed path is not guaranteed to be present - in the index already as the index is currently being processed. - - :param path_rewriter: - Function with signature (string) func(BaseIndexEntry) function returning a path - for each passed entry which is the path to be actually recorded for the - object created from entry.path. This allows you to write an index which - is not identical to the layout of the actual files on your hard-dist. - If not None and ``items`` contain plain paths, these paths will be - converted to Entries beforehand and passed to the path_rewriter. - Please note that entry.path is relative to the git repository. - - :param write: - If True, the index will be written once it was altered. Otherwise - the changes only exist in memory and are not available to git commands. - - :return: - List(BaseIndexEntries) representing the entries just actually added. - - :raise OSError: - if a supplied Path did not exist. Please note that BaseIndexEntry - Objects that do not have a null sha will be added even if their paths - do not exist. 
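-
-        Example ( a minimal sketch, assuming 'repo' is a non-bare git.Repo, the
-        given paths exist in its working tree, and 'blob_binsha' stands for an
-        existing 20 byte binary sha; all names are illustrative )::
-
-         repo.index.add(['README', 'lib/*.py'])    # plain paths and globs
-         repo.index.add([Blob(repo, blob_binsha, 0100644, 'doc/notes.txt')])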
- """ - # sort the entries into strings and Entries, Blobs are converted to entries - # automatically - # paths can be git-added, for everything else we use git-update-index - entries_added = list() - paths, entries = self._preprocess_add_items(items) - if paths and path_rewriter: - for path in paths: - abspath = os.path.abspath(path) - gitrelative_path = abspath[len(self.repo.working_tree_dir)+1:] - blob = Blob(self.repo, Blob.NULL_BIN_SHA, - stat_mode_to_index_mode(os.stat(abspath).st_mode), - to_native_path_linux(gitrelative_path)) - entries.append(BaseIndexEntry.from_blob(blob)) - # END for each path - del(paths[:]) - # END rewrite paths - - - def store_path(filepath): - """Store file at filepath in the database and return the base index entry""" - st = os.lstat(filepath) # handles non-symlinks as well - stream = None - if S_ISLNK(st.st_mode): - stream = StringIO(os.readlink(filepath)) - else: - stream = open(filepath, 'rb') - # END handle stream - fprogress(filepath, False, filepath) - istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream)) - fprogress(filepath, True, filepath) - return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), - istream.binsha, 0, to_native_path_linux(filepath))) - # END utility method - - - # HANDLE PATHS - if paths: - assert len(entries_added) == 0 - added_files = list() - for filepath in self._iter_expand_paths(paths): - entries_added.append(store_path(filepath)) - # END for each filepath - # END path handling - - - # HANDLE ENTRIES - if entries: - null_mode_entries = [ e for e in entries if e.mode == 0 ] - if null_mode_entries: - raise ValueError("At least one Entry has a null-mode - please use index.remove to remove files for clarity") - # END null mode should be remove - - # HANLDE ENTRY OBJECT CREATION - # create objects if required, otherwise go with the existing shas - null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ] - if null_entries_indices: - for ei in null_entries_indices: - null_entry = entries[ei] - new_entry = store_path(null_entry.path) - - # update null entry - entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) - # END for each entry index - # END null_entry handling - - # REWRITE PATHS - # If we have to rewrite the entries, do so now, after we have generated - # all object sha's - if path_rewriter: - for i,e in enumerate(entries): - entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) - # END for each entry - # END handle path rewriting - - # just go through the remaining entries and provide progress info - for i, entry in enumerate(entries): - progress_sent = i in null_entries_indices - if not progress_sent: - fprogress(entry.path, False, entry) - fprogress(entry.path, True, entry) - # END handle progress - # END for each enty - entries_added.extend(entries) - # END if there are base entries - - # FINALIZE - # add the new entries to this instance - for entry in entries_added: - self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) - - if write: - self.write() - # END handle write - - return entries_added - - def _items_to_rela_paths(self, items): - """Returns a list of repo-relative paths from the given items which - may be absolute or relative paths, entries or blobs""" - paths = list() - for item in items: - if isinstance(item, (BaseIndexEntry,(Blob, Submodule))): - paths.append(self._to_relative_path(item.path)) - elif isinstance(item, basestring): - paths.append(self._to_relative_path(item)) - 
else:
-                raise TypeError("Invalid item type: %r" % item)
-        # END for each item
-        return paths
-
-    @post_clear_cache
-    @default_index
-    def remove(self, items, working_tree=False, **kwargs):
-        """Remove the given items from the index and optionally from
-        the working tree as well.
-
-        :param items:
-            Multiple types of items are supported which may be freely mixed.
-
-            - path string
-                Remove the given path at all stages. If it is a directory, you must
-                specify the r=True keyword argument to remove all file entries
-                below it. If absolute paths are given, they will be converted
-                to a path relative to the git repository directory containing
-                the working tree
-
-                The path string may include globs, such as *.c.
-
-            - Blob Object
-                Only the path portion is used in this case.
-
-            - BaseIndexEntry or compatible type
-                The only relevant information here is the path. The stage is ignored.
-
-        :param working_tree:
-            If True, the entry will also be removed from the working tree, physically
-            removing the respective file. This may fail if there are uncommitted changes
-            in it.
-
-        :param kwargs:
-            Additional keyword arguments to be passed to git-rm, such
-            as 'r' to allow recursive removal of directories
-
-        :return:
-            List(path_string, ...) list of repository relative paths that have
-            been removed effectively.
-            This is interesting to know in case you have provided a directory or
-            globs. Paths are relative to the repository. """
-        args = list()
-        if not working_tree:
-            args.append("--cached")
-        args.append("--")
-
-        # preprocess paths
-        paths = self._items_to_rela_paths(items)
-        removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines()
-
-        # process output to gain proper paths
-        # rm 'path'
-        return [ p[4:-1] for p in removed_paths ]
-
-    @post_clear_cache
-    @default_index
-    def move(self, items, skip_errors=False, **kwargs):
-        """Rename/move the items, whereas the last item is considered the destination of
-        the move operation. If the destination is a file, the first item ( of two )
-        must be a file as well. If the destination is a directory, it may be preceded
-        by one or more directories or files.
-
-        The working tree will be affected in non-bare repositories.
-
-        :param items:
-            Multiple types of items are supported, please see the 'remove' method
-            for reference.
-        :param skip_errors:
-            If True, errors such as ones resulting from missing source files will
-            be skipped.
-        :param kwargs:
-            Additional arguments you would like to pass to git-mv, such as dry_run
-            or force.
-
-        :return: List(tuple(source_path_string, destination_path_string), ...)
-            A list of pairs, containing the source file moved as well as its
-            actual destination. Relative to the repository root.
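-
-            For example, assuming 'repo' is a non-bare git.Repo containing the
-            given files ( a sketch with illustrative names )::
-
-             repo.index.move(['old.py', 'new.py'])        # rename a file
-             repo.index.move(['a.py', 'b.py', 'lib'])     # move both into lib/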
-
-        :raise ValueError: If only one item was given
-            GitCommandError: If git could not handle your request"""
-        args = list()
-        if skip_errors:
-            args.append('-k')
-
-        paths = self._items_to_rela_paths(items)
-        if len(paths) < 2:
-            raise ValueError("Please provide at least one source and one destination of the move operation")
-
-        was_dry_run = kwargs.pop('dry_run', kwargs.pop('n', None))
-        kwargs['dry_run'] = True
-
-        # first execute the rename as a dry-run so the command tells us what it actually does
-        # ( for later output )
-        out = list()
-        mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines()
-
-        # parse result - first 0:n/2 lines are 'checking ', the remaining ones
-        # are the 'renaming' ones which we parse
-        for ln in xrange(len(mvlines)/2, len(mvlines)):
-            tokens = mvlines[ln].split(' to ')
-            assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln]
-
-            # [0] = Renaming x
-            # [1] = y
-            out.append((tokens[0][9:], tokens[1]))
-        # END for each line to parse
-
-        # either prepare for the real run, or output the dry-run result
-        if was_dry_run:
-            return out
-        # END handle dry-run
-
-
-        # now apply the actual operation
-        kwargs.pop('dry_run')
-        self.repo.git.mv(args, paths, **kwargs)
-
-        return out
-
-    def commit(self, message, parent_commits=None, head=True):
-        """Commit the current default index file, creating a commit object.
-
-        For more information on the arguments, see tree.commit.
-        :note:
-            If you have manually altered the .entries member of this instance,
-            don't forget to write() your changes to disk beforehand.
-
-        :return:
-            Commit object representing the new commit"""
-        tree = self.write_tree()
-        return Commit.create_from_tree(self.repo, tree, message, parent_commits, head)
-
-    @classmethod
-    def _flush_stdin_and_wait(cls, proc, ignore_stdout = False):
-        proc.stdin.flush()
-        proc.stdin.close()
-        stdout = ''
-        if not ignore_stdout:
-            stdout = proc.stdout.read()
-        proc.stdout.close()
-        proc.wait()
-        return stdout
-
-    @default_index
-    def checkout(self, paths=None, force=False, fprogress=lambda *args: None, **kwargs):
-        """Checkout the given paths or all files from the version known to the index into
-        the working tree.
-
-        :note: Be sure you have written pending changes using the ``write`` method
-            in case you have altered the entries dictionary directly
-
-        :param paths:
-            If None, all paths in the index will be checked out. Otherwise an iterable
-            of relative or absolute paths, or a single path pointing to files or directories
-            in the index, is expected.
-
-        :param force:
-            If True, existing files will be overwritten even if they contain local modifications.
-            If False, these will trigger a CheckoutError.
-
-        :param fprogress:
-            see Index.add_ for signature and explanation.
-            The provided progress information will contain None as path and item if no
-            explicit paths are given. Otherwise progress information will be sent
-            before and after a file has been checked out
-
-        :param kwargs:
-            Additional arguments to be passed to git-checkout-index
-
-        :return:
-            iterable yielding paths to files which have been checked out and are
-            guaranteed to match the version stored in the index
-
-        :raise CheckoutError:
-            If at least one file failed to be checked out. This is a summary,
-            hence it will check out as many files as it can anyway.
-            If one of the files or directories does not exist in the index
-            ( as opposed to the original git command, which ignores them ).
- Raise GitCommandError if error lines could not be parsed - this truly is - an exceptional state""" - args = ["--index"] - if force: - args.append("--force") - - def handle_stderr(proc, iter_checked_out_files): - stderr = proc.stderr.read() - if not stderr: - return - # line contents: - # git-checkout-index: this already exists - failed_files = list() - failed_reasons = list() - unknown_lines = list() - endings = (' already exists', ' is not in the cache', ' does not exist at stage', ' is unmerged') - for line in stderr.splitlines(): - if not line.startswith("git checkout-index: ") and not line.startswith("git-checkout-index: "): - is_a_dir = " is a directory" - unlink_issue = "unable to unlink old '" - if line.endswith(is_a_dir): - failed_files.append(line[:-len(is_a_dir)]) - failed_reasons.append(is_a_dir) - elif line.startswith(unlink_issue): - failed_files.append(line[len(unlink_issue):line.rfind("'")]) - failed_reasons.append(unlink_issue) - else: - unknown_lines.append(line) - continue - # END special lines parsing - - for e in endings: - if line.endswith(e): - failed_files.append(line[20:-len(e)]) - failed_reasons.append(e) - break - # END if ending matches - # END for each possible ending - # END for each line - if unknown_lines: - raise GitCommandError(("git-checkout-index", ), 128, stderr) - if failed_files: - valid_files = list(set(iter_checked_out_files) - set(failed_files)) - raise CheckoutError("Some files could not be checked out from the index due to local modifications", failed_files, valid_files, failed_reasons) - # END stderr handler - - - if paths is None: - args.append("--all") - kwargs['as_process'] = 1 - fprogress(None, False, None) - proc = self.repo.git.checkout_index(*args, **kwargs) - proc.wait() - fprogress(None, True, None) - rval_iter = ( e.path for e in self.entries.itervalues() ) - handle_stderr(proc, rval_iter) - return rval_iter - else: - if isinstance(paths, basestring): - paths = [paths] - - # make sure we have our entries loaded before we start checkout_index - # which will hold a lock on it. 
We try to get the lock as well during - # our entries initialization - self.entries - - args.append("--stdin") - kwargs['as_process'] = True - kwargs['istream'] = subprocess.PIPE - proc = self.repo.git.checkout_index(args, **kwargs) - make_exc = lambda : GitCommandError(("git-checkout-index",)+tuple(args), 128, proc.stderr.read()) - checked_out_files = list() - - for path in paths: - co_path = to_native_path_linux(self._to_relative_path(path)) - # if the item is not in the index, it could be a directory - path_is_directory = False - - try: - self.entries[(co_path, 0)] - except KeyError: - dir = co_path - if not dir.endswith('/'): - dir += '/' - for entry in self.entries.itervalues(): - if entry.path.startswith(dir): - p = entry.path - self._write_path_to_stdin(proc, p, p, make_exc, - fprogress, read_from_stdout=False) - checked_out_files.append(p) - path_is_directory = True - # END if entry is in directory - # END for each entry - # END path exception handlnig - - if not path_is_directory: - self._write_path_to_stdin(proc, co_path, path, make_exc, - fprogress, read_from_stdout=False) - checked_out_files.append(co_path) - # END path is a file - # END for each path - self._flush_stdin_and_wait(proc, ignore_stdout=True) - - handle_stderr(proc, checked_out_files) - return checked_out_files - # END paths handling - assert "Should not reach this point" - - @default_index - def reset(self, commit='HEAD', working_tree=False, paths=None, head=False, **kwargs): - """Reset the index to reflect the tree at the given commit. This will not - adjust our HEAD reference as opposed to HEAD.reset by default. - - :param commit: - Revision, Reference or Commit specifying the commit we should represent. - If you want to specify a tree only, use IndexFile.from_tree and overwrite - the default index. - - :param working_tree: - If True, the files in the working tree will reflect the changed index. - If False, the working tree will not be touched - Please note that changes to the working copy will be discarded without - warning ! - - :param head: - If True, the head will be set to the given commit. This is False by default, - but if True, this method behaves like HEAD.reset. - - :param paths: if given as an iterable of absolute or repository-relative paths, - only these will be reset to their state at the given commit'ish. - The paths need to exist at the commit, otherwise an exception will be - raised. 
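-
-            For example, assuming 'repo' is a non-bare git.Repo whose history
-            contains 'README' ( a sketch only )::
-
-             repo.index.reset('HEAD~1', paths=['README'])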
- - :param kwargs: - Additional keyword arguments passed to git-reset - - :return: self """ - # what we actually want to do is to merge the tree into our existing - # index, which is what git-read-tree does - new_inst = type(self).from_tree(self.repo, commit) - if not paths: - self.entries = new_inst.entries - else: - nie = new_inst.entries - for path in paths: - path = self._to_relative_path(path) - try: - key = entry_key(path, 0) - self.entries[key] = nie[key] - except KeyError: - # if key is not in theirs, it musn't be in ours - try: - del(self.entries[key]) - except KeyError: - pass - # END handle deletion keyerror - # END handle keyerror - # END for each path - # END handle paths - self.write() - - if working_tree: - self.checkout(paths=paths, force=True) - # END handle working tree - - if head: - self.repo.head.commit = self.repo.commit(commit) - # END handle head change - - return self - - @default_index - def diff(self, other=diff.Diffable.Index, paths=None, create_patch=False, **kwargs): - """Diff this index against the working copy or a Tree or Commit object - - For a documentation of the parameters and return values, see - Diffable.diff - - :note: - Will only work with indices that represent the default git index as - they have not been initialized with a stream. - """ - # index against index is always empty - if other is self.Index: - return diff.DiffIndex() - - # index against anything but None is a reverse diff with the respective - # item. Handle existing -R flags properly. Transform strings to the object - # so that we can call diff on it - if isinstance(other, basestring): - other = self.repo.rev_parse(other) - # END object conversion - - if isinstance(other, Object): - # invert the existing R flag - cur_val = kwargs.get('R', False) - kwargs['R'] = not cur_val - return other.diff(self.Index, paths, create_patch, **kwargs) - # END diff against other item handlin - - # if other is not None here, something is wrong - if other is not None: - raise ValueError( "other must be None, Diffable.Index, a Tree or Commit, was %r" % other ) - - # diff against working copy - can be handled by superclass natively - return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) - diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py deleted file mode 100644 index 87fdf1a9..00000000 --- a/lib/git/index/fun.py +++ /dev/null @@ -1,323 +0,0 @@ -""" -Contains standalone functions to accompany the index implementation and make it -more versatile -""" -from stat import ( - S_IFDIR, - S_IFLNK, - S_ISLNK, - S_IFDIR, - S_ISDIR, - S_IFMT, - S_IFREG, - ) - -S_IFGITLINK = S_IFLNK | S_IFDIR # a submodule - -from cStringIO import StringIO - -from git.util import IndexFileSHA1Writer -from git.exc import UnmergedEntriesError -from git.objects.fun import ( - tree_to_stream, - traverse_tree_recursive, - traverse_trees_recursive - ) - -from typ import ( - BaseIndexEntry, - IndexEntry, - CE_NAMEMASK, - CE_STAGESHIFT - ) -CE_NAMEMASK_INV = ~CE_NAMEMASK - -from util import ( - pack, - unpack - ) - -from gitdb.base import IStream -from gitdb.typ import str_tree_type - -__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key', - 'stat_mode_to_index_mode', 'S_IFGITLINK') - - -def stat_mode_to_index_mode(mode): - """Convert the given mode from a stat call to the corresponding index mode - and return it""" - if S_ISLNK(mode): # symlinks - return S_IFLNK - if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules - return S_IFGITLINK - return S_IFREG | 0644 | (mode & 0100) # blobs 
with or without executable bit - - -def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1Writer): - """Write the cache represented by entries to a stream - - :param entries: **sorted** list of entries - :param stream: stream to wrap into the AdapterStreamCls - it is used for - final output. - - :param ShaStreamCls: Type to use when writing to the stream. It produces a sha - while writing to it, before the data is passed on to the wrapped stream - - :param extension_data: any kind of data to write as a trailer, it must begin - a 4 byte identifier, followed by its size ( 4 bytes )""" - # wrap the stream into a compatible writer - stream = ShaStreamCls(stream) - - tell = stream.tell - write = stream.write - - # header - version = 2 - write("DIRC") - write(pack(">LL", version, len(entries))) - - # body - for entry in entries: - beginoffset = tell() - write(entry[4]) # ctime - write(entry[5]) # mtime - path = entry[3] - plen = len(path) & CE_NAMEMASK # path length - assert plen == len(path), "Path %s too long to fit into index" % entry[3] - flags = plen | (entry[2] & CE_NAMEMASK_INV) # clear possible previous values - write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], - entry[8], entry[9], entry[10], entry[1], flags)) - write(path) - real_size = ((tell() - beginoffset + 8) & ~7) - write("\0" * ((beginoffset + real_size) - tell())) - # END for each entry - - # write previously cached extensions data - if extension_data is not None: - stream.write(extension_data) - - # write the sha over the content - stream.write_sha() - -def read_header(stream): - """Return tuple(version_long, num_entries) from the given stream""" - type_id = stream.read(4) - if type_id != "DIRC": - raise AssertionError("Invalid index file header: %r" % type_id) - version, num_entries = unpack(">LL", stream.read(4 * 2)) - - # TODO: handle version 3: extended data, see read-cache.c - assert version in (1, 2) - return version, num_entries - -def entry_key(*entry): - """:return: Key suitable to be used for the index.entries dictionary - :param entry: One instance of type BaseIndexEntry or the path and the stage""" - if len(entry) == 1: - return (entry[0].path, entry[0].stage) - else: - return tuple(entry) - # END handle entry - -def read_cache(stream): - """Read a cache file from the given stream - :return: tuple(version, entries_dict, extension_data, content_sha) - * version is the integer version number - * entries dict is a dictionary which maps IndexEntry instances to a path - at a stage - * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes - * content_sha is a 20 byte sha on all cache file contents""" - version, num_entries = read_header(stream) - count = 0 - entries = dict() - - read = stream.read - tell = stream.tell - while count < num_entries: - beginoffset = tell() - ctime = unpack(">8s", read(8))[0] - mtime = unpack(">8s", read(8))[0] - (dev, ino, mode, uid, gid, size, sha, flags) = \ - unpack(">LLLLLL20sH", read(20 + 4 * 6 + 2)) - path_size = flags & CE_NAMEMASK - path = read(path_size) - - real_size = ((tell() - beginoffset + 8) & ~7) - data = read((beginoffset + real_size) - tell()) - entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) - # entry_key would be the method to use, but we safe the effort - entries[(path, entry.stage)] = entry - count += 1 - # END for each entry - - # the footer contains extension data and a sha on the content so far - # Keep the extension footer,and verify we have a sha in the end - # Extension data 
format is: - # 4 bytes ID - # 4 bytes length of chunk - # repeated 0 - N times - extension_data = stream.read(~0) - assert len(extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) - - content_sha = extension_data[-20:] - - # truncate the sha in the end as we will dynamically create it anyway - extension_data = extension_data[:-20] - - return (version, entries, extension_data, content_sha) - -def write_tree_from_cache(entries, odb, sl, si=0): - """Create a tree from the given sorted list of entries and put the respective - trees into the given object database - - :param entries: **sorted** list of IndexEntries - :param odb: object database to store the trees in - :param si: start index at which we should start creating subtrees - :param sl: slice indicating the range we should process on the entries list - :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of - tree entries being a tuple of hexsha, mode, name""" - tree_items = list() - tree_items_append = tree_items.append - ci = sl.start - end = sl.stop - while ci < end: - entry = entries[ci] - if entry.stage != 0: - raise UnmergedEntriesError(entry) - # END abort on unmerged - ci += 1 - rbound = entry.path.find('/', si) - if rbound == -1: - # its not a tree - tree_items_append((entry.binsha, entry.mode, entry.path[si:])) - else: - # find common base range - base = entry.path[si:rbound] - xi = ci - while xi < end: - oentry = entries[xi] - orbound = oentry.path.find('/', si) - if orbound == -1 or oentry.path[si:orbound] != base: - break - # END abort on base mismatch - xi += 1 - # END find common base - - # enter recursion - # ci - 1 as we want to count our current item as well - sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1) - tree_items_append((sha, S_IFDIR, base)) - - # skip ahead - ci = xi - # END handle bounds - # END for each entry - - # finally create the tree - sio = StringIO() - tree_to_stream(tree_items, sio.write) - sio.seek(0) - - istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) - return (istream.binsha, tree_items) - -def _tree_entry_to_baseindexentry(tree_entry, stage): - return BaseIndexEntry((tree_entry[1], tree_entry[0], stage <<CE_STAGESHIFT, tree_entry[2])) - -def aggressive_tree_merge(odb, tree_shas): - """ - :return: list of BaseIndexEntries representing the aggressive merge of the given - trees. All valid entries are on stage 0, whereas the conflicting ones are left - on stage 1, 2 or 3, whereas stage 1 corresponds to the common ancestor tree, - 2 to our tree and 3 to 'their' tree. 
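-
-    For example, where 'odb' is a repository's object database and the three
-    binary shas are assumed to exist ( a sketch only )::
-
-     entries = aggressive_tree_merge(odb, [base_sha, ours_sha, theirs_sha])
-     conflicts = [e for e in entries if e.stage != 0]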
- :param tree_shas: 1, 2 or 3 trees as identified by their binary 20 byte shas - If 1 or two, the entries will effectively correspond to the last given tree - If 3 are given, a 3 way merge is performed""" - out = list() - out_append = out.append - - # one and two way is the same for us, as we don't have to handle an existing - # index, instrea - if len(tree_shas) in (1,2): - for entry in traverse_tree_recursive(odb, tree_shas[-1], ''): - out_append(_tree_entry_to_baseindexentry(entry, 0)) - # END for each entry - return out - # END handle single tree - - if len(tree_shas) > 3: - raise ValueError("Cannot handle %i trees at once" % len(tree_shas)) - - # three trees - for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ''): - if base is not None: - # base version exists - if ours is not None: - # ours exists - if theirs is not None: - # it exists in all branches, if it was changed in both - # its a conflict, otherwise we take the changed version - # This should be the most common branch, so it comes first - if( base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0] ) or \ - ( base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1] ): - # changed by both - out_append(_tree_entry_to_baseindexentry(base, 1)) - out_append(_tree_entry_to_baseindexentry(ours, 2)) - out_append(_tree_entry_to_baseindexentry(theirs, 3)) - elif base[0] != ours[0] or base[1] != ours[1]: - # only we changed it - out_append(_tree_entry_to_baseindexentry(ours, 0)) - else: - # either nobody changed it, or they did. In either - # case, use theirs - out_append(_tree_entry_to_baseindexentry(theirs, 0)) - # END handle modification - else: - - if ours[0] != base[0] or ours[1] != base[1]: - # they deleted it, we changed it, conflict - out_append(_tree_entry_to_baseindexentry(base, 1)) - out_append(_tree_entry_to_baseindexentry(ours, 2)) - # else: - # we didn't change it, ignore - # pass - # END handle our change - # END handle theirs - else: - if theirs is None: - # deleted in both, its fine - its out - pass - else: - if theirs[0] != base[0] or theirs[1] != base[1]: - # deleted in ours, changed theirs, conflict - out_append(_tree_entry_to_baseindexentry(base, 1)) - out_append(_tree_entry_to_baseindexentry(theirs, 3)) - # END theirs changed - #else: - # theirs didnt change - # pass - # END handle theirs - # END handle ours - else: - # all three can't be None - if ours is None: - # added in their branch - out_append(_tree_entry_to_baseindexentry(theirs, 0)) - elif theirs is None: - # added in our branch - out_append(_tree_entry_to_baseindexentry(ours, 0)) - else: - # both have it, except for the base, see whether it changed - if ours[0] != theirs[0] or ours[1] != theirs[1]: - out_append(_tree_entry_to_baseindexentry(ours, 2)) - out_append(_tree_entry_to_baseindexentry(theirs, 3)) - else: - # it was added the same in both - out_append(_tree_entry_to_baseindexentry(ours, 0)) - # END handle two items - # END handle heads - # END handle base exists - # END for each entries tuple - - return out diff --git a/lib/git/index/typ.py b/lib/git/index/typ.py deleted file mode 100644 index ad988285..00000000 --- a/lib/git/index/typ.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Module with additional types used by the index""" - -from util import ( - pack, - unpack - ) - -from binascii import ( - b2a_hex, - ) - -from git.objects import Blob -__all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry') - -#{ Invariants -CE_NAMEMASK = 0x0fff -CE_STAGEMASK = 0x3000 -CE_EXTENDED = 0x4000 -CE_VALID = 0x8000 
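-# Together these masks cover the 16 bit flag word: the lower 12 bits store the
-# path length, the next two the stage, and the top two the extended/valid bits,
-# i.e. stage = (flags & CE_STAGEMASK) >> CE_STAGESHIFT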
-CE_STAGESHIFT = 12 - -#} END invariants - -class BlobFilter(object): - """ - Predicate to be used by iter_blobs allowing to filter only return blobs which - match the given list of directories or files. - - The given paths are given relative to the repository. - """ - __slots__ = 'paths' - - def __init__(self, paths): - """:param paths: - tuple or list of paths which are either pointing to directories or - to files relative to the current repository - """ - self.paths = paths - - def __call__(self, stage_blob): - path = stage_blob[1].path - for p in self.paths: - if path.startswith(p): - return True - # END for each path in filter paths - return False - - -class BaseIndexEntry(tuple): - """Small Brother of an index entry which can be created to describe changes - done to the index in which case plenty of additional information is not requried. - - As the first 4 data members match exactly to the IndexEntry type, methods - expecting a BaseIndexEntry can also handle full IndexEntries even if they - use numeric indices for performance reasons. """ - - def __str__(self): - return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path) - - def __repr__(self): - return "(%o, %s, %i, %s)" % (self.mode, self.hexsha, self.stage, self.path) - - @property - def mode(self): - """ File Mode, compatible to stat module constants """ - return self[0] - - @property - def binsha(self): - """binary sha of the blob """ - return self[1] - - @property - def hexsha(self): - """hex version of our sha""" - return b2a_hex(self[1]) - - @property - def stage(self): - """Stage of the entry, either: - - * 0 = default stage - * 1 = stage before a merge or common ancestor entry in case of a 3 way merge - * 2 = stage of entries from the 'left' side of the merge - * 3 = stage of entries from the right side of the merge - - :note: For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html - """ - return (self[2] & CE_STAGEMASK) >> CE_STAGESHIFT - - @property - def path(self): - """:return: our path relative to the repository working tree root""" - return self[3] - - @property - def flags(self): - """:return: flags stored with this entry""" - return self[2] - - @classmethod - def from_blob(cls, blob, stage = 0): - """:return: Fully equipped BaseIndexEntry at the given stage""" - return cls((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path)) - - def to_blob(self, repo): - """:return: Blob using the information of this index entry""" - return Blob(repo, self.binsha, self.mode, self.path) - - -class IndexEntry(BaseIndexEntry): - """Allows convenient access to IndexEntry data without completely unpacking it. - - Attributes usully accessed often are cached in the tuple whereas others are - unpacked on demand. - - See the properties for a mapping between names and tuple indices. 
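-
-    For example, assuming 'repo' is a git.Repo whose index contains 'README'
-    ( a sketch only )::
-
-     entry = repo.index.entries[('README', 0)]
-     entry.hexsha, entry.size, entry.mtime    # accessed through the properties below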
""" - @property - def ctime(self): - """ - :return: - Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the - file's creation time""" - return unpack(">LL", self[4]) - - @property - def mtime(self): - """See ctime property, but returns modification time """ - return unpack(">LL", self[5]) - - @property - def dev(self): - """ Device ID """ - return self[6] - - @property - def inode(self): - """ Inode ID """ - return self[7] - - @property - def uid(self): - """ User ID """ - return self[8] - - @property - def gid(self): - """ Group ID """ - return self[9] - - @property - def size(self): - """:return: Uncompressed size of the blob """ - return self[10] - - @classmethod - def from_base(cls, base): - """ - :return: - Minimal entry as created from the given BaseIndexEntry instance. - Missing values will be set to null-like values - - :param base: Instance of type BaseIndexEntry""" - time = pack(">LL", 0, 0) - return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) - - @classmethod - def from_blob(cls, blob, stage = 0): - """:return: Minimal entry resembling the given blob object""" - time = pack(">LL", 0, 0) - return IndexEntry((blob.mode, blob.binsha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size)) - - diff --git a/lib/git/index/util.py b/lib/git/index/util.py deleted file mode 100644 index bd5fcc03..00000000 --- a/lib/git/index/util.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Module containing index utilities""" -import struct -import tempfile -import os - -__all__ = ( 'TemporaryFileSwap', 'post_clear_cache', 'default_index', 'git_working_dir' ) - -#{ Aliases -pack = struct.pack -unpack = struct.unpack - - -#} END aliases - -class TemporaryFileSwap(object): - """Utility class moving a file to a temporary location within the same directory - and moving it back on to where on object deletion.""" - __slots__ = ("file_path", "tmp_file_path") - - def __init__(self, file_path): - self.file_path = file_path - self.tmp_file_path = self.file_path + tempfile.mktemp('','','') - # it may be that the source does not exist - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass - - def __del__(self): - if os.path.isfile(self.tmp_file_path): - if os.name == 'nt' and os.path.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - # END temp file exists - - -#{ Decorators - -def post_clear_cache(func): - """Decorator for functions that alter the index using the git command. This would - invalidate our possibly existing entries dictionary which is why it must be - deleted to allow it to be lazily reread later. - - :note: - This decorator will not be required once all functions are implemented - natively which in fact is possible, but probably not feasible performance wise. - """ - def post_clear_cache_if_not_raised(self, *args, **kwargs): - rval = func(self, *args, **kwargs) - self._delete_entries_cache() - return rval - - # END wrapper method - post_clear_cache_if_not_raised.__name__ = func.__name__ - return post_clear_cache_if_not_raised - -def default_index(func): - """Decorator assuring the wrapped method may only run if we are the default - repository index. This is as we rely on git commands that operate - on that index only. 
""" - def check_default_index(self, *args, **kwargs): - if self._file_path != self._index_path(): - raise AssertionError( "Cannot call %r on indices that do not represent the default git index" % func.__name__ ) - return func(self, *args, **kwargs) - # END wrpaper method - - check_default_index.__name__ = func.__name__ - return check_default_index - -def git_working_dir(func): - """Decorator which changes the current working dir to the one of the git - repository in order to assure relative paths are handled correctly""" - def set_git_working_dir(self, *args, **kwargs): - cur_wd = os.getcwd() - os.chdir(self.repo.working_tree_dir) - try: - return func(self, *args, **kwargs) - finally: - os.chdir(cur_wd) - # END handle working dir - # END wrapper - - set_git_working_dir.__name__ = func.__name__ - return set_git_working_dir - -#} END decorators diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py deleted file mode 100644 index e8e0ef39..00000000 --- a/lib/git/objects/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -""" -Import all submodules main classes into the package space -""" -import inspect -from base import * -# Fix import dependency - add IndexObject to the util module, so that it can be -# imported by the submodule.base -import submodule.util -submodule.util.IndexObject = IndexObject -from submodule.base import * -from submodule.root import * - -# must come after submodule was made available -from tag import * -from blob import * -from commit import * -from tree import * -from util import Actor - -__all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py deleted file mode 100644 index b8cec47f..00000000 --- a/lib/git/objects/base.py +++ /dev/null @@ -1,168 +0,0 @@ -# base.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -from git.util import LazyMixin, join_path_native, stream_copy -from util import get_object_type_by_name -from gitdb.util import ( - hex_to_bin, - bin_to_hex, - basename - ) - -import gitdb.typ as dbtyp - -_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" - -__all__ = ("Object", "IndexObject") - -class Object(LazyMixin): - """Implements an Object which may be Blobs, Trees, Commits and Tags""" - NULL_HEX_SHA = '0'*40 - NULL_BIN_SHA = '\0'*20 - - TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type) - __slots__ = ("repo", "binsha", "size" ) - type = None # to be set by subclass - - def __init__(self, repo, binsha): - """Initialize an object by identifying it by its binary sha. - All keyword arguments will be set on demand if None. - - :param repo: repository this object is located in - - :param binsha: 20 byte SHA1""" - super(Object,self).__init__() - self.repo = repo - self.binsha = binsha - assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha)) - - @classmethod - def new(cls, repo, id): - """ - :return: New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a binsha even though - the input id may have been a Reference or Rev-Spec - - :param id: reference, rev-spec, or hexsha - - :note: This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a binsha.""" - return repo.rev_parse(str(id)) - - @classmethod - def new_from_sha(cls, repo, sha1): - """ - :return: new object instance of a type appropriate to represent the given - binary sha1 - :param sha1: 20 byte binary sha1""" - oinfo = repo.odb.info(sha1) - inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha) - inst.size = oinfo.size - return inst - - def _set_cache_(self, attr): - """Retrieve object information""" - if attr == "size": - oinfo = self.repo.odb.info(self.binsha) - self.size = oinfo.size - # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """:return: True if the objects have the same SHA1""" - return self.binsha == other.binsha - - def __ne__(self, other): - """:return: True if the objects do not have the same SHA1 """ - return self.binsha != other.binsha - - def __hash__(self): - """:return: Hash of our id allowing objects to be used in dicts and sets""" - return hash(self.binsha) - - def __str__(self): - """:return: string of our SHA1 as understood by all git commands""" - return bin_to_hex(self.binsha) - - def __repr__(self): - """:return: string with pythonic representation of our object""" - return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) - - @property - def hexsha(self): - """:return: 40 byte hex version of our 20 byte binary sha""" - return bin_to_hex(self.binsha) - - @property - def data_stream(self): - """ :return: File Object compatible stream to the uncompressed raw data of the object - :note: returned 
streams must be read in order"""
-        return self.repo.odb.stream(self.binsha)
-
-    def stream_data(self, ostream):
-        """Writes our data directly to the given output stream
-        :param ostream: File object compatible stream object.
-        :return: self"""
-        istream = self.repo.odb.stream(self.binsha)
-        stream_copy(istream, ostream)
-        return self
-
-
-class IndexObject(Object):
-    """Base for all objects that can be part of the index file, namely Tree, Blob and
-    SubModule objects"""
-    __slots__ = ("path", "mode")
-
-    # for compatibility with iterable lists
-    _id_attribute_ = 'path'
-
-    def __init__(self, repo, binsha, mode=None, path=None):
-        """Initialize a newly instanced IndexObject
-        :param repo: is the Repo we are located in
-        :param binsha: 20 byte sha1
-        :param mode: is the stat compatible file mode as int, use the stat module
-            to evaluate the information
-        :param path:
-            is the path to the file in the file system, relative to the git repository root, i.e.
-            file.ext or folder/other.ext
-        :note:
-            Path may not be set if the index object has been created directly, as it cannot
-            be retrieved without knowing the parent tree."""
-        super(IndexObject, self).__init__(repo, binsha)
-        if mode is not None:
-            self.mode = mode
-        if path is not None:
-            self.path = path
-
-    def __hash__(self):
-        """:return:
-            Hash of our path, as index items are uniquely identifiable by path, not
-            by their data !"""
-        return hash(self.path)
-
-    def _set_cache_(self, attr):
-        if attr in IndexObject.__slots__:
-            # they cannot be retrieved later on ( not without searching for them )
-            raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
-        else:
-            super(IndexObject, self)._set_cache_(attr)
-        # END handle slot attribute
-
-    @property
-    def name(self):
-        """:return: Name portion of the path, effectively being the basename"""
-        return basename(self.path)
-
-    @property
-    def abspath(self):
-        """
-        :return:
-            Absolute path to this index object in the file system ( as opposed to the
-            .path field which is a path relative to the git repository ).
-
-            The returned path will be native to the system and contains '\' on Windows. """
-        return join_path_native(self.repo.working_tree_dir, self.path)
-
diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py
deleted file mode 100644
index 32f8c61c..00000000
--- a/lib/git/objects/blob.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# blob.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from mimetypes import guess_type
-import base
-
-__all__ = ('Blob', )
-
-class Blob(base.IndexObject):
-    """A Blob encapsulates a git blob object"""
-    DEFAULT_MIME_TYPE = "text/plain"
-    type = "blob"
-
-    __slots__ = tuple()
-
-    @property
-    def mime_type(self):
-        """
-        :return: String describing the mime type of this file (based on the filename)
-        :note: Defaults to 'text/plain' in case the actual file type is unknown.
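-
-        For instance, assuming the repository's tree contains 'setup.py'
-        ( a sketch only )::
-
-         blob = repo.head.commit.tree / 'setup.py'
-         blob.mime_type    # e.g. 'text/x-python'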
""" - guesses = None - if self.path: - guesses = guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py deleted file mode 100644 index a2b6c554..00000000 --- a/lib/git/objects/commit.py +++ /dev/null @@ -1,472 +0,0 @@ -# commit.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from git.util import ( - Iterable, - Stats, - ) -from git.diff import Diffable -from tree import Tree -from gitdb import IStream -from cStringIO import StringIO - -import base -from gitdb.util import ( - hex_to_bin - ) -from util import ( - Traversable, - Serializable, - get_user_id, - parse_date, - Actor, - altz_to_utctz_str, - parse_actor_and_date - ) -from time import ( - time, - altzone - ) -import os -import sys - -__all__ = ('Commit', ) - -class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): - """Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_name = "GIT_AUTHOR_NAME" - env_author_email = "GIT_AUTHOR_EMAIL" - env_author_date = "GIT_AUTHOR_DATE" - env_committer_name = "GIT_COMMITTER_NAME" - env_committer_email = "GIT_COMMITTER_EMAIL" - env_committer_date = "GIT_COMMITTER_DATE" - env_email = "EMAIL" - - # CONFIGURATION KEYS - conf_name = 'name' - conf_email = 'email' - conf_encoding = 'i18n.commitencoding' - - # INVARIANTS - default_encoding = "UTF-8" - - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents", "encoding") - _id_attribute_ = "binsha" - - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. 
The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper - binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) - else: - super(Commit, self)._set_cache_(attr) - # END handle attrs - - @property - def summary(self): - """:return: First line of the commit message""" - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """Count the number of commits reachable from this commit - - :param paths: - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - :param kwargs: - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - :return: int defining the number of reachable commits""" - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - :return: - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes""" - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """Find all commits matching the given criteria. - - :param repo: is the Repo - :param rev: revision specifier, see git-rev-parse for viable options - :param paths: - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - :param kwargs: - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - :return: iterator yielding Commit items""" - if 'pretty' in kwargs: - raise ValueError("--pretty cannot be used as parsing expects single sha's only") - # END handle pretty - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) - return cls._iter_from_process_or_stream(repo, proc) - - def iter_parents(self, paths='', **kwargs): - """Iterate _all_ parents of this commit. 
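-
-        For example ( a sketch, assuming 'repo' is a git.Repo with history )::
-
-         for parent in repo.head.commit.iter_parents():
-             print parent.hexsha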
- - :param paths: - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - :param kwargs: All arguments allowed by git-rev-list - :return: Iterator yielding Commit objects which are parents of self """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items(self.repo, self, paths, **kwargs) - - @property - def stats(self): - """Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - :return: git.Stats""" - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) - return Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database - - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """Commit the given tree, creating a commit object. - - :param repo: Repo object the commit should be part of - :param tree: Tree object or hex or bin sha - the tree of the new commit - :param message: Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - :param parent_commits: - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - :param head: - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. 
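The stats property above parses git's numstat output before handing it to Stats._list_from_string; the tab-separated format looks like this (sample data is made up):

    # one "insertions<TAB>deletions<TAB>path" line per changed file
    sample = "3\t1\tlib/git/objects/commit.py\n10\t0\tAUTHORS\n"
    insertions = deletions = files = 0
    for line in sample.splitlines():
        ins, dels, path = line.split("\t")
        insertions += int(ins)
        deletions += int(dels)
        files += 1
    print((insertions, deletions, files))    # (13, 1, 2)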
- - :return: Commit object representing the new commit - - :note: - Additional information about the committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information""" - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - # retrieve all additional information, create a commit object, and - # serialize it - # Generally: - # * Environment variables override configuration values - # * Sensible defaults are set according to the git documentation - - # COMMITER AND AUTHOR INFO - cr = repo.config_reader() - env = os.environ - default_email = get_user_id() - default_name = default_email.split('@')[0] - - conf_name = cr.get_value('user', cls.conf_name, default_name) - conf_email = cr.get_value('user', cls.conf_email, default_email) - - author_name = env.get(cls.env_author_name, conf_name) - author_email = env.get(cls.env_author_email, conf_email) - - committer_name = env.get(cls.env_committer_name, conf_name) - committer_email = env.get(cls.env_committer_email, conf_email) - - # PARSE THE DATES - unix_time = int(time()) - offset = altzone - - author_date_str = env.get(cls.env_author_date, '') - if author_date_str: - author_time, author_offset = parse_date(author_date_str) - else: - author_time, author_offset = unix_time, offset - # END set author time - - committer_date_str = env.get(cls.env_committer_date, '') - if committer_date_str: - committer_time, committer_offset = parse_date(committer_date_str) - else: - committer_time, committer_offset = unix_time, offset - # END set committer time - - # assume utf8 encoding - enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) - - author = Actor(author_name, author_email) - committer = Actor(committer_name, committer_email) - - - # if the tree is no object, make sure we create one - otherwise - # the created commit object is invalid - if isinstance(tree, str): - tree = repo.tree(tree) - # END tree conversion - - # CREATE NEW COMMIT - new_commit = cls(repo, cls.NULL_BIN_SHA, tree, - author, author_time, author_offset, - committer, committer_time, committer_offset, - message, parent_commits, conf_encoding) - - stream = StringIO() - new_commit._serialize(stream) - streamlen = stream.tell() - stream.seek(0) - - istream = repo.odb.store(IStream(cls.type, streamlen, stream)) - new_commit.binsha = istream.binsha - - if head: - # need late import here, importing git at the very beginning throws - # as well ... 
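The name and email resolution above follows a fixed precedence: environment variables override git configuration values, which in turn override a default derived from the user id. Reduced to one line (stand-in values):

    import os
    default_name = 'mtrier'    # stand-in for get_user_id().split('@')[0]
    conf_name = default_name   # stand-in for cr.get_value('user', 'name', default_name)
    author_name = os.environ.get('GIT_AUTHOR_NAME', conf_name)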
- import git.refs - try: - repo.head.commit = new_commit - except ValueError: - # head is not yet set to the ref our HEAD points to - # Happens on first commit - import git.refs - master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit) - repo.head.reference = master - # END handle empty repositories - # END advance head handling - - return new_commit - - #{ Serializable Implementation - - def _serialize(self, stream): - write = stream.write - write("tree %s\n" % self.tree) - for p in self.parents: - write("parent %s\n" % p) - - a = self.author - aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - - c = self.committer - fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) - - write(fmt % ("committer", c.name, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) - - if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) - - write("\n") - - # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): - write(self.message.encode(self.encoding)) - else: - write(self.message) - # END handle encoding - return self - - def _deserialize(self, stream): - """:param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object""" - readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') - - self.parents = list() - next_line = None - while True: - parent_line = readline() - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) - # END for each parent line - self.parents = tuple(self.parents) - - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) - - - # now we can have the encoding line, or an empty line followed by the optional - # message. 
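_serialize above writes the plain loose-object commit layout. The same layout as a standalone sketch, assuming the actor lines are already rendered as "Name <mail> time tz":

    def format_commit(tree_hex, parent_hexs, author_line, committer_line,
                      message, encoding='UTF-8'):
        # tree, parents, author, committer, optional encoding header,
        # blank separator line, then the raw message
        lines = ['tree %s' % tree_hex]
        lines.extend('parent %s' % p for p in parent_hexs)
        lines.append('author %s' % author_line)
        lines.append('committer %s' % committer_line)
        if encoding != 'UTF-8':
            lines.append('encoding %s' % encoding)
        return '\n'.join(lines) + '\n\n' + message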
- self.encoding = self.default_encoding - # read encoding or empty line to separate message - enc = readline() - enc = enc.strip() - if enc: - self.encoding = enc[enc.find(' ')+1:] - # now comes the message separator - readline() - # END handle encoding - - # decode the authors name - try: - self.author.name = self.author.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) - # END handle author's encoding - - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip - self.message = stream.read() - try: - self.message = self.message.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) - # END exception handling - return self - - #} END serializable implementation diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py deleted file mode 100644 index 9b0a377c..00000000 --- a/lib/git/objects/fun.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Module with functions which are supposed to be as fast as possible""" -from stat import S_ISDIR - -__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', - 'traverse_tree_recursive') - - - - -def tree_to_stream(entries, write): - """Write the give list of entries into a stream using its write method - :param entries: **sorted** list of tuples with (binsha, mode, name) - :param write: write method which takes a data string""" - ord_zero = ord('0') - bit_mask = 7 # 3 bits set - - for binsha, mode, name in entries: - mode_str = '' - for i in xrange(6): - mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str - # END for each 8 octal value - - # git slices away the first octal if its zero - if mode_str[0] == '0': - mode_str = mode_str[1:] - # END save a byte - - # here it comes: if the name is actually unicode, the replacement below - # will not work as the binsha is not part of the ascii unicode encoding - - # hence we must convert to an utf8 string for it to work properly. - # According to my tests, this is exactly what git does, that is it just - # takes the input literally, which appears to be utf8 on linux. 
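The mode loop in tree_to_stream above hand-rolls octal formatting; the same result with a format string, including git's habit of dropping a leading zero digit:

    mode = 0o100644                  # regular file
    mode_str = '%06o' % mode         # '100644'
    if mode_str[0] == '0':           # trees: 0o40000 -> '040000' -> '40000'
        mode_str = mode_str[1:]
    print(mode_str)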
- if isinstance(name, unicode): - name = name.encode("utf8") - write("%s %s\0%s" % (mode_str, name, binsha)) - # END for each item - - -def tree_entries_from_data(data): - """Reads the binary representation of a tree and returns tuples of Tree items - :param data: data block with tree data - :return: list(tuple(binsha, mode, tree_relative_path), ...)""" - ord_zero = ord('0') - len_data = len(data) - i = 0 - out = list() - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - - # default encoding for strings in git is utf8 - # Only use the respective unicode object if the byte stream was encoded - name = data[ns:i] - name_enc = name.decode("utf-8") - if len(name) > len(name_enc): - name = name_enc - # END handle encoding - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - out.append((sha, mode, name)) - # END for each byte in data stream - return out - - -def _find_by_name(tree_data, name, is_dir, start_at): - """return data entry matching the given name and tree mode - or None. - Before the item is returned, the respective data item is set - None in the tree_data list to mark it done""" - try: - item = tree_data[start_at] - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[start_at] = None - return item - except IndexError: - pass - # END exception handling - for index, item in enumerate(tree_data): - if item and item[2] == name and S_ISDIR(item[1]) == is_dir: - tree_data[index] = None - return item - # END if item matches - # END for each item - return None - -def _to_full_path(item, path_prefix): - """Rebuild entry with given path prefix""" - if not item: - return item - return (item[0], item[1], path_prefix+item[2]) - -def traverse_trees_recursive(odb, tree_shas, path_prefix): - """ - :return: list with entries according to the given binary tree-shas. - The result is encoded in a list - of n tuple|None per blob/commit, (n == len(tree_shas)), where - * [0] == 20 byte sha - * [1] == mode as int - * [2] == path relative to working tree root - The entry tuple is None if the respective blob/commit did not - exist in the given tree. - :param tree_shas: iterable of shas pointing to trees. All trees must - be on the same level. A tree-sha may be None in which case None - :param path_prefix: a prefix to be added to the returned paths on this level, - set it '' for the first iteration - :note: The ordering of the returned items will be partially lost""" - trees_data = list() - nt = len(tree_shas) - for tree_sha in tree_shas: - if tree_sha is None: - data = list() - else: - data = tree_entries_from_data(odb.stream(tree_sha).read()) - # END handle muted trees - trees_data.append(data) - # END for each sha to get data for - - out = list() - out_append = out.append - - # find all matching entries and recursively process them together if the match - # is a tree. If the match is a non-tree item, put it into the result. 
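A constructed round trip for tree_entries_from_data above; the entry layout is "<mode> <name>\0<20-byte binsha>" (Python 2 byte strings to match the code, sha bytes made up):

    from binascii import hexlify
    entry = '100644 hello.txt\0' + '\x01' * 20
    for binsha, mode, name in tree_entries_from_data(entry):
        print('%o %s %s' % (mode, name, hexlify(binsha)))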
- # Processed items will be set None - for ti, tree_data in enumerate(trees_data): - for ii, item in enumerate(tree_data): - if not item: - continue - # END skip already done items - entries = [ None for n in range(nt) ] - entries[ti] = item - sha, mode, name = item # its faster to unpack - is_dir = S_ISDIR(mode) # type mode bits - - # find this item in all other tree data items - # wrap around, but stop one before our current index, hence - # ti+nt, not ti+1+nt - for tio in range(ti+1, ti+nt): - tio = tio % nt - entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) - # END for each other item data - - # if we are a directory, enter recursion - if is_dir: - out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/')) - else: - out_append(tuple(_to_full_path(e, path_prefix) for e in entries)) - # END handle recursion - - # finally mark it done - tree_data[ii] = None - # END for each item - - # we are done with one tree, set all its data empty - del(tree_data[:]) - # END for each tree_data chunk - return out - -def traverse_tree_recursive(odb, tree_sha, path_prefix): - """ - :return: list of entries of the tree pointed to by the binary tree_sha. An entry - has the following format: - * [0] 20 byte sha - * [1] mode as int - * [2] path relative to the repository - :param path_prefix: prefix to prepend to the front of all returned paths""" - entries = list() - data = tree_entries_from_data(odb.stream(tree_sha).read()) - - # unpacking/packing is faster than accessing individual items - for sha, mode, name in data: - if S_ISDIR(mode): - entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/')) - else: - entries.append((sha, mode, path_prefix+name)) - # END for each item - - return entries diff --git a/lib/git/objects/submodule/__init__.py b/lib/git/objects/submodule/__init__.py deleted file mode 100644 index 82df59b0..00000000 --- a/lib/git/objects/submodule/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# NOTE: Cannot import anything here as the top-level _init_ has to handle -# our dependencies diff --git a/lib/git/objects/submodule/base.py b/lib/git/objects/submodule/base.py deleted file mode 100644 index 4f4223b6..00000000 --- a/lib/git/objects/submodule/base.py +++ /dev/null @@ -1,862 +0,0 @@ -import util -from util import ( - mkhead, - sm_name, - sm_section, - unbare_repo, - SubmoduleConfigParser, - find_first_remote_branch - ) -from git.objects.util import Traversable -from StringIO import StringIO # need a dict to set bloody .name field -from git.util import ( - Iterable, - join_path_native, - to_native_path_linux - ) -from git.config import SectionConstraint -from git.exc import ( - InvalidGitRepositoryError, - NoSuchPathError - ) -import stat -import git - -import os -import sys -import time - -import shutil - -__all__ = ["Submodule"] - - -# IndexObject comes via util module, its a 'hacky' fix thanks to pythons import -# mechanism which cause plenty of trouble of the only reason for packages and -# modules is refactoring - subpackages shoudn't depend on parent packages -class Submodule(util.IndexObject, Iterable, Traversable): - """Implements access to a git submodule. They are special in that their sha - represents a commit in the submodule's repository which is to be checked out - at the path of this instance. - The submodule type does not have a string type associated with it, as it exists - solely as a marker in the tree and index. 
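A hypothetical walk of a repository's root tree using traverse_tree_recursive above; assumes repo is a git.Repo instance exposing the odb attribute used throughout this module:

    import git
    repo = git.Repo('.')
    root = repo.head.commit.tree
    for binsha, mode, path in traverse_tree_recursive(repo.odb, root.binsha, ''):
        print('%06o %s' % (mode, path))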
- - All methods work in bare and non-bare repositories.""" - - _id_attribute_ = "name" - k_modules_file = '.gitmodules' - k_head_option = 'branch' - k_head_default = 'master' - k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status - - # this is a bogus type for base class compatability - type = 'submodule' - - __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') - _cache_attrs = ('path', '_url', '_branch_path') - - def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None): - """Initialize this instance with its attributes. We only document the ones - that differ from ``IndexObject`` - - :param repo: Our parent repository - :param binsha: binary sha referring to a commit in the remote repository, see url parameter - :param parent_commit: see set_parent_commit() - :param url: The url to the remote repository which is the submodule - :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" - super(Submodule, self).__init__(repo, binsha, mode, path) - self.size = 0 - if parent_commit is not None: - self._parent_commit = parent_commit - if url is not None: - self._url = url - if branch_path is not None: - assert isinstance(branch_path, basestring) - self._branch_path = branch_path - if name is not None: - self._name = name - - def _set_cache_(self, attr): - if attr == '_parent_commit': - # set a default value, which is the root tree of the current head - self._parent_commit = self.repo.commit() - elif attr in ('path', '_url', '_branch_path'): - reader = self.config_reader() - # default submodule values - self.path = reader.get_value('path') - self._url = reader.get_value('url') - # git-python extension values - optional - self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) - elif attr == '_name': - raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") - else: - super(Submodule, self)._set_cache_(attr) - # END handle attribute name - - def _get_intermediate_items(self, item): - """:return: all the submodules of our module repository""" - try: - return type(self).list_items(item.module()) - except InvalidGitRepositoryError: - return list() - # END handle intermeditate items - - def __eq__(self, other): - """Compare with another submodule""" - # we may only compare by name as this should be the ID they are hashed with - # Otherwise this type wouldn't be hashable - # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) - return self._name == other._name - - def __ne__(self, other): - """Compare with another submodule for inequality""" - return not (self == other) - - def __hash__(self): - """Hash this instance using its logical id, not the sha""" - return hash(self._name) - - def __str__(self): - return self._name - - def __repr__(self): - return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path) - - @classmethod - def _config_parser(cls, repo, parent_commit, read_only): - """:return: Config Parser constrained to our submodule in read or write mode - :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository - at the given parent commit. 
Otherwise the exception would be delayed until the first - access of the config parser""" - parent_matches_head = repo.head.commit == parent_commit - if not repo.bare and parent_matches_head: - fp_module = cls.k_modules_file - fp_module_path = os.path.join(repo.working_tree_dir, fp_module) - if not os.path.isfile(fp_module_path): - raise IOError("%s file was not accessible" % fp_module_path) - # END handle existance - fp_module = fp_module_path - else: - try: - fp_module = cls._sio_modules(parent_commit) - except KeyError: - raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit)) - # END handle exceptions - # END handle non-bare working tree - - if not read_only and (repo.bare or not parent_matches_head): - raise ValueError("Cannot write blobs of 'historical' submodule configurations") - # END handle writes of historical submodules - - return SubmoduleConfigParser(fp_module, read_only = read_only) - - def _clear_cache(self): - # clear the possibly changed values - for name in self._cache_attrs: - try: - delattr(self, name) - except AttributeError: - pass - # END try attr deletion - # END for each name to delete - - @classmethod - def _sio_modules(cls, parent_commit): - """:return: Configuration file as StringIO - we only access it through the respective blob's data""" - sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) - sio.name = cls.k_modules_file - return sio - - def _config_parser_constrained(self, read_only): - """:return: Config Parser constrained to our submodule in read or write mode""" - parser = self._config_parser(self.repo, self._parent_commit, read_only) - parser.set_submodule(self) - return SectionConstraint(parser, sm_section(self.name)) - - #{ Edit Interface - - @classmethod - def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): - """Add a new submodule to the given repository. This will alter the index - as well as the .gitmodules file, but will not create a new commit. - If the submodule already exists, no matter if the configuration differs - from the one provided, the existing submodule will be returned. - - :param repo: Repository instance which should receive the submodule - :param name: The name/identifier for the submodule - :param path: repository-relative or absolute path at which the submodule - should be located - It will be created as required during the repository initialization. - :param url: git-clone compatible URL, see git-clone reference for more information - If None, the repository is assumed to exist, and the url of the first - remote is taken instead. This is useful if you want to make an existing - repository a submodule of anotherone. - :param branch: branch at which the submodule should (later) be checked out. - The given branch must exist in the remote repository, and will be checked - out locally as a tracking branch. - It will only be written into the configuration if it not None, which is - when the checked out branch will be the one the remote HEAD pointed to. 
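The parser above reads plain INI-style sections from .gitmodules; for reference, even the stdlib parser accepts the same shape (Python 2 module names to match the code, values made up, indentation simplified):

    from ConfigParser import ConfigParser
    from StringIO import StringIO
    sample = '[submodule "gitdb"]\npath = ext/gitdb\nurl = git://example.com/gitdb.git\n'
    cp = ConfigParser()
    cp.readfp(StringIO(sample))
    print(cp.get('submodule "gitdb"', 'path'))   # ext/gitdb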
- The result you get in these situation is somewhat fuzzy, and it is recommended - to specify at least 'master' here - :param no_checkout: if True, and if the repository has to be cloned manually, - no checkout will be performed - :return: The newly created submodule instance - :note: works atomically, such that no change will be done if the repository - update fails for instance""" - if repo.bare: - raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") - # END handle bare repos - - path = to_native_path_linux(path) - if path.endswith('/'): - path = path[:-1] - # END handle trailing slash - - # assure we never put backslashes into the url, as some operating systems - # like it ... - if url != None: - url = to_native_path_linux(url) - #END assure url correctness - - # INSTANTIATE INTERMEDIATE SM - sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name) - if sm.exists(): - # reretrieve submodule from tree - try: - return repo.head.commit.tree[path] - except KeyError: - # could only be in index - index = repo.index - entry = index.entries[index.entry_key(path, 0)] - sm.binsha = entry.binsha - return sm - # END handle exceptions - # END handle existing - - br = git.Head.to_full_path(str(branch) or cls.k_head_default) - has_module = sm.module_exists() - branch_is_default = branch is None - if has_module and url is not None: - if url not in [r.url for r in sm.module().remotes]: - raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath)) - # END check url - # END verify urls match - - mrepo = None - if url is None: - if not has_module: - raise ValueError("A URL was not given and existing repository did not exsit at %s" % path) - # END check url - mrepo = sm.module() - urls = [r.url for r in mrepo.remotes] - if not urls: - raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath) - # END verify we have url - url = urls[0] - else: - # clone new repo - kwargs = {'n' : no_checkout} - if not branch_is_default: - kwargs['b'] = br - # END setup checkout-branch - mrepo = git.Repo.clone_from(url, path, **kwargs) - # END verify url - - # update configuration and index - index = sm.repo.index - writer = sm.config_writer(index=index, write=False) - writer.set_value('url', url) - writer.set_value('path', path) - - sm._url = url - if not branch_is_default: - # store full path - writer.set_value(cls.k_head_option, br) - sm._branch_path = br - # END handle path - del(writer) - - # we deliberatly assume that our head matches our index ! - pcommit = repo.head.commit - sm._parent_commit = pcommit - sm.binsha = mrepo.head.commit.binsha - index.add([sm], write=True) - - return sm - - def update(self, recursive=False, init=True, to_latest_revision=False): - """Update the repository of this submodule to point to the checkout - we point at with the binsha of this instance. - - :param recursive: if True, we will operate recursively and update child- - modules as well. - :param init: if True, the module repository will be cloned into place if necessary - :param to_latest_revision: if True, the submodule's sha will be ignored during checkout. - Instead, the remote will be fetched, and the local tracking branch updated. 
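End to end, the edit interface above is used roughly like this; a hypothetical sketch (name, path and url are made up, and Submodule is assumed to be re-exported by the top-level git package):

    import git
    repo = git.Repo('.')
    sm = git.Submodule.add(repo, 'gitdb', 'ext/gitdb',
                           url='git://example.com/gitdb.git', branch='master')
    sm.update(init=True, recursive=True)    # clone and check out as recorded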
- This only works if we have a local tracking branch, which is the case - if the remote repository had a master branch, or of the 'branch' option - was specified for this submodule and the branch existed remotely - :note: does nothing in bare repositories - :note: method is definitely not atomic if recurisve is True - :return: self""" - if self.repo.bare: - return self - #END pass in bare mode - - - # ASSURE REPO IS PRESENT AND UPTODATE - ##################################### - try: - mrepo = self.module() - for remote in mrepo.remotes: - remote.fetch() - #END fetch new data - except InvalidGitRepositoryError: - if not init: - return self - # END early abort if init is not allowed - import git - - # there is no git-repository yet - but delete empty paths - module_path = join_path_native(self.repo.working_tree_dir, self.path) - if os.path.isdir(module_path): - try: - os.rmdir(module_path) - except OSError: - raise OSError("Module directory at %r does already exist and is non-empty" % module_path) - # END handle OSError - # END handle directory removal - - # don't check it out at first - nonetheless it will create a local - # branch according to the remote-HEAD if possible - mrepo = git.Repo.clone_from(self.url, module_path, n=True) - - # see whether we have a valid branch to checkout - try: - # find a remote which has our branch - we try to be flexible - remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name) - local_branch = mkhead(mrepo, self.branch_path) - - # have a valid branch, but no checkout - make sure we can figure - # that out by marking the commit with a null_sha - # have to write it directly as .commit = NULLSHA tries to resolve the sha - # This will bring the branch into existance - refpath = join_path_native(mrepo.git_dir, local_branch.path) - refdir = os.path.dirname(refpath) - if not os.path.isdir(refdir): - os.makedirs(refdir) - #END handle directory - open(refpath, 'w').write(self.NULL_HEX_SHA) - # END initial checkout + branch creation - - # make sure HEAD is not detached - mrepo.head.ref = local_branch - mrepo.head.ref.set_tracking_branch(remote_branch) - except IndexError: - print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path - #END handle tracking branch - - # NOTE: Have to write the repo config file as well, otherwise - # the default implementation will be offended and not update the repository - # Maybe this is a good way to assure it doesn't get into our way, but - # we want to stay backwards compatible too ... . Its so redundant ! 
- self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url) - #END handle initalization - - - # DETERMINE SHAS TO CHECKOUT - ############################ - binsha = self.binsha - hexsha = self.hexsha - is_detached = mrepo.head.is_detached - if to_latest_revision: - msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir - if not is_detached: - rref = mrepo.head.ref.tracking_branch() - if rref is not None: - rcommit = rref.commit - binsha = rcommit.binsha - hexsha = rcommit.hexsha - else: - print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref) - # END handle remote ref - else: - print >> sys.stderr, "%s there was no local tracking branch" % msg_base - # END handle detached head - # END handle to_latest_revision option - - # update the working tree - if mrepo.head.commit.binsha != binsha: - if is_detached: - # NOTE: for now we force, the user is no supposed to change detached - # submodules anyway. Maybe at some point this becomes an option, to - # properly handle user modifications - see below for future options - # regarding rebase and merge. - mrepo.git.checkout(hexsha, force=True) - else: - # TODO: allow to specify a rebase, merge, or reset - # TODO: Warn if the hexsha forces the tracking branch off the remote - # branch - this should be prevented when setting the branch option - mrepo.head.reset(hexsha, index=True, working_tree=True) - # END handle checkout - # END update to new commit only if needed - - # HANDLE RECURSION - ################## - if recursive: - for submodule in self.iter_items(self.module()): - submodule.update(recursive, init, to_latest_revision) - # END handle recursive update - # END for each submodule - - return self - - @unbare_repo - def move(self, module_path, configuration=True, module=True): - """Move the submodule to a another module path. This involves physically moving - the repository at our current path, changing the configuration, as well as - adjusting our index entry accordingly. - - :param module_path: the path to which to move our module, given as - repository-relative path. Intermediate directories will be created - accordingly. If the path already exists, it must be empty. - Trailling (back)slashes are removed automatically - :param configuration: if True, the configuration will be adjusted to let - the submodule point to the given path. - :param module: if True, the repository managed by this submodule - will be moved, not the configuration. This will effectively - leave your repository in an inconsistent state unless the configuration - and index already point to the target location. 
- :return: self - :raise ValueError: if the module path existed and was not empty, or was a file - :note: Currently the method is not atomic, and it could leave the repository - in an inconsistent state if a sub-step fails for some reason - """ - if module + configuration < 1: - raise ValueError("You must specify to move at least the module or the configuration of the submodule") - #END handle input - - module_path = to_native_path_linux(module_path) - if module_path.endswith('/'): - module_path = module_path[:-1] - # END handle trailing slash - - # VERIFY DESTINATION - if module_path == self.path: - return self - #END handle no change - - dest_path = join_path_native(self.repo.working_tree_dir, module_path) - if os.path.isfile(dest_path): - raise ValueError("Cannot move repository onto a file: %s" % dest_path) - # END handle target files - - index = self.repo.index - tekey = index.entry_key(module_path, 0) - # if the target item already exists, fail - if configuration and tekey in index.entries: - raise ValueError("Index entry for target path did already exist") - #END handle index key already there - - # remove existing destination - if module: - if os.path.exists(dest_path): - if len(os.listdir(dest_path)): - raise ValueError("Destination module directory was not empty") - #END handle non-emptiness - - if os.path.islink(dest_path): - os.remove(dest_path) - else: - os.rmdir(dest_path) - #END handle link - else: - # recreate parent directories - # NOTE: renames() does that now - pass - #END handle existence - # END handle module - - # move the module into place if possible - cur_path = self.abspath - renamed_module = False - if module and os.path.exists(cur_path): - os.renames(cur_path, dest_path) - renamed_module = True - #END move physical module - - - # rename the index entry - have to manipulate the index directly as - # git-mv cannot be used on submodules - try: - if configuration: - try: - ekey = index.entry_key(self.path, 0) - entry = index.entries[ekey] - del(index.entries[ekey]) - nentry = git.IndexEntry(entry[:3]+(module_path,)+entry[4:]) - index.entries[tekey] = nentry - except KeyError: - raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path)) - #END handle submodule doesn't exist - - # update configuration - writer = self.config_writer(index=index) # auto-write - writer.set_value('path', module_path) - self.path = module_path - del(writer) - # END handle configuration flag - except Exception: - if renamed_module: - os.renames(dest_path, cur_path) - # END undo module renaming - raise - #END handle undo rename - - return self - - @unbare_repo - def remove(self, module=True, force=False, configuration=True, dry_run=False): - """Remove this submodule from the repository. This will remove our entry - from the .gitmodules file and the entry in the .git/config file. - - :param module: If True, the module we point to will be deleted - as well. If the module is currently on a commit which is not part - of any branch in the remote, if the currently checked out branch is - ahead of its tracking branch, or if you have modifications in the - working tree or untracked files, the removal will fail; in that case - the submodule status will not have been altered. - If this submodule has child-modules of its own, these will be deleted - prior to touching the module itself. - :param force: Enforces the deletion of the module even though it contains - modifications. This basically enforces a brute-force file system based - deletion.
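A hypothetical relocation through move() above, with sm being any Submodule instance as in the add() sketch earlier; moving module and configuration together keeps working tree, .gitmodules and index consistent:

    sm.move('vendor/gitdb', module=True, configuration=True)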
- :param configuration: if True, the submodule is deleted from the configuration, - otherwise it isn't. Although this should be enabled most of the times, - this flag enables you to safely delete the repository of your submodule. - :param dry_run: if True, we will not actually do anything, but throw the errors - we would usually throw - :return: self - :note: doesn't work in bare repositories - :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted - :raise OSError: if directories or files could not be removed""" - if not (module + configuration): - raise ValueError("Need to specify to delete at least the module, or the configuration") - # END handle params - - # DELETE MODULE REPOSITORY - ########################## - if module and self.module_exists(): - if force: - # take the fast lane and just delete everything in our module path - # TODO: If we run into permission problems, we have a highly inconsistent - # state. Delete the .git folders last, start with the submodules first - mp = self.abspath - method = None - if os.path.islink(mp): - method = os.remove - elif os.path.isdir(mp): - method = shutil.rmtree - elif os.path.exists(mp): - raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory") - #END handle brutal deletion - if not dry_run: - assert method - method(mp) - #END apply deletion method - else: - # verify we may delete our module - mod = self.module() - if mod.is_dirty(untracked_files=True): - raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir) - # END check for dirt - - # figure out whether we have new commits compared to the remotes - # NOTE: If the user pulled all the time, the remote heads might - # not have been updated, so commits coming from the remote look - # as if they come from us. But we stay strictly read-only and - # don't fetch beforhand. 
- for remote in mod.remotes: - num_branches_with_new_commits = 0 - rrefs = remote.refs - for rref in rrefs: - num_branches_with_new_commits = len(mod.git.cherry(rref)) != 0 - # END for each remote ref - # not a single remote branch contained all our commits - if num_branches_with_new_commits == len(rrefs): - raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir) - # END handle new commits - # have to manually delete references as python's scoping is - # not existing, they could keep handles open ( on windows this is a problem ) - if len(rrefs): - del(rref) - #END handle remotes - del(rrefs) - del(remote) - # END for each remote - - # gently remove all submodule repositories - for sm in self.children(): - sm.remove(module=True, force=False, configuration=False, dry_run=dry_run) - del(sm) - # END for each child-submodule - - # finally delete our own submodule - if not dry_run: - wtd = mod.working_tree_dir - del(mod) # release file-handles (windows) - shutil.rmtree(wtd) - # END delete tree if possible - # END handle force - # END handle module deletion - - # DELETE CONFIGURATION - ###################### - if configuration and not dry_run: - # first the index-entry - index = self.repo.index - try: - del(index.entries[index.entry_key(self.path, 0)]) - except KeyError: - pass - #END delete entry - index.write() - - # now git config - need the config intact, otherwise we can't query - # inforamtion anymore - self.repo.config_writer().remove_section(sm_section(self.name)) - self.config_writer().remove_section() - # END delete configuration - - # void our data not to delay invalid access - self._clear_cache() - - return self - - def set_parent_commit(self, commit, check=True): - """Set this instance to use the given commit whose tree is supposed to - contain the .gitmodules blob. - - :param commit: Commit'ish reference pointing at the root_tree - :param check: if True, relatively expensive checks will be performed to verify - validity of the submodule. - :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. - :raise ValueError: if the parent commit didn't store this submodule under the - current path - :return: self""" - pcommit = self.repo.commit(commit) - pctree = pcommit.tree - if self.k_modules_file not in pctree: - raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file)) - # END handle exceptions - - prev_pc = self._parent_commit - self._parent_commit = pcommit - - if check: - parser = self._config_parser(self.repo, self._parent_commit, read_only=True) - if not parser.has_section(sm_section(self.name)): - self._parent_commit = prev_pc - raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit)) - # END handle submodule did not exist - # END handle checking mode - - # update our sha, it could have changed - self.binsha = pctree[self.path].binsha - - self._clear_cache() - - return self - - @unbare_repo - def config_writer(self, index=None, write=True): - """:return: a config writer instance allowing you to read and write the data - belonging to this submodule into the .gitmodules file. - - :param index: if not None, an IndexFile instance which should be written. - defaults to the index of the Submodule's parent repository. - :param write: if True, the index will be written each time a configuration - value changes. 
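Since remove() above is destructive, a dry run first exercises all checks without touching anything; hypothetical sketch with the same sm as before:

    sm.remove(module=True, configuration=True, dry_run=True)   # raises on any problem
    sm.remove(module=True, configuration=True)                 # actually delete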
- :note: the parameters allow for a more efficient writing of the index, - as you can pass in a modified index on your own, prevent automatic writing, - and write yourself once the whole operation is complete - :raise ValueError: if trying to get a writer on a parent_commit which does not - match the current head commit - :raise IOError: If the .gitmodules file/blob could not be read""" - writer = self._config_parser_constrained(read_only=False) - if index is not None: - writer.config._index = index - writer.config._auto_write = write - return writer - - #} END edit interface - - #{ Query Interface - - @unbare_repo - def module(self): - """:return: Repo instance initialized from the repository at our submodule path - :raise InvalidGitRepositoryError: if a repository was not available. This could - also mean that it was not yet initialized""" - # late import to workaround circular dependencies - module_path = self.abspath - try: - repo = git.Repo(module_path) - if repo != self.repo: - return repo - # END handle repo uninitialized - except (InvalidGitRepositoryError, NoSuchPathError): - raise InvalidGitRepositoryError("No valid repository at %s" % self.path) - else: - raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path) - # END handle exceptions - - def module_exists(self): - """:return: True if our module exists and is a valid git repository. See module() method""" - try: - self.module() - return True - except Exception: - return False - # END handle exception - - def exists(self): - """ - :return: True if the submodule exists, False otherwise. Please note that - a submodule may exist (in the .gitmodules file) even though its module - doesn't exist""" - # keep attributes for later, and restore them if we have no valid data - # this way we do not actually alter the state of the object - loc = locals() - for attr in self._cache_attrs: - if hasattr(self, attr): - loc[attr] = getattr(self, attr) - # END if we have the attribute cache - #END for each attr - self._clear_cache() - - try: - try: - self.path - return True - except Exception: - return False - # END handle exceptions - finally: - for attr in self._cache_attrs: - if attr in loc: - setattr(self, attr, loc[attr]) - # END if we have a cache - # END reapply each attribute - # END handle object state consistency - - @property - def branch(self): - """:return: The branch instance that we are to checkout - :raise InvalidGitRepositoryError: if our module is not yet checked out""" - return mkhead(self.module(), self._branch_path) - - @property - def branch_path(self): - """ - :return: full (relative) path as string to the branch we would checkout - from the remote and track""" - return self._branch_path - - @property - def branch_name(self): - """:return: the name of the branch, which is the shortest possible branch name""" - # use an instance method, for this we create a temporary Head instance - # which uses a repository that is available at least ( it makes no difference ) - return git.Head(self.repo, self._branch_path).name - - @property - def url(self): - """:return: The url to the repository which our module-repository refers to""" - return self._url - - @property - def parent_commit(self): - """:return: Commit instance with the tree containing the .gitmodules file - :note: will always point to the current head's commit if it was not set explicitly""" - return self._parent_commit - - @property - def name(self): - """:return: The name of this submodule. 
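The index and write parameters above allow batching, the same pattern add() uses internally; a sketch assuming sm and repo from the earlier examples:

    index = repo.index
    writer = sm.config_writer(index=index, write=False)
    writer.set_value('url', 'git://example.com/elsewhere.git')
    del(writer)     # flushes the .gitmodules blob into the passed-in index
    index.write()   # one index write at the end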
It is used to identify it within the - .gitmodules file. - :note: by default, the name is the path at which to find the submodule, but - in git-python it should be a unique identifier similar to the identifiers - used for remotes, which allows to change the path of the submodule - easily - """ - return self._name - - def config_reader(self): - """ - :return: ConfigReader instance which allows you to qurey the configuration values - of this submodule, as provided by the .gitmodules file - :note: The config reader will actually read the data directly from the repository - and thus does not need nor care about your working tree. - :note: Should be cached by the caller and only kept as long as needed - :raise IOError: If the .gitmodules file/blob could not be read""" - return self._config_parser_constrained(read_only=True) - - def children(self): - """ - :return: IterableList(Submodule, ...) an iterable list of submodules instances - which are children of this submodule - :raise InvalidGitRepositoryError: if the submodule is not checked-out""" - return self._get_intermediate_items(self) - - #} END query interface - - #{ Iterable Interface - - @classmethod - def iter_items(cls, repo, parent_commit='HEAD'): - """:return: iterator yielding Submodule instances available in the given repository""" - pc = repo.commit(parent_commit) # parent commit instance - try: - parser = cls._config_parser(repo, pc, read_only=True) - except IOError: - raise StopIteration - # END handle empty iterator - - rt = pc.tree # root tree - - for sms in parser.sections(): - n = sm_name(sms) - p = parser.get_value(sms, 'path') - u = parser.get_value(sms, 'url') - b = cls.k_head_default - if parser.has_option(sms, cls.k_head_option): - b = parser.get_value(sms, cls.k_head_option) - # END handle optional information - - # get the binsha - index = repo.index - try: - sm = rt[p] - except KeyError: - # try the index, maybe it was just added - try: - entry = index.entries[index.entry_key(p, 0)] - sm = cls(repo, entry.binsha, entry.mode, entry.path) - except KeyError: - raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit)) - # END handle keyerror - # END handle critical error - - # fill in remaining info - saves time as it doesn't have to be parsed again - sm._name = n - sm._parent_commit = pc - sm._branch_path = git.Head.to_full_path(b) - sm._url = u - - yield sm - # END for each section - - #} END iterable interface - diff --git a/lib/git/objects/submodule/root.py b/lib/git/objects/submodule/root.py deleted file mode 100644 index 2e3cc775..00000000 --- a/lib/git/objects/submodule/root.py +++ /dev/null @@ -1,262 +0,0 @@ -from base import Submodule -from util import ( - find_first_remote_branch - ) -from git.exc import InvalidGitRepositoryError -import git - -import sys - -__all__ = ["RootModule"] - - -class RootModule(Submodule): - """A (virtual) Root of all submodules in the given repository. 
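Enumerating the submodules recorded at a given commit via iter_items above; assumes repo as before:

    for sm in git.Submodule.iter_items(repo, parent_commit='HEAD'):
        print('%s -> %s (%s)' % (sm.name, sm.path, sm.url))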
It can be used - to more easily traverse all submodules of the master repository""" - - __slots__ = tuple() - - k_root_name = '__ROOT__' - - def __init__(self, repo): - # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) - super(RootModule, self).__init__( - repo, - binsha = self.NULL_BIN_SHA, - mode = self.k_default_mode, - path = '', - name = self.k_root_name, - parent_commit = repo.head.commit, - url = '', - branch_path = git.Head.to_full_path(self.k_head_default) - ) - - - def _clear_cache(self): - """May not do anything""" - pass - - #{ Interface - - def update(self, previous_commit=None, recursive=True, force_remove=False, init=True, to_latest_revision=False): - """Update the submodules of this repository to the current HEAD commit. - This method behaves smartly by determining changes of the path of a submodules - repository, next to changes to the to-be-checked-out commit or the branch to be - checked out. This works if the submodules ID does not change. - Additionally it will detect addition and removal of submodules, which will be handled - gracefully. - - :param previous_commit: If set to a commit'ish, the commit we should use - as the previous commit the HEAD pointed to before it was set to the commit it points to now. - If None, it defaults to ORIG_HEAD otherwise, or the parent of the current - commit if it is not given - :param recursive: if True, the children of submodules will be updated as well - using the same technique - :param force_remove: If submodules have been deleted, they will be forcibly removed. - Otherwise the update may fail if a submodule's repository cannot be deleted as - changes have been made to it (see Submodule.update() for more information) - :param init: If we encounter a new module which would need to be initialized, then do it. - :param to_latest_revision: If True, instead of checking out the revision pointed to - by this submodule's sha, the checked out tracking branch will be merged with the - newest remote branch fetched from the repository's origin""" - if self.repo.bare: - raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") - # END handle bare - - repo = self.repo - - # HANDLE COMMITS - ################## - cur_commit = repo.head.commit - if previous_commit is None: - symref = repo.head.orig_head() - try: - previous_commit = symref.commit - except Exception: - pcommits = cur_commit.parents - if pcommits: - previous_commit = pcommits[0] - else: - # in this special case, we just diff against ourselve, which - # means exactly no change - previous_commit = cur_commit - # END handle initial commit - # END no ORIG_HEAD - else: - previous_commit = repo.commit(previous_commit) # obtain commit object - # END handle previous commit - - - psms = self.list_items(repo, parent_commit=previous_commit) - sms = self.list_items(self.module()) - spsms = set(psms) - ssms = set(sms) - - # HANDLE REMOVALS - ################### - for rsm in (spsms - ssms): - # fake it into thinking its at the current commit to allow deletion - # of previous module. 
Trigger the cache to be updated before that - #rsm.url - rsm._parent_commit = repo.head.commit - rsm.remove(configuration=False, module=True, force=force_remove) - # END for each removed submodule - - # HANDLE PATH RENAMES - ##################### - # url changes + branch changes - for csm in (spsms & ssms): - psm = psms[csm.name] - sm = sms[csm.name] - - if sm.path != psm.path and psm.module_exists(): - # move the module to the new path - psm.move(sm.path, module=True, configuration=False) - # END handle path changes - - if sm.module_exists(): - # handle url change - if sm.url != psm.url: - # Add the new remote, remove the old one - # This way, if the url just changes, the commits will not - # have to be re-retrieved - nn = '__new_origin__' - smm = sm.module() - rmts = smm.remotes - - # don't do anything if we already have the url we search in place - if len([r for r in rmts if r.url == sm.url]) == 0: - - - assert nn not in [r.name for r in rmts] - smr = smm.create_remote(nn, sm.url) - smr.fetch() - - # If we have a tracking branch, it should be available - # in the new remote as well. - if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0: - raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url)) - # END head is not detached - - # now delete the changed one - rmt_for_deletion = None - for remote in rmts: - if remote.url == psm.url: - rmt_for_deletion = remote - break - # END if urls match - # END for each remote - - # if we didn't find a matching remote, but have exactly one, - # we can safely use this one - if rmt_for_deletion is None: - if len(rmts) == 1: - rmt_for_deletion = rmts[0] - else: - # if we have not found any remote with the original url - # we may not have a name. This is a special case, - # and its okay to fail here - # Alternatively we could just generate a unique name and leave all - # existing ones in place - raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url) - #END handle one single remote - # END handle check we found a remote - - orig_name = rmt_for_deletion.name - smm.delete_remote(rmt_for_deletion) - # NOTE: Currently we leave tags from the deleted remotes - # as well as separate tracking branches in the possibly totally - # changed repository ( someone could have changed the url to - # another project ). At some point, one might want to clean - # it up, but the danger is high to remove stuff the user - # has added explicitly - - # rename the new remote back to what it was - smr.rename(orig_name) - - # early on, we verified that the our current tracking branch - # exists in the remote. Now we have to assure that the - # sha we point to is still contained in the new remote - # tracking branch. 
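Putting RootModule.update above to work after a history change, for example right after a pull; a hypothetical sketch importing from the module path shown in this diff:

    from git.objects.submodule.root import RootModule
    RootModule(repo).update(previous_commit='ORIG_HEAD', recursive=True,
                            force_remove=False, to_latest_revision=True)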
- smsha = sm.binsha - found = False - rref = smr.refs[self.branch_name] - for c in rref.commit.traverse(): - if c.binsha == smsha: - found = True - break - # END traverse all commits in search for sha - # END for each commit - - if not found: - # adjust our internal binsha to use the one of the remote - # this way, it will be checked out in the next step - # This will change the submodule relative to us, so - # the user will be able to commit the change easily - print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it the the remote's tracking branch" % sm.hexsha - sm.binsha = rref.commit.binsha - #END reset binsha - - #NOTE: All checkout is performed by the base implementation of update - - # END skip remote handling if new url already exists in module - # END handle url - - if sm.branch_path != psm.branch_path: - # finally, create a new tracking branch which tracks the - # new remote branch - smm = sm.module() - smmr = smm.remotes - try: - tbr = git.Head.create(smm, sm.branch_name) - except git.GitCommandError, e: - if e.status != 128: - raise - #END handle something unexpected - - # ... or reuse the existing one - tbr = git.Head(smm, sm.branch_path) - #END assure tracking branch exists - - tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name)) - # figure out whether the previous tracking branch contains - # new commits compared to the other one, if not we can - # delete it. - try: - tbr = find_first_remote_branch(smmr, psm.branch_name) - if len(smm.git.cherry(tbr, psm.branch)) == 0: - psm.branch.delete(smm, psm.branch) - #END delete original tracking branch if there are no changes - except InvalidGitRepositoryError: - # ignore it if the previous branch couldn't be found in the - # current remotes, this just means we can't handle it - pass - # END exception handling - - #NOTE: All checkout is done in the base implementation of update - - #END handle branch - #END handle - # END for each common submodule - - # FINALLY UPDATE ALL ACTUAL SUBMODULES - ###################################### - for sm in sms: - # update the submodule using the default method - sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision) - - # update recursively depth first - question is which inconsitent - # state will be better in case it fails somewhere. Defective branch - # or defective depth. 
The RootSubmodule type will never process itself, - # which was done in the previous expression - if recursive: - type(self)(sm.module()).update(recursive=True, force_remove=force_remove, - init=init, to_latest_revision=to_latest_revision) - #END handle recursive - # END for each submodule to update - - def module(self): - """:return: the actual repository containing the submodules""" - return self.repo - #} END interface -#} END classes diff --git a/lib/git/objects/submodule/util.py b/lib/git/objects/submodule/util.py deleted file mode 100644 index 9b32807a..00000000 --- a/lib/git/objects/submodule/util.py +++ /dev/null @@ -1,101 +0,0 @@ -import git -from git.exc import InvalidGitRepositoryError -from git.config import GitConfigParser -from StringIO import StringIO -import weakref - -__all__ = ( 'sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch', - 'SubmoduleConfigParser') - -#{ Utilities - -def sm_section(name): - """:return: section title used in .gitmodules configuration file""" - return 'submodule "%s"' % name - -def sm_name(section): - """:return: name of the submodule as parsed from the section name""" - section = section.strip() - return section[11:-1] - -def mkhead(repo, path): - """:return: New branch/head instance""" - return git.Head(repo, git.Head.to_full_path(path)) - -def unbare_repo(func): - """Methods with this decorator raise InvalidGitRepositoryError if they - encounter a bare repository""" - def wrapper(self, *args, **kwargs): - if self.repo.bare: - raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__) - #END bare method - return func(self, *args, **kwargs) - # END wrapper - wrapper.__name__ = func.__name__ - return wrapper - -def find_first_remote_branch(remotes, branch_name): - """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" - for remote in remotes: - try: - return remote.refs[branch_name] - except IndexError: - continue - # END exception handling - #END for remote - raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes", branch_name) - -#} END utilities - - -#{ Classes - -class SubmoduleConfigParser(GitConfigParser): - """ - Catches calls to _write, and updates the .gitmodules blob in the index - with the new data, if we have written into a stream. Otherwise it will - add the local file to the index to make it correspond with the working tree. - Additionally, the cache must be cleared - - Please note that no mutating method will work in bare mode - """ - - def __init__(self, *args, **kwargs): - self._smref = None - self._index = None - self._auto_write = True - super(SubmoduleConfigParser, self).__init__(*args, **kwargs) - - #{ Interface - def set_submodule(self, submodule): - """Set this instance's submodule. 
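A quick round trip of the section-name helpers above:

    assert sm_section('gitdb') == 'submodule "gitdb"'
    assert sm_name(sm_section('gitdb')) == 'gitdb'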
It must be called before - the first write operation begins""" - self._smref = weakref.ref(submodule) - - def flush_to_index(self): - """Flush changes in our configuration file to the index""" - assert self._smref is not None - # should always have a file here - assert not isinstance(self._file_or_files, StringIO) - - sm = self._smref() - if sm is not None: - index = self._index - if index is None: - index = sm.repo.index - # END handle index - index.add([sm.k_modules_file], write=self._auto_write) - sm._clear_cache() - # END handle weakref - - #} END interface - - #{ Overridden Methods - def write(self): - rval = super(SubmoduleConfigParser, self).write() - self.flush_to_index() - return rval - # END overridden methods - - -#} END classes diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py deleted file mode 100644 index c7d02abe..00000000 --- a/lib/git/objects/tag.py +++ /dev/null @@ -1,76 +0,0 @@ -# objects.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" Module containing all object based types. """ -import base -from gitdb.util import hex_to_bin -from util import ( - get_object_type_by_name, - parse_actor_and_date - ) - -__all__ = ("TagObject", ) - -class TagObject(base.Object): - """Non-Lightweight tag carrying additional information about an object we are pointing to.""" - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" ) - - def __init__(self, repo, binsha, object=None, tag=None, - tagger=None, tagged_date=None, tagger_tz_offset=None, message=None): - """Initialize a tag object with additional data - - :param repo: repository this object is located in - :param binsha: 20 byte SHA1 - :param object: Object instance of object we are pointing to - :param tag: name of this tag - :param tagger: Actor identifying the tagger - :param tagged_date: int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the - authored_date is in, in a format similar to time.altzone""" - super(TagObject, self).__init__(repo, binsha ) - if object is not None: - self.object = object - if tag is not None: - self.tag = tag - if tagger is not None: - self.tagger = tagger - if tagged_date is not None: - self.tagged_date = tagged_date - if tagger_tz_offset is not None: - self.tagger_tz_offset = tagger_tz_offset - if message is not None: - self.message = message - - def _set_cache_(self, attr): - """Cache all our attributes at once""" - if attr in TagObject.__slots__: - ostream = self.repo.odb.stream(self.binsha) - lines = ostream.read().splitlines() - - obj, hexsha = lines[0].split(" ") # object <hexsha> - type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha)) - - self.tag = lines[2][4:] # tag <tag name> - - tagger_info = lines[3][7:]# tagger <actor> <date> - self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in case there really is no message, it would not exist. 
Otherwise
- # a newline separates header from message
- if len(lines) > 5:
- self.message = "\n".join(lines[5:])
- else:
- self.message = ''
- # END check our attributes
- else:
- super(TagObject, self)._set_cache_(attr)
-
-
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
deleted file mode 100644
index 67431686..00000000
--- a/lib/git/objects/tree.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# tree.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
-from base import IndexObject
-from git.util import join_path
-from blob import Blob
-from submodule.base import Submodule
-import git.diff as diff
-
-from fun import (
- tree_entries_from_data,
- tree_to_stream
- )
-
-from gitdb.util import (
- to_bin_sha,
- )
-
-__all__ = ("TreeModifier", "Tree")
-
-class TreeModifier(object):
- """A utility class providing methods to alter the underlying cache in a list-like fashion.
-
- Once all adjustments are complete, the _cache, which really is a reference to
- the cache of a tree, will be sorted, assuring it will be in a serializable state"""
- __slots__ = '_cache'
-
- def __init__(self, cache):
- self._cache = cache
-
- def _index_by_name(self, name):
- """:return: index of an item with name, or -1 if not found"""
- for i, t in enumerate(self._cache):
- if t[2] == name:
- return i
- # END found item
- # END for each item in cache
- return -1
-
- #{ Interface
- def set_done(self):
- """Call this method once you are done modifying the tree information.
- It may be called several times, but be aware that each call will cause
- a sort operation
- :return self:"""
- self._cache.sort(key=lambda t: t[2]) # sort by name
- return self
- #} END interface
-
- #{ Mutators
- def add(self, sha, mode, name, force=False):
- """Add the given item to the tree. If an item with the given name already
- exists, nothing will be done, but a ValueError will be raised if the
- sha and mode of the existing item do not match the one you add, unless
- force is True
-
- :param sha: The 20 or 40 byte sha of the item to add
- :param mode: int representing the stat compatible mode of the item
- :param force: If True, an item with your name and information will overwrite
- any existing item with the same name, no matter which information it has
- :return: self"""
- if '/' in name:
- raise ValueError("Name must not contain '/' characters")
- if (mode >> 12) not in Tree._map_id_to_type:
- raise ValueError("Invalid object type according to mode %o" % mode)
-
- sha = to_bin_sha(sha)
- index = self._index_by_name(name)
- item = (sha, mode, name)
- if index == -1:
- self._cache.append(item)
- else:
- if force:
- self._cache[index] = item
- else:
- ex_item = self._cache[index]
- if ex_item[0] != sha or ex_item[1] != mode:
- raise ValueError("Item %r existed with different properties" % name)
- # END handle mismatch
- # END handle force
- # END handle name exists
- return self
-
- def add_unchecked(self, binsha, mode, name):
- """Add the given item to the tree, its correctness is assumed, which
- makes the caller responsible for assuring the input is correct.
- For more information on the parameters, see ``add`` - :param binsha: 20 byte binary sha""" - self._cache.append((binsha, mode, name)) - - def __delitem__(self, name): - """Deletes an item with the given name if it exists""" - index = self._index_by_name(name) - if index > -1: - del(self._cache[index]) - - #} END mutators - - -class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable): - """Tree objects represent an ordered list of Blobs and other Trees. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. - - You may as well access by index - blob = tree[0] - """ - - type = "tree" - __slots__ = "_cache" - - # actual integer ids for comparison - commit_id = 016 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link - blob_id = 010 - symlink_id = 012 - tree_id = 004 - - _map_id_to_type = { - commit_id : Submodule, - blob_id : Blob, - symlink_id : Blob - # tree id added once Tree is defined - } - - - def __init__(self, repo, binsha, mode=tree_id<<12, path=None): - super(Tree, self).__init__(repo, binsha, mode, path) - - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return tuple(index_object._iter_convert_to_object(index_object._cache)) - return tuple() - - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - ostream = self.repo.odb.stream(self.binsha) - self._cache = tree_entries_from_data(ostream.read()) - else: - super(Tree, self)._set_cache_(attr) - # END handle attribute - - def _iter_convert_to_object(self, iterable): - """Iterable yields tuples of (binsha, mode, name), which will be converted - to the respective object representation""" - for binsha, mode, name in iterable: - path = join_path(self.path, name) - try: - yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path) - except KeyError: - raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path)) - # END for each item - - def __div__(self, file): - """Find the named object in this tree's contents - :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` - - :raise KeyError: if given file or tree does not exist in tree""" - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for info in self._cache: - if info[2] == file: # [2] == name - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - # END for each obj - raise KeyError( msg % file ) - # END handle long paths - - - @property - def trees(self): - """:return: list(Tree, ...) list of trees directly below this tree""" - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """:return: list(Blob, ...) list of blobs directly below this tree""" - return [ i for i in self if i.type == "blob" ] - - @property - def cache(self): - """ - :return: An object allowing to modify the internal cache. This can be used - to change the tree's contents. When done, make sure you call ``set_done`` - on the tree modifier, or serialization behaviour will be incorrect. 
- See the ``TreeModifier`` for more information on how to alter the cache""" - return TreeModifier(self._cache) - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see util.Traversable.traverse - Trees are set to visit_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - - # List protocol - def __getslice__(self, i, j): - return list(self._iter_convert_to_object(self._cache[i:j])) - - def __iter__(self): - return self._iter_convert_to_object(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self, item): - if isinstance(item, int): - info = self._cache[item] - return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2])) - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self, item): - if isinstance(item, IndexObject): - for info in self._cache: - if item.binsha == info[0]: - return True - # END compare sha - # END for each entry - # END handle item is index object - # compatability - - # treat item as repo-relative path - path = self.path - for info in self._cache: - if item == join_path(path, info[2]): - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._iter_convert_to_object(self._cache)) - - def _serialize(self, stream): - """Serialize this tree into the stream. Please note that we will assume - our tree data to be in a sorted state. If this is not the case, serialization - will not generate a correct tree representation as these are assumed to be sorted - by algorithms""" - tree_to_stream(self._cache, stream.write) - return self - - def _deserialize(self, stream): - self._cache = tree_entries_from_data(stream.read()) - return self - - -# END tree - -# finalize map definition -Tree._map_id_to_type[Tree.tree_id] = Tree diff --git a/lib/git/objects/util.py b/lib/git/objects/util.py deleted file mode 100644 index a9e1143c..00000000 --- a/lib/git/objects/util.py +++ /dev/null @@ -1,374 +0,0 @@ -# util.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -"""Module for general utility functions""" -from git.util import IterableList - -import re -from collections import deque as Deque -import platform - -from string import digits -import time -import os - -__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date', - 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', - 'verify_utctz') - -#{ Functions - -def mode_str_to_int(modestr): - """ - :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used - :return: - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. 
whether it is a symlink - for example.""" - mode = 0 - for iteration, char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - -def get_object_type_by_name(object_type_name): - """ - :return: type suitable to handle the given object type name. - Use the type to create new instances. - - :param object_type_name: Member of TYPES - - :raise ValueError: In case object_type_name is unknown""" - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - -def get_user_id(): - """:return: string identifying the currently active system user as name@node - :note: user can be set with the 'USER' environment variable, usually set on windows""" - ukn = 'UNKNOWN' - username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) - if username == ukn and hasattr(os, 'getlogin'): - username = os.getlogin() - # END get username from login - return "%s@%s" % (username, platform.node()) - - -def utctz_to_altz(utctz): - """we convert utctz to the timezone in seconds, it is the format time.altzone - returns. Git stores it as UTC timezon which has the opposite sign as well, - which explains the -1 * ( that was made explicit here ) - :param utctz: git utc timezone string, i.e. +0200""" - return -1 * int(float(utctz)/100*3600) - -def altz_to_utctz_str(altz): - """As above, but inverses the operation, returning a string that can be used - in commit objects""" - utci = -1 * int((altz / 3600)*100) - utcs = str(abs(utci)) - utcs = "0"*(4-len(utcs)) + utcs - prefix = (utci < 0 and '-') or '+' - return prefix + utcs - - -def verify_utctz(offset): - """:raise ValueError: if offset is incorrect - :return: offset""" - fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) - if len(offset) != 5: - raise fmt_exc - if offset[0] not in "+-": - raise fmt_exc - if offset[1] not in digits or \ - offset[2] not in digits or \ - offset[3] not in digits or \ - offset[4] not in digits: - raise fmt_exc - # END for each char - return offset - -def parse_date(string_date): - """ - Parse the given date as one of the following - - * Git internal format: timestamp offset - * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
- * ISO 8601 2005-04-07T22:13:13 - The T can be a space as well - - :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch - :raise ValueError: If the format could not be understood - :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY""" - # git time - try: - if string_date.count(' ') == 1 and string_date.rfind(':') == -1: - timestamp, offset = string_date.split() - timestamp = int(timestamp) - return timestamp, utctz_to_altz(verify_utctz(offset)) - else: - offset = "+0000" # local time by default - if string_date[-5] in '-+': - offset = verify_utctz(string_date[-5:]) - string_date = string_date[:-6] # skip space as well - # END split timezone info - - # now figure out the date and time portion - split time - date_formats = list() - splitter = -1 - if ',' in string_date: - date_formats.append("%a, %d %b %Y") - splitter = string_date.rfind(' ') - else: - # iso plus additional - date_formats.append("%Y-%m-%d") - date_formats.append("%Y.%m.%d") - date_formats.append("%m/%d/%Y") - date_formats.append("%d.%m.%Y") - - splitter = string_date.rfind('T') - if splitter == -1: - splitter = string_date.rfind(' ') - # END handle 'T' and ' ' - # END handle rfc or iso - - assert splitter > -1 - - # split date and time - time_part = string_date[splitter+1:] # skip space - date_part = string_date[:splitter] - - # parse time - tstruct = time.strptime(time_part, "%H:%M:%S") - - for fmt in date_formats: - try: - dtstruct = time.strptime(date_part, fmt) - fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, - tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec, - dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst)) - return int(time.mktime(fstruct)), utctz_to_altz(offset) - except ValueError: - continue - # END exception handling - # END for each fmt - - # still here ? fail - raise ValueError("no format matched") - # END handle format - except Exception: - raise ValueError("Unsupported date format: %s" % string_date) - # END handle exceptions - - -# precompiled regex -_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$') - -def parse_actor_and_date(line): - """Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" - m = _re_actor_epoch.search(line) - actor, epoch, offset = m.groups() - return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset)) - - -#} END functions - - -#{ Classes - -class Actor(object): - """Actors hold information about a person acting on the repository. They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - # precompiled regex - name_only_regex = re.compile( r'<(.+)>' ) - name_email_regex = re.compile( r'(.*) <(.+?)>' ) - - def __init__(self, name, email): - self.name = name - self.email = email - - def __eq__(self, other): - return self.name == other.name and self.email == other.email - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.name, self.email)) - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.Actor "%s <%s>">' % (self.name, self.email) - - @classmethod - def _from_string(cls, string): - """Create an Actor from a string. 
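An illustrative sketch (an editorial addition, not part of the deleted file) of both input forms the method accepts, using only the ``Actor`` class shown above:

    >>> Actor._from_string("John Doe <jdoe@example.com>")
    <git.Actor "John Doe <jdoe@example.com>">
    >>> Actor._from_string("John Doe").email is None
    True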
- :param string: is the string, which is expected to be in regular git format
-
- John Doe <jdoe@example.com>
-
- :return: Actor """
- m = cls.name_email_regex.search(string)
- if m:
- name, email = m.groups()
- return Actor(name, email)
- else:
- m = cls.name_only_regex.search(string)
- if m:
- return Actor(m.group(1), None)
- else:
- # assume best and use the whole string as name
- return Actor(string, None)
- # END special case name
- # END handle name/email matching
-
-
-class ProcessStreamAdapter(object):
- """Class wiring all calls to the contained Process instance.
-
- Use this type to hide the underlying process to provide access only to a specified
- stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope."""
- __slots__ = ("_proc", "_stream")
- def __init__(self, process, stream_name):
- self._proc = process
- self._stream = getattr(process, stream_name)
-
- def __getattr__(self, attr):
- return getattr(self._stream, attr)
-
-
-class Traversable(object):
- """Simple interface to perform depth-first or breadth-first traversals
- in one direction.
- Subclasses only need to implement one function.
- Instances of the Subclass must be hashable"""
- __slots__ = tuple()
-
- @classmethod
- def _get_intermediate_items(cls, item):
- """
- Returns:
- List of items connected to the given item.
- Must be implemented in subclass
- """
- raise NotImplementedError("To be implemented in subclass")
-
- def list_traverse(self, *args, **kwargs):
- """
- :return: IterableList with the results of the traversal as produced by
- traverse()"""
- out = IterableList(self._id_attribute_)
- out.extend(self.traverse(*args, **kwargs))
- return out
-
- def traverse( self, predicate = lambda i,d: True,
- prune = lambda i,d: False, depth = -1, branch_first=True,
- visit_once = True, ignore_self=1, as_edge = False ):
- """:return: iterator yielding items found when traversing self
-
- :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
-
- :param prune:
- f(i,d) returns True if the search should stop at item i at depth d.
- Item i will not be returned.
-
- :param depth:
- define at which level the iteration should not go deeper
- if -1, there is no limit
- if 0, you would effectively only get self, the root of the iteration
- i.e. if 1, you would only get the first level of predecessors/successors
-
- :param branch_first:
- if True, items will be returned branch first, otherwise depth first
-
- :param visit_once:
- if True, items will only be returned once, although they might be encountered
- several times. Loops are prevented that way.
-
- :param ignore_self:
- if True, self will be ignored and automatically pruned from
- the result. Otherwise it will be the first item to be returned.
- If as_edge is True, the source of the first edge is None
-
- :param as_edge:
- if True, return a pair of items, first being the source, second the
- destination, i.e.
tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item is self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack - - -class Serializable(object): - """Defines methods to serialize and deserialize objects from and into a data stream""" - - def _serialize(self, stream): - """Serialize the data of this object into the given data stream - :note: a serialized object would ``_deserialize`` into the same objet - :param stream: a file-like object - :return: self""" - raise NotImplementedError("To be implemented in subclass") - - def _deserialize(self, stream): - """Deserialize all information regarding this object from the stream - :param stream: a file-like object - :return: self""" - raise NotImplementedError("To be implemented in subclass") diff --git a/lib/git/odict.py b/lib/git/odict.py deleted file mode 100644 index 2c8391d7..00000000 --- a/lib/git/odict.py +++ /dev/null @@ -1,1399 +0,0 @@ -# odict.py -# An Ordered Dictionary object -# Copyright (C) 2005 Nicola Larosa, Michael Foord -# E-mail: nico AT tekNico DOT net, fuzzyman AT voidspace DOT org DOT uk - -# This software is licensed under the terms of the BSD license. -# http://www.voidspace.org.uk/python/license.shtml -# Basically you're free to copy, modify, distribute and relicense it, -# So long as you keep a copy of the license with it. - -# Documentation at http://www.voidspace.org.uk/python/odict.html -# For information about bugfixes, updates and support, please join the -# Pythonutils mailing list: -# http://groups.google.com/group/pythonutils/ -# Comments, suggestions and bug reports welcome. - -"""A dict that keeps keys in insertion order""" -from __future__ import generators - -__author__ = ('Nicola Larosa <nico-NoSp@m-tekNico.net>,' - 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>') - -__docformat__ = "restructuredtext en" - -__revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $' - -__version__ = '0.2.2' - -__all__ = ['OrderedDict', 'SequenceOrderedDict'] - -import sys -INTP_VER = sys.version_info[:2] -if INTP_VER < (2, 2): - raise RuntimeError("Python v.2.2 or later required") - -import types, warnings - -class OrderedDict(dict): - """ - A class of dictionary that keeps the insertion order of keys. - - All appropriate methods return keys, items, or values in an ordered way. - - All normal dictionary methods are available. Update and comparison is - restricted to other OrderedDict objects. - - Various sequence methods are available, including the ability to explicitly - mutate the key ordering. 
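For instance, the key order can be rewritten wholesale via ``setkeys`` (a hedged sketch; ``setkeys`` is defined further down in this file):

    >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
    >>> d.setkeys((2, 1, 3))
    >>> d
    OrderedDict([(2, 1), (1, 3), (3, 2)])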
- - __contains__ tests: - - >>> d = OrderedDict(((1, 3),)) - >>> 1 in d - 1 - >>> 4 in d - 0 - - __getitem__ tests: - - >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[2] - 1 - >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[4] - Traceback (most recent call last): - KeyError: 4 - - __len__ tests: - - >>> len(OrderedDict()) - 0 - >>> len(OrderedDict(((1, 3), (3, 2), (2, 1)))) - 3 - - get tests: - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.get(1) - 3 - >>> d.get(4) is None - 1 - >>> d.get(4, 5) - 5 - >>> d - OrderedDict([(1, 3), (3, 2), (2, 1)]) - - has_key tests: - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.has_key(1) - 1 - >>> d.has_key(4) - 0 - """ - - def __init__(self, init_val=(), strict=False): - """ - Create a new ordered dictionary. Cannot init from a normal dict, - nor from kwargs, since items order is undefined in those cases. - - If the ``strict`` keyword argument is ``True`` (``False`` is the - default) then when doing slice assignment - the ``OrderedDict`` you are - assigning from *must not* contain any keys in the remaining dict. - - >>> OrderedDict() - OrderedDict([]) - >>> OrderedDict({1: 1}) - Traceback (most recent call last): - TypeError: undefined order, cannot get items from dict - >>> OrderedDict({1: 1}.items()) - OrderedDict([(1, 1)]) - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d - OrderedDict([(1, 3), (3, 2), (2, 1)]) - >>> OrderedDict(d) - OrderedDict([(1, 3), (3, 2), (2, 1)]) - """ - self.strict = strict - dict.__init__(self) - if isinstance(init_val, OrderedDict): - self._sequence = init_val.keys() - dict.update(self, init_val) - elif isinstance(init_val, dict): - # we lose compatibility with other ordered dict types this way - raise TypeError('undefined order, cannot get items from dict') - else: - self._sequence = [] - self.update(init_val) - -### Special methods ### - - def __delitem__(self, key): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> del d[3] - >>> d - OrderedDict([(1, 3), (2, 1)]) - >>> del d[3] - Traceback (most recent call last): - KeyError: 3 - >>> d[3] = 2 - >>> d - OrderedDict([(1, 3), (2, 1), (3, 2)]) - >>> del d[0:1] - >>> d - OrderedDict([(2, 1), (3, 2)]) - """ - if isinstance(key, types.SliceType): - # FIXME: efficiency? - keys = self._sequence[key] - for entry in keys: - dict.__delitem__(self, entry) - del self._sequence[key] - else: - # do the dict.__delitem__ *first* as it raises - # the more appropriate error - dict.__delitem__(self, key) - self._sequence.remove(key) - - def __eq__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d == OrderedDict(d) - True - >>> d == OrderedDict(((1, 3), (2, 1), (3, 2))) - False - >>> d == OrderedDict(((1, 0), (3, 2), (2, 1))) - False - >>> d == OrderedDict(((0, 3), (3, 2), (2, 1))) - False - >>> d == dict(d) - False - >>> d == False - False - """ - if isinstance(other, OrderedDict): - # FIXME: efficiency? - # Generate both item lists for each compare - return (self.items() == other.items()) - else: - return False - - def __lt__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) - >>> c < d - True - >>> d < c - False - >>> d < dict(c) - Traceback (most recent call last): - TypeError: Can only compare with other OrderedDicts - """ - if not isinstance(other, OrderedDict): - raise TypeError('Can only compare with other OrderedDicts') - # FIXME: efficiency? 
- # Generate both item lists for each compare - return (self.items() < other.items()) - - def __le__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) - >>> e = OrderedDict(d) - >>> c <= d - True - >>> d <= c - False - >>> d <= dict(c) - Traceback (most recent call last): - TypeError: Can only compare with other OrderedDicts - >>> d <= e - True - """ - if not isinstance(other, OrderedDict): - raise TypeError('Can only compare with other OrderedDicts') - # FIXME: efficiency? - # Generate both item lists for each compare - return (self.items() <= other.items()) - - def __ne__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d != OrderedDict(d) - False - >>> d != OrderedDict(((1, 3), (2, 1), (3, 2))) - True - >>> d != OrderedDict(((1, 0), (3, 2), (2, 1))) - True - >>> d == OrderedDict(((0, 3), (3, 2), (2, 1))) - False - >>> d != dict(d) - True - >>> d != False - True - """ - if isinstance(other, OrderedDict): - # FIXME: efficiency? - # Generate both item lists for each compare - return not (self.items() == other.items()) - else: - return True - - def __gt__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) - >>> d > c - True - >>> c > d - False - >>> d > dict(c) - Traceback (most recent call last): - TypeError: Can only compare with other OrderedDicts - """ - if not isinstance(other, OrderedDict): - raise TypeError('Can only compare with other OrderedDicts') - # FIXME: efficiency? - # Generate both item lists for each compare - return (self.items() > other.items()) - - def __ge__(self, other): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> c = OrderedDict(((0, 3), (3, 2), (2, 1))) - >>> e = OrderedDict(d) - >>> c >= d - False - >>> d >= c - True - >>> d >= dict(c) - Traceback (most recent call last): - TypeError: Can only compare with other OrderedDicts - >>> e >= d - True - """ - if not isinstance(other, OrderedDict): - raise TypeError('Can only compare with other OrderedDicts') - # FIXME: efficiency? 
- # Generate both item lists for each compare - return (self.items() >= other.items()) - - def __repr__(self): - """ - Used for __repr__ and __str__ - - >>> r1 = repr(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f')))) - >>> r1 - "OrderedDict([('a', 'b'), ('c', 'd'), ('e', 'f')])" - >>> r2 = repr(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd')))) - >>> r2 - "OrderedDict([('a', 'b'), ('e', 'f'), ('c', 'd')])" - >>> r1 == str(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f')))) - True - >>> r2 == str(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd')))) - True - """ - return '%s([%s])' % (self.__class__.__name__, ', '.join( - ['(%r, %r)' % (key, self[key]) for key in self._sequence])) - - def __setitem__(self, key, val): - """ - Allows slice assignment, so long as the slice is an OrderedDict - >>> d = OrderedDict() - >>> d['a'] = 'b' - >>> d['b'] = 'a' - >>> d[3] = 12 - >>> d - OrderedDict([('a', 'b'), ('b', 'a'), (3, 12)]) - >>> d[:] = OrderedDict(((1, 2), (2, 3), (3, 4))) - >>> d - OrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> d[::2] = OrderedDict(((7, 8), (9, 10))) - >>> d - OrderedDict([(7, 8), (2, 3), (9, 10)]) - >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4))) - >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8))) - >>> d - OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)]) - >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)), strict=True) - >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8))) - >>> d - OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)]) - - >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)), strict=True) - >>> a[3] = 4 - >>> a - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]) - Traceback (most recent call last): - ValueError: slice assignment must be from unique keys - >>> a = OrderedDict(((0, 1), (1, 2), (2, 3))) - >>> a[3] = 4 - >>> a - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a - OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a[::-1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> a - OrderedDict([(3, 4), (2, 3), (1, 2), (0, 1)]) - - >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> d[:1] = 3 - Traceback (most recent call last): - TypeError: slice assignment requires an OrderedDict - - >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)]) - >>> d[:1] = OrderedDict([(9, 8)]) - >>> d - OrderedDict([(9, 8), (1, 2), (2, 3), (3, 4)]) - """ - if isinstance(key, types.SliceType): - if not isinstance(val, OrderedDict): - # FIXME: allow a list of tuples? - raise TypeError('slice assignment requires an OrderedDict') - keys = self._sequence[key] - # NOTE: Could use ``range(*key.indices(len(self._sequence)))`` - indexes = range(len(self._sequence))[key] - if key.step is None: - # NOTE: new slice may not be the same size as the one being - # overwritten ! - # NOTE: What is the algorithm for an impossible slice? - # e.g. d[5:3] - pos = key.start or 0 - del self[key] - newkeys = val.keys() - for k in newkeys: - if k in self: - if self.strict: - raise ValueError('slice assignment must be from ' - 'unique keys') - else: - # NOTE: This removes duplicate keys *first* - # so start position might have changed? 
- del self[k] - self._sequence = (self._sequence[:pos] + newkeys + - self._sequence[pos:]) - dict.update(self, val) - else: - # extended slice - length of new slice must be the same - # as the one being replaced - if len(keys) != len(val): - raise ValueError('attempt to assign sequence of size %s ' - 'to extended slice of size %s' % (len(val), len(keys))) - # FIXME: efficiency? - del self[key] - item_list = zip(indexes, val.items()) - # smallest indexes first - higher indexes not guaranteed to - # exist - item_list.sort() - for pos, (newkey, newval) in item_list: - if self.strict and newkey in self: - raise ValueError('slice assignment must be from unique' - ' keys') - self.insert(pos, newkey, newval) - else: - if key not in self: - self._sequence.append(key) - dict.__setitem__(self, key, val) - - def __getitem__(self, key): - """ - Allows slicing. Returns an OrderedDict if you slice. - >>> b = OrderedDict([(7, 0), (6, 1), (5, 2), (4, 3), (3, 4), (2, 5), (1, 6)]) - >>> b[::-1] - OrderedDict([(1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 0)]) - >>> b[2:5] - OrderedDict([(5, 2), (4, 3), (3, 4)]) - >>> type(b[2:4]) - <class '__main__.OrderedDict'> - """ - if isinstance(key, types.SliceType): - # FIXME: does this raise the error we want? - keys = self._sequence[key] - # FIXME: efficiency? - return OrderedDict([(entry, self[entry]) for entry in keys]) - else: - return dict.__getitem__(self, key) - - __str__ = __repr__ - - def __setattr__(self, name, value): - """ - Implemented so that accesses to ``sequence`` raise a warning and are - diverted to the new ``setkeys`` method. - """ - if name == 'sequence': - warnings.warn('Use of the sequence attribute is deprecated.' - ' Use the keys method instead.', DeprecationWarning) - # NOTE: doesn't return anything - self.setkeys(value) - else: - # FIXME: do we want to allow arbitrary setting of attributes? - # Or do we want to manage it? - object.__setattr__(self, name, value) - - def __getattr__(self, name): - """ - Implemented so that access to ``sequence`` raises a warning. - - >>> d = OrderedDict() - >>> d.sequence - [] - """ - if name == 'sequence': - warnings.warn('Use of the sequence attribute is deprecated.' - ' Use the keys method instead.', DeprecationWarning) - # NOTE: Still (currently) returns a direct reference. Need to - # because code that uses sequence will expect to be able to - # mutate it in place. - return self._sequence - else: - # raise the appropriate error - raise AttributeError("OrderedDict has no '%s' attribute" % name) - - def __deepcopy__(self, memo): - """ - To allow deepcopy to work with OrderedDict. - - >>> from copy import deepcopy - >>> a = OrderedDict([(1, 1), (2, 2), (3, 3)]) - >>> a['test'] = {} - >>> b = deepcopy(a) - >>> b == a - True - >>> b is a - False - >>> a['test'] is b['test'] - False - """ - from copy import deepcopy - return self.__class__(deepcopy(self.items(), memo), self.strict) - - -### Read-only methods ### - - def copy(self): - """ - >>> OrderedDict(((1, 3), (3, 2), (2, 1))).copy() - OrderedDict([(1, 3), (3, 2), (2, 1)]) - """ - return OrderedDict(self) - - def items(self): - """ - ``items`` returns a list of tuples representing all the - ``(key, value)`` pairs in the dictionary. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.items() - [(1, 3), (3, 2), (2, 1)] - >>> d.clear() - >>> d.items() - [] - """ - return zip(self._sequence, self.values()) - - def keys(self): - """ - Return a list of keys in the ``OrderedDict``. 
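Note that ``keys`` returns a copy of the internal sequence (``self._sequence[:]``), so mutating the returned list leaves the dictionary's order untouched; a small sketch:

    >>> d = OrderedDict(((1, 3), (3, 2)))
    >>> ks = d.keys()
    >>> ks.reverse()
    >>> d.keys()
    [1, 3]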
- - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.keys() - [1, 3, 2] - """ - return self._sequence[:] - - def values(self, values=None): - """ - Return a list of all the values in the OrderedDict. - - Optionally you can pass in a list of values, which will replace the - current list. The value list must be the same len as the OrderedDict. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.values() - [3, 2, 1] - """ - return [self[key] for key in self._sequence] - - def iteritems(self): - """ - >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iteritems() - >>> ii.next() - (1, 3) - >>> ii.next() - (3, 2) - >>> ii.next() - (2, 1) - >>> ii.next() - Traceback (most recent call last): - StopIteration - """ - def make_iter(self=self): - keys = self.iterkeys() - while True: - key = keys.next() - yield (key, self[key]) - return make_iter() - - def iterkeys(self): - """ - >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iterkeys() - >>> ii.next() - 1 - >>> ii.next() - 3 - >>> ii.next() - 2 - >>> ii.next() - Traceback (most recent call last): - StopIteration - """ - return iter(self._sequence) - - __iter__ = iterkeys - - def itervalues(self): - """ - >>> iv = OrderedDict(((1, 3), (3, 2), (2, 1))).itervalues() - >>> iv.next() - 3 - >>> iv.next() - 2 - >>> iv.next() - 1 - >>> iv.next() - Traceback (most recent call last): - StopIteration - """ - def make_iter(self=self): - keys = self.iterkeys() - while True: - yield self[keys.next()] - return make_iter() - -### Read-write methods ### - - def clear(self): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.clear() - >>> d - OrderedDict([]) - """ - dict.clear(self) - self._sequence = [] - - def pop(self, key, *args): - """ - No dict.pop in Python 2.2, gotta reimplement it - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.pop(3) - 2 - >>> d - OrderedDict([(1, 3), (2, 1)]) - >>> d.pop(4) - Traceback (most recent call last): - KeyError: 4 - >>> d.pop(4, 0) - 0 - >>> d.pop(4, 0, 1) - Traceback (most recent call last): - TypeError: pop expected at most 2 arguments, got 3 - """ - if len(args) > 1: - raise TypeError, ('pop expected at most 2 arguments, got %s' % - (len(args) + 1)) - if key in self: - val = self[key] - del self[key] - else: - try: - val = args[0] - except IndexError: - raise KeyError(key) - return val - - def popitem(self, i=-1): - """ - Delete and return an item specified by index, not a random one as in - dict. The index is -1 by default (the last item). 
- - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.popitem() - (2, 1) - >>> d - OrderedDict([(1, 3), (3, 2)]) - >>> d.popitem(0) - (1, 3) - >>> OrderedDict().popitem() - Traceback (most recent call last): - KeyError: 'popitem(): dictionary is empty' - >>> d.popitem(2) - Traceback (most recent call last): - IndexError: popitem(): index 2 not valid - """ - if not self._sequence: - raise KeyError('popitem(): dictionary is empty') - try: - key = self._sequence[i] - except IndexError: - raise IndexError('popitem(): index %s not valid' % i) - return (key, self.pop(key)) - - def setdefault(self, key, defval = None): - """ - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.setdefault(1) - 3 - >>> d.setdefault(4) is None - True - >>> d - OrderedDict([(1, 3), (3, 2), (2, 1), (4, None)]) - >>> d.setdefault(5, 0) - 0 - >>> d - OrderedDict([(1, 3), (3, 2), (2, 1), (4, None), (5, 0)]) - """ - if key in self: - return self[key] - else: - self[key] = defval - return defval - - def update(self, from_od): - """ - Update from another OrderedDict or sequence of (key, value) pairs - - >>> d = OrderedDict(((1, 0), (0, 1))) - >>> d.update(OrderedDict(((1, 3), (3, 2), (2, 1)))) - >>> d - OrderedDict([(1, 3), (0, 1), (3, 2), (2, 1)]) - >>> d.update({4: 4}) - Traceback (most recent call last): - TypeError: undefined order, cannot get items from dict - >>> d.update((4, 4)) - Traceback (most recent call last): - TypeError: cannot convert dictionary update sequence element "4" to a 2-item sequence - """ - if isinstance(from_od, OrderedDict): - for key, val in from_od.items(): - self[key] = val - elif isinstance(from_od, dict): - # we lose compatibility with other ordered dict types this way - raise TypeError('undefined order, cannot get items from dict') - else: - # FIXME: efficiency? - # sequence of 2-item sequences, or error - for item in from_od: - try: - key, val = item - except TypeError: - raise TypeError('cannot convert dictionary update' - ' sequence element "%s" to a 2-item sequence' % item) - self[key] = val - - def rename(self, old_key, new_key): - """ - Rename the key for a given value, without modifying sequence order. - - For the case where new_key already exists this raise an exception, - since if new_key exists, it is ambiguous as to what happens to the - associated values, and the position of new_key in the sequence. - - >>> od = OrderedDict() - >>> od['a'] = 1 - >>> od['b'] = 2 - >>> od.items() - [('a', 1), ('b', 2)] - >>> od.rename('b', 'c') - >>> od.items() - [('a', 1), ('c', 2)] - >>> od.rename('c', 'a') - Traceback (most recent call last): - ValueError: New key already exists: 'a' - >>> od.rename('d', 'b') - Traceback (most recent call last): - KeyError: 'd' - """ - if new_key == old_key: - # no-op - return - if new_key in self: - raise ValueError("New key already exists: %r" % new_key) - # rename sequence entry - value = self[old_key] - old_idx = self._sequence.index(old_key) - self._sequence[old_idx] = new_key - # rename internal dict entry - dict.__delitem__(self, old_key) - dict.__setitem__(self, new_key, value) - - def setitems(self, items): - """ - This method allows you to set the items in the dict. - - It takes a list of tuples - of the same sort returned by the ``items`` - method. 
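Since ``setitems`` simply clears the dict and replays the pairs through ``update``, a duplicated key keeps its first position but takes its last value (an editorial sketch of that edge case, not from the original file):

    >>> d = OrderedDict()
    >>> d.setitems(((1, 2), (2, 3), (1, 9)))
    >>> d
    OrderedDict([(1, 9), (2, 3)])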
- - >>> d = OrderedDict() - >>> d.setitems(((3, 1), (2, 3), (1, 2))) - >>> d - OrderedDict([(3, 1), (2, 3), (1, 2)]) - """ - self.clear() - # FIXME: this allows you to pass in an OrderedDict as well :-) - self.update(items) - - def setkeys(self, keys): - """ - ``setkeys`` all ows you to pass in a new list of keys which will - replace the current set. This must contain the same set of keys, but - need not be in the same order. - - If you pass in new keys that don't match, a ``KeyError`` will be - raised. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.keys() - [1, 3, 2] - >>> d.setkeys((1, 2, 3)) - >>> d - OrderedDict([(1, 3), (2, 1), (3, 2)]) - >>> d.setkeys(['a', 'b', 'c']) - Traceback (most recent call last): - KeyError: 'Keylist is not the same as current keylist.' - """ - # FIXME: Efficiency? (use set for Python 2.4 :-) - # NOTE: list(keys) rather than keys[:] because keys[:] returns - # a tuple, if keys is a tuple. - kcopy = list(keys) - kcopy.sort() - self._sequence.sort() - if kcopy != self._sequence: - raise KeyError('Keylist is not the same as current keylist.') - # NOTE: This makes the _sequence attribute a new object, instead - # of changing it in place. - # FIXME: efficiency? - self._sequence = list(keys) - - def setvalues(self, values): - """ - You can pass in a list of values, which will replace the - current list. The value list must be the same len as the OrderedDict. - - (Or a ``ValueError`` is raised.) - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.setvalues((1, 2, 3)) - >>> d - OrderedDict([(1, 1), (3, 2), (2, 3)]) - >>> d.setvalues([6]) - Traceback (most recent call last): - ValueError: Value list is not the same length as the OrderedDict. - """ - if len(values) != len(self): - # FIXME: correct error to raise? - raise ValueError('Value list is not the same length as the ' - 'OrderedDict.') - self.update(zip(self, values)) - -### Sequence Methods ### - - def index(self, key): - """ - Return the position of the specified key in the OrderedDict. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.index(3) - 1 - >>> d.index(4) - Traceback (most recent call last): - ValueError: list.index(x): x not in list - """ - return self._sequence.index(key) - - def insert(self, index, key, value): - """ - Takes ``index``, ``key``, and ``value`` as arguments. - - Sets ``key`` to ``value``, so that ``key`` is at position ``index`` in - the OrderedDict. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.insert(0, 4, 0) - >>> d - OrderedDict([(4, 0), (1, 3), (3, 2), (2, 1)]) - >>> d.insert(0, 2, 1) - >>> d - OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2)]) - >>> d.insert(8, 8, 1) - >>> d - OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2), (8, 1)]) - """ - if key in self: - # FIXME: efficiency? - del self[key] - self._sequence.insert(index, key) - dict.__setitem__(self, key, value) - - def reverse(self): - """ - Reverse the order of the OrderedDict. - - >>> d = OrderedDict(((1, 3), (3, 2), (2, 1))) - >>> d.reverse() - >>> d - OrderedDict([(2, 1), (3, 2), (1, 3)]) - """ - self._sequence.reverse() - - def sort(self, *args, **kwargs): - """ - Sort the key order in the OrderedDict. - - This method takes the same arguments as the ``list.sort`` method on - your version of Python. - - >>> d = OrderedDict(((4, 1), (2, 2), (3, 3), (1, 4))) - >>> d.sort() - >>> d - OrderedDict([(1, 4), (2, 2), (3, 3), (4, 1)]) - """ - self._sequence.sort(*args, **kwargs) - -class Keys(object): - # FIXME: should this object be a subclass of list? 
- """ - Custom object for accessing the keys of an OrderedDict. - - Can be called like the normal ``OrderedDict.keys`` method, but also - supports indexing and sequence methods. - """ - - def __init__(self, main): - self._main = main - - def __call__(self): - """Pretend to be the keys method.""" - return self._main._keys() - - def __getitem__(self, index): - """Fetch the key at position i.""" - # NOTE: this automatically supports slicing :-) - return self._main._sequence[index] - - def __setitem__(self, index, name): - """ - You cannot assign to keys, but you can do slice assignment to re-order - them. - - You can only do slice assignment if the new set of keys is a reordering - of the original set. - """ - if isinstance(index, types.SliceType): - # FIXME: efficiency? - # check length is the same - indexes = range(len(self._main._sequence))[index] - if len(indexes) != len(name): - raise ValueError('attempt to assign sequence of size %s ' - 'to slice of size %s' % (len(name), len(indexes))) - # check they are the same keys - # FIXME: Use set - old_keys = self._main._sequence[index] - new_keys = list(name) - old_keys.sort() - new_keys.sort() - if old_keys != new_keys: - raise KeyError('Keylist is not the same as current keylist.') - orig_vals = [self._main[k] for k in name] - del self._main[index] - vals = zip(indexes, name, orig_vals) - vals.sort() - for i, k, v in vals: - if self._main.strict and k in self._main: - raise ValueError('slice assignment must be from ' - 'unique keys') - self._main.insert(i, k, v) - else: - raise ValueError('Cannot assign to keys') - - ### following methods pinched from UserList and adapted ### - def __repr__(self): return repr(self._main._sequence) - - # FIXME: do we need to check if we are comparing with another ``Keys`` - # object? (like the __cast method of UserList) - def __lt__(self, other): return self._main._sequence < other - def __le__(self, other): return self._main._sequence <= other - def __eq__(self, other): return self._main._sequence == other - def __ne__(self, other): return self._main._sequence != other - def __gt__(self, other): return self._main._sequence > other - def __ge__(self, other): return self._main._sequence >= other - # FIXME: do we need __cmp__ as well as rich comparisons? 
- def __cmp__(self, other): return cmp(self._main._sequence, other) - - def __contains__(self, item): return item in self._main._sequence - def __len__(self): return len(self._main._sequence) - def __iter__(self): return self._main.iterkeys() - def count(self, item): return self._main._sequence.count(item) - def index(self, item, *args): return self._main._sequence.index(item, *args) - def reverse(self): self._main._sequence.reverse() - def sort(self, *args, **kwds): self._main._sequence.sort(*args, **kwds) - def __mul__(self, n): return self._main._sequence*n - __rmul__ = __mul__ - def __add__(self, other): return self._main._sequence + other - def __radd__(self, other): return other + self._main._sequence - - ## following methods not implemented for keys ## - def __delitem__(self, i): raise TypeError('Can\'t delete items from keys') - def __iadd__(self, other): raise TypeError('Can\'t add in place to keys') - def __imul__(self, n): raise TypeError('Can\'t multiply keys in place') - def append(self, item): raise TypeError('Can\'t append items to keys') - def insert(self, i, item): raise TypeError('Can\'t insert items into keys') - def pop(self, i=-1): raise TypeError('Can\'t pop items from keys') - def remove(self, item): raise TypeError('Can\'t remove items from keys') - def extend(self, other): raise TypeError('Can\'t extend keys') - -class Items(object): - """ - Custom object for accessing the items of an OrderedDict. - - Can be called like the normal ``OrderedDict.items`` method, but also - supports indexing and sequence methods. - """ - - def __init__(self, main): - self._main = main - - def __call__(self): - """Pretend to be the items method.""" - return self._main._items() - - def __getitem__(self, index): - """Fetch the item at position i.""" - if isinstance(index, types.SliceType): - # fetching a slice returns an OrderedDict - return self._main[index].items() - key = self._main._sequence[index] - return (key, self._main[key]) - - def __setitem__(self, index, item): - """Set item at position i to item.""" - if isinstance(index, types.SliceType): - # NOTE: item must be an iterable (list of tuples) - self._main[index] = OrderedDict(item) - else: - # FIXME: Does this raise a sensible error? - orig = self._main.keys[index] - key, value = item - if self._main.strict and key in self and (key != orig): - raise ValueError('slice assignment must be from ' - 'unique keys') - # delete the current one - del self._main[self._main._sequence[index]] - self._main.insert(index, key, value) - - def __delitem__(self, i): - """Delete the item at position i.""" - key = self._main._sequence[i] - if isinstance(i, types.SliceType): - for k in key: - # FIXME: efficiency? - del self._main[k] - else: - del self._main[key] - - ### following methods pinched from UserList and adapted ### - def __repr__(self): return repr(self._main.items()) - - # FIXME: do we need to check if we are comparing with another ``Items`` - # object? 
(like the __cast method of UserList) - def __lt__(self, other): return self._main.items() < other - def __le__(self, other): return self._main.items() <= other - def __eq__(self, other): return self._main.items() == other - def __ne__(self, other): return self._main.items() != other - def __gt__(self, other): return self._main.items() > other - def __ge__(self, other): return self._main.items() >= other - def __cmp__(self, other): return cmp(self._main.items(), other) - - def __contains__(self, item): return item in self._main.items() - def __len__(self): return len(self._main._sequence) # easier :-) - def __iter__(self): return self._main.iteritems() - def count(self, item): return self._main.items().count(item) - def index(self, item, *args): return self._main.items().index(item, *args) - def reverse(self): self._main.reverse() - def sort(self, *args, **kwds): self._main.sort(*args, **kwds) - def __mul__(self, n): return self._main.items()*n - __rmul__ = __mul__ - def __add__(self, other): return self._main.items() + other - def __radd__(self, other): return other + self._main.items() - - def append(self, item): - """Add an item to the end.""" - # FIXME: this is only append if the key isn't already present - key, value = item - self._main[key] = value - - def insert(self, i, item): - key, value = item - self._main.insert(i, key, value) - - def pop(self, i=-1): - key = self._main._sequence[i] - return (key, self._main.pop(key)) - - def remove(self, item): - key, value = item - try: - assert value == self._main[key] - except (KeyError, AssertionError): - raise ValueError('ValueError: list.remove(x): x not in list') - else: - del self._main[key] - - def extend(self, other): - # FIXME: is only a true extend if none of the keys already present - for item in other: - key, value = item - self._main[key] = value - - def __iadd__(self, other): - self.extend(other) - - ## following methods not implemented for items ## - - def __imul__(self, n): raise TypeError('Can\'t multiply items in place') - -class Values(object): - """ - Custom object for accessing the values of an OrderedDict. - - Can be called like the normal ``OrderedDict.values`` method, but also - supports indexing and sequence methods. - """ - - def __init__(self, main): - self._main = main - - def __call__(self): - """Pretend to be the values method.""" - return self._main._values() - - def __getitem__(self, index): - """Fetch the value at position i.""" - if isinstance(index, types.SliceType): - return [self._main[key] for key in self._main._sequence[index]] - else: - return self._main[self._main._sequence[index]] - - def __setitem__(self, index, value): - """ - Set the value at position i to value. - - You can only do slice assignment to values if you supply a sequence of - equal length to the slice you are replacing. - """ - if isinstance(index, types.SliceType): - keys = self._main._sequence[index] - if len(keys) != len(value): - raise ValueError('attempt to assign sequence of size %s ' - 'to slice of size %s' % (len(name), len(keys))) - # FIXME: efficiency? 
Would be better to calculate the indexes - # directly from the slice object - # NOTE: the new keys can collide with existing keys (or even - # contain duplicates) - these will overwrite - for key, val in zip(keys, value): - self._main[key] = val - else: - self._main[self._main._sequence[index]] = value - - ### following methods pinched from UserList and adapted ### - def __repr__(self): return repr(self._main.values()) - - # FIXME: do we need to check if we are comparing with another ``Values`` - # object? (like the __cast method of UserList) - def __lt__(self, other): return self._main.values() < other - def __le__(self, other): return self._main.values() <= other - def __eq__(self, other): return self._main.values() == other - def __ne__(self, other): return self._main.values() != other - def __gt__(self, other): return self._main.values() > other - def __ge__(self, other): return self._main.values() >= other - def __cmp__(self, other): return cmp(self._main.values(), other) - - def __contains__(self, item): return item in self._main.values() - def __len__(self): return len(self._main._sequence) # easier :-) - def __iter__(self): return self._main.itervalues() - def count(self, item): return self._main.values().count(item) - def index(self, item, *args): return self._main.values().index(item, *args) - - def reverse(self): - """Reverse the values""" - vals = self._main.values() - vals.reverse() - # FIXME: efficiency - self[:] = vals - - def sort(self, *args, **kwds): - """Sort the values.""" - vals = self._main.values() - vals.sort(*args, **kwds) - self[:] = vals - - def __mul__(self, n): return self._main.values()*n - __rmul__ = __mul__ - def __add__(self, other): return self._main.values() + other - def __radd__(self, other): return other + self._main.values() - - ## following methods not implemented for values ## - def __delitem__(self, i): raise TypeError('Can\'t delete items from values') - def __iadd__(self, other): raise TypeError('Can\'t add in place to values') - def __imul__(self, n): raise TypeError('Can\'t multiply values in place') - def append(self, item): raise TypeError('Can\'t append items to values') - def insert(self, i, item): raise TypeError('Can\'t insert items into values') - def pop(self, i=-1): raise TypeError('Can\'t pop items from values') - def remove(self, item): raise TypeError('Can\'t remove items from values') - def extend(self, other): raise TypeError('Can\'t extend values') - -class SequenceOrderedDict(OrderedDict): - """ - Experimental version of OrderedDict that has a custom object for ``keys``, - ``values``, and ``items``. - - These are callable sequence objects that work as methods, or can be - manipulated directly as sequences. - - Test for ``keys``, ``items`` and ``values``. 
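In short, each accessor doubles as a method and as a sequence (a condensed sketch of what the tests below exercise):

    >>> d = SequenceOrderedDict(((1, 2), (2, 3)))
    >>> d.keys()
    [1, 2]
    >>> d.keys[0]
    1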
- - >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4))) - >>> d - SequenceOrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> d.keys - [1, 2, 3] - >>> d.keys() - [1, 2, 3] - >>> d.setkeys((3, 2, 1)) - >>> d - SequenceOrderedDict([(3, 4), (2, 3), (1, 2)]) - >>> d.setkeys((1, 2, 3)) - >>> d.keys[0] - 1 - >>> d.keys[:] - [1, 2, 3] - >>> d.keys[-1] - 3 - >>> d.keys[-2] - 2 - >>> d.keys[0:2] = [2, 1] - >>> d - SequenceOrderedDict([(2, 3), (1, 2), (3, 4)]) - >>> d.keys.reverse() - >>> d.keys - [3, 1, 2] - >>> d.keys = [1, 2, 3] - >>> d - SequenceOrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> d.keys = [3, 1, 2] - >>> d - SequenceOrderedDict([(3, 4), (1, 2), (2, 3)]) - >>> a = SequenceOrderedDict() - >>> b = SequenceOrderedDict() - >>> a.keys == b.keys - 1 - >>> a['a'] = 3 - >>> a.keys == b.keys - 0 - >>> b['a'] = 3 - >>> a.keys == b.keys - 1 - >>> b['b'] = 3 - >>> a.keys == b.keys - 0 - >>> a.keys > b.keys - 0 - >>> a.keys < b.keys - 1 - >>> 'a' in a.keys - 1 - >>> len(b.keys) - 2 - >>> 'c' in d.keys - 0 - >>> 1 in d.keys - 1 - >>> [v for v in d.keys] - [3, 1, 2] - >>> d.keys.sort() - >>> d.keys - [1, 2, 3] - >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)), strict=True) - >>> d.keys[::-1] = [1, 2, 3] - >>> d - SequenceOrderedDict([(3, 4), (2, 3), (1, 2)]) - >>> d.keys[:2] - [3, 2] - >>> d.keys[:2] = [1, 3] - Traceback (most recent call last): - KeyError: 'Keylist is not the same as current keylist.' - - >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4))) - >>> d - SequenceOrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> d.values - [2, 3, 4] - >>> d.values() - [2, 3, 4] - >>> d.setvalues((4, 3, 2)) - >>> d - SequenceOrderedDict([(1, 4), (2, 3), (3, 2)]) - >>> d.values[::-1] - [2, 3, 4] - >>> d.values[0] - 4 - >>> d.values[-2] - 3 - >>> del d.values[0] - Traceback (most recent call last): - TypeError: Can't delete items from values - >>> d.values[::2] = [2, 4] - >>> d - SequenceOrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> 7 in d.values - 0 - >>> len(d.values) - 3 - >>> [val for val in d.values] - [2, 3, 4] - >>> d.values[-1] = 2 - >>> d.values.count(2) - 2 - >>> d.values.index(2) - 0 - >>> d.values[-1] = 7 - >>> d.values - [2, 3, 7] - >>> d.values.reverse() - >>> d.values - [7, 3, 2] - >>> d.values.sort() - >>> d.values - [2, 3, 7] - >>> d.values.append('anything') - Traceback (most recent call last): - TypeError: Can't append items to values - >>> d.values = (1, 2, 3) - >>> d - SequenceOrderedDict([(1, 1), (2, 2), (3, 3)]) - - >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4))) - >>> d - SequenceOrderedDict([(1, 2), (2, 3), (3, 4)]) - >>> d.items() - [(1, 2), (2, 3), (3, 4)] - >>> d.setitems([(3, 4), (2 ,3), (1, 2)]) - >>> d - SequenceOrderedDict([(3, 4), (2, 3), (1, 2)]) - >>> d.items[0] - (3, 4) - >>> d.items[:-1] - [(3, 4), (2, 3)] - >>> d.items[1] = (6, 3) - >>> d.items - [(3, 4), (6, 3), (1, 2)] - >>> d.items[1:2] = [(9, 9)] - >>> d - SequenceOrderedDict([(3, 4), (9, 9), (1, 2)]) - >>> del d.items[1:2] - >>> d - SequenceOrderedDict([(3, 4), (1, 2)]) - >>> (3, 4) in d.items - 1 - >>> (4, 3) in d.items - 0 - >>> len(d.items) - 2 - >>> [v for v in d.items] - [(3, 4), (1, 2)] - >>> d.items.count((3, 4)) - 1 - >>> d.items.index((1, 2)) - 1 - >>> d.items.index((2, 1)) - Traceback (most recent call last): - ValueError: list.index(x): x not in list - >>> d.items.reverse() - >>> d.items - [(1, 2), (3, 4)] - >>> d.items.reverse() - >>> d.items.sort() - >>> d.items - [(1, 2), (3, 4)] - >>> d.items.append((5, 6)) - >>> d.items - [(1, 2), (3, 4), (5, 6)] - >>> d.items.insert(0, (0, 0)) - >>> d.items - [(0, 0), 
(1, 2), (3, 4), (5, 6)] - >>> d.items.insert(-1, (7, 8)) - >>> d.items - [(0, 0), (1, 2), (3, 4), (7, 8), (5, 6)] - >>> d.items.pop() - (5, 6) - >>> d.items - [(0, 0), (1, 2), (3, 4), (7, 8)] - >>> d.items.remove((1, 2)) - >>> d.items - [(0, 0), (3, 4), (7, 8)] - >>> d.items.extend([(1, 2), (5, 6)]) - >>> d.items - [(0, 0), (3, 4), (7, 8), (1, 2), (5, 6)] - """ - - def __init__(self, init_val=(), strict=True): - OrderedDict.__init__(self, init_val, strict=strict) - self._keys = self.keys - self._values = self.values - self._items = self.items - self.keys = Keys(self) - self.values = Values(self) - self.items = Items(self) - self._att_dict = { - 'keys': self.setkeys, - 'items': self.setitems, - 'values': self.setvalues, - } - - def __setattr__(self, name, value): - """Protect keys, items, and values.""" - if not '_att_dict' in self.__dict__: - object.__setattr__(self, name, value) - else: - try: - fun = self._att_dict[name] - except KeyError: - OrderedDict.__setattr__(self, name, value) - else: - fun(value) - -if __name__ == '__main__': - if INTP_VER < (2, 3): - raise RuntimeError("Tests require Python v.2.3 or later") - # turn off warnings for tests - warnings.filterwarnings('ignore') - # run the code tests in doctest format - import doctest - m = sys.modules.get('__main__') - globs = m.__dict__.copy() - globs.update({ - 'INTP_VER': INTP_VER, - }) - doctest.testmod(m, globs=globs) - diff --git a/lib/git/refs.py b/lib/git/refs.py deleted file mode 100644 index 451cc3a5..00000000 --- a/lib/git/refs.py +++ /dev/null @@ -1,1052 +0,0 @@ -# refs.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -""" Module containing all ref based objects """ - -import os -from objects import ( - Object, - Commit - ) -from objects.util import get_object_type_by_name -from util import ( - LazyMixin, - Iterable, - join_path, - join_path_native, - to_native_path_linux - ) - -from gitdb.util import ( - join, - dirname, - isdir, - exists, - isfile, - rename, - hex_to_bin - ) - -from config import ( - GitConfigParser, - SectionConstraint - ) - -from exc import GitCommandError - -__all__ = ("SymbolicReference", "Reference", "HEAD", "Head", "TagReference", - "RemoteReference", "Tag" ) - -class SymbolicReference(object): - """Represents a special case of a reference such that this reference is symbolic. - It does not point to a specific commit, but to another Head, which itself - specifies a commit. 
- - A typical example for a symbolic reference is HEAD.""" - __slots__ = ("repo", "path") - _common_path_default = "" - _id_attribute_ = "name" - - def __init__(self, repo, path): - self.repo = repo - self.path = path - - def __str__(self): - return self.path - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.path) - - def __eq__(self, other): - return self.path == other.path - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.path) - - @property - def name(self): - """ - :return: - In case of symbolic references, the shortest assumable name - is the path itself.""" - return self.path - - def _abs_path(self): - return join_path_native(self.repo.git_dir, self.path) - - @classmethod - def _get_packed_refs_path(cls, repo): - return join(repo.git_dir, 'packed-refs') - - @classmethod - def _iter_packed_refs(cls, repo): - """Returns an iterator yielding pairs of sha1/path pairs for the corresponding refs. - :note: The packed refs file will be kept open as long as we iterate""" - try: - fp = open(cls._get_packed_refs_path(repo), 'r') - for line in fp: - line = line.strip() - if not line: - continue - if line.startswith('#'): - if line.startswith('# pack-refs with:') and not line.endswith('peeled'): - raise TypeError("PackingType of packed-Refs not understood: %r" % line) - # END abort if we do not understand the packing scheme - continue - # END parse comment - - # skip dereferenced tag object entries - previous line was actual - # tag reference for it - if line[0] == '^': - continue - - yield tuple(line.split(' ', 1)) - # END for each line - except (OSError,IOError): - raise StopIteration - # END no packed-refs file handling - # NOTE: Had try-finally block around here to close the fp, - # but some python version woudn't allow yields within that. - # I believe files are closing themselves on destruction, so it is - # alright. - - @classmethod - def dereference_recursive(cls, repo, ref_path): - """ - :return: hexsha stored in the reference at the given ref_path, recursively dereferencing all - intermediate references as required - :param repo: the repository containing the reference at ref_path""" - while True: - ref = cls(repo, ref_path) - hexsha, ref_path = ref._get_ref_info() - if hexsha is not None: - return hexsha - # END recursive dereferencing - - def _get_ref_info(self): - """Return: (sha, target_ref_path) if available, the sha the file at - rela_path points to, or None. target_ref_path is the reference we - point to, or None""" - tokens = None - try: - fp = open(self._abs_path(), 'r') - value = fp.read().rstrip() - fp.close() - tokens = value.split(" ") - except (OSError,IOError): - # Probably we are just packed, find our entry in the packed refs file - # NOTE: We are not a symbolic ref if we are in a packed file, as these - # are excluded explictly - for sha, path in self._iter_packed_refs(self.repo): - if path != self.path: continue - tokens = (sha, path) - break - # END for each packed ref - # END handle packed refs - - if tokens is None: - raise ValueError("Reference at %r does not exist" % self.path) - - # is it a reference ? 
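For orientation, a sketch of the on-disk formats the parsing code here has to deal with (the sha is borrowed from the Head example further down; real values differ)::

    # a symbolic ref file such as .git/HEAD names another ref:
    ref: refs/heads/master

    # a detached ref file holds a bare 40-byte hexsha:
    1c09f116cbc2cb4100fb6935bb162daa4723f455

    # packed-refs: a header, '<sha> <path>' entries, and '^<sha>' lines
    # carrying the peeled target of the preceding tag entry:
    # pack-refs with: peeled
    1c09f116cbc2cb4100fb6935bb162daa4723f455 refs/heads/master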
- if tokens[0] == 'ref:':
- return (None, tokens[1])
-
- # it's a commit
- if self.repo.re_hexsha_only.match(tokens[0]):
- return (tokens[0], None)
-
- raise ValueError("Failed to parse reference information from %r" % self.path)
-
- def _get_commit(self):
- """
- :return:
- Commit object we point to, works for detached and non-detached
- SymbolicReferences"""
- # we partially reimplement it to prevent unnecessary file access
- hexsha, target_ref_path = self._get_ref_info()
-
- # it is a detached reference
- if hexsha:
- return Commit(self.repo, hex_to_bin(hexsha))
-
- return self.from_path(self.repo, target_ref_path).commit
-
- def _set_commit(self, commit):
- """Set our commit, possibly dereference our symbolic reference first.
- If the reference does not exist, it will be created"""
- is_detached = True
- try:
- is_detached = self.is_detached
- except ValueError:
- pass
- # END handle non-existing ones
- if is_detached:
- return self._set_reference(commit)
-
- # set the commit on our reference
- self._get_reference().commit = commit
-
- commit = property(_get_commit, _set_commit, doc="Query or set commits directly")
-
- def _get_reference(self):
- """:return: Reference Object we point to"""
- sha, target_ref_path = self._get_ref_info()
- if target_ref_path is None:
- raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha))
- return self.from_path(self.repo, target_ref_path)
-
- def _set_reference(self, ref):
- """Set ourselves to the given ref. It will stay a symbol if the ref is a Reference.
- Otherwise we try to get a commit from it using our interface.
-
- Strings are allowed but will be checked to be sure we have a commit"""
- write_value = None
- if isinstance(ref, SymbolicReference):
- write_value = "ref: %s" % ref.path
- elif isinstance(ref, Commit):
- write_value = ref.hexsha
- else:
- try:
- write_value = ref.commit.hexsha
- except AttributeError:
- try:
- obj = self.repo.rev_parse(ref+"^{}") # optionally deref tags
- if obj.type != "commit":
- raise TypeError("Invalid object type behind sha: %s" % ref)
- write_value = obj.hexsha
- except Exception:
- raise ValueError("Could not extract object from %s" % ref)
- # END end try string
- # END try commit attribute
-
- # maintain the orig-head if we are currently checked-out
- head = HEAD(self.repo)
- try:
- if head.ref == self:
- try:
- # TODO: implement this atomically, if we fail below, orig_head is at an incorrect spot
- # Enforce the creation of ORIG_HEAD
- SymbolicReference.create(self.repo, head.orig_head().name, self.commit, force=True)
- except ValueError:
- pass
- #END exception handling
- # END if we are checked-out
- except TypeError:
- pass
- # END handle detached heads
-
- # if we are writing a ref, use symbolic ref to get the reflog and more
- # checking
- # Otherwise we detach it and have to do it manually. 
Besides, this works - # recursively automaitcally, but should be replaced with a python implementation - # soon - if write_value.startswith('ref:'): - self.repo.git.symbolic_ref(self.path, write_value[5:]) - return - # END non-detached handling - - path = self._abs_path() - directory = dirname(path) - if not isdir(directory): - os.makedirs(directory) - - fp = open(path, "wb") - try: - fp.write(write_value) - finally: - fp.close() - # END writing - - - # aliased reference - reference = property(_get_reference, _set_reference, doc="Returns the Reference we point to") - ref = reference - - def is_valid(self): - """ - :return: - True if the reference is valid, hence it can be read and points to - a valid object or reference.""" - try: - self.commit - except (OSError, ValueError): - return False - else: - return True - - @property - def is_detached(self): - """ - :return: - True if we are a detached reference, hence we point to a specific commit - instead to another reference""" - try: - self.reference - return False - except TypeError: - return True - - - @classmethod - def to_full_path(cls, path): - """ - :return: string with a full repository-relative path which can be used to initialize - a Reference instance, for instance by using ``Reference.from_path``""" - if isinstance(path, SymbolicReference): - path = path.path - full_ref_path = path - if not cls._common_path_default: - return full_ref_path - if not path.startswith(cls._common_path_default+"/"): - full_ref_path = '%s/%s' % (cls._common_path_default, path) - return full_ref_path - - @classmethod - def delete(cls, repo, path): - """Delete the reference at the given path - - :param repo: - Repository to delete the reference from - - :param path: - Short or full path pointing to the reference, i.e. refs/myreference - or just "myreference", hence 'refs/' is implied. - Alternatively the symbolic reference to be deleted""" - full_ref_path = cls.to_full_path(path) - abs_path = join(repo.git_dir, full_ref_path) - if exists(abs_path): - os.remove(abs_path) - else: - # check packed refs - pack_file_path = cls._get_packed_refs_path(repo) - try: - reader = open(pack_file_path) - except (OSError,IOError): - pass # it didnt exist at all - else: - new_lines = list() - made_change = False - dropped_last_line = False - for line in reader: - # keep line if it is a comment or if the ref to delete is not - # in the line - # If we deleted the last line and this one is a tag-reference object, - # we drop it as well - if ( line.startswith('#') or full_ref_path not in line ) and \ - ( not dropped_last_line or dropped_last_line and not line.startswith('^') ): - new_lines.append(line) - dropped_last_line = False - continue - # END skip comments and lines without our path - - # drop this line - made_change = True - dropped_last_line = True - # END for each line in packed refs - reader.close() - - # write the new lines - if made_change: - open(pack_file_path, 'w').writelines(new_lines) - # END open exception handling - # END handle deletion - - @classmethod - def _create(cls, repo, path, resolve, reference, force): - """internal method used to create a new symbolic reference. - If resolve is False,, the reference will be taken as is, creating - a proper symbolic reference. 
Otherwise it will be resolved to the - corresponding object and a detached symbolic reference will be created - instead""" - full_ref_path = cls.to_full_path(path) - abs_ref_path = join(repo.git_dir, full_ref_path) - - # figure out target data - target = reference - if resolve: - target = repo.rev_parse(str(reference)) - - if not force and isfile(abs_ref_path): - target_data = str(target) - if isinstance(target, SymbolicReference): - target_data = target.path - if not resolve: - target_data = "ref: " + target_data - if open(abs_ref_path, 'rb').read().strip() != target_data: - raise OSError("Reference at %s does already exist" % full_ref_path) - # END no force handling - - ref = cls(repo, full_ref_path) - ref.reference = target - return ref - - @classmethod - def create(cls, repo, path, reference='HEAD', force=False ): - """Create a new symbolic reference, hence a reference pointing to another reference. - - :param repo: - Repository to create the reference in - - :param path: - full path at which the new symbolic reference is supposed to be - created at, i.e. "NEW_HEAD" or "symrefs/my_new_symref" - - :param reference: - The reference to which the new symbolic reference should point to - - :param force: - if True, force creation even if a symbolic reference with that name already exists. - Raise OSError otherwise - - :return: Newly created symbolic Reference - - :raise OSError: - If a (Symbolic)Reference with the same name but different contents - already exists. - - :note: This does not alter the current HEAD, index or Working Tree""" - return cls._create(repo, path, False, reference, force) - - def rename(self, new_path, force=False): - """Rename self to a new path - - :param new_path: - Either a simple name or a full path, i.e. new_name or features/new_name. - The prefix refs/ is implied for references and will be set as needed. - In case this is a symbolic ref, there is no implied prefix - - :param force: - If True, the rename will succeed even if a head with the target name - already exists. 
It will be overwritten in that case
-
- :return: self
- :raise OSError: In case a file at that path with different contents already exists """
- new_path = self.to_full_path(new_path)
- if self.path == new_path:
- return self
-
- new_abs_path = join(self.repo.git_dir, new_path)
- cur_abs_path = join(self.repo.git_dir, self.path)
- if isfile(new_abs_path):
- if not force:
- # if they point to the same file, it's not an error
- if open(new_abs_path,'rb').read().strip() != open(cur_abs_path,'rb').read().strip():
- raise OSError("File at path %r already exists" % new_abs_path)
- # else: we could remove ourselves and use the other one, but
- # for clarity we just continue as usual
- # END not force handling
- os.remove(new_abs_path)
- # END handle existing target file
-
- dname = dirname(new_abs_path)
- if not isdir(dname):
- os.makedirs(dname)
- # END create directory
-
- rename(cur_abs_path, new_abs_path)
- self.path = new_path
-
- return self
-
- @classmethod
- def _iter_items(cls, repo, common_path = None):
- if common_path is None:
- common_path = cls._common_path_default
- rela_paths = set()
-
- # walk loose refs
- # Currently we do not follow links
- for root, dirs, files in os.walk(join_path_native(repo.git_dir, common_path)):
- if 'refs/' not in root: # skip non-refs subfolders
- refs_id = [ i for i,d in enumerate(dirs) if d == 'refs' ]
- if refs_id:
- dirs[0:] = ['refs']
- # END prune non-refs folders
-
- for f in files:
- abs_path = to_native_path_linux(join_path(root, f))
- rela_paths.add(abs_path.replace(to_native_path_linux(repo.git_dir) + '/', ""))
- # END for each file in root directory
- # END for each directory to walk
-
- # read packed refs
- for sha, rela_path in cls._iter_packed_refs(repo):
- if rela_path.startswith(common_path):
- rela_paths.add(rela_path)
- # END relative path matches common path
- # END packed refs reading
-
- # return paths in sorted order
- for path in sorted(rela_paths):
- try:
- yield cls.from_path(repo, path)
- except ValueError:
- continue
- # END for each sorted relative refpath
-
- @classmethod
- def iter_items(cls, repo, common_path = None):
- """Find all refs in the repository
-
- :param repo: is the Repo
-
- :param common_path:
- Optional keyword argument to the path which is to be shared by all
- returned Ref objects.
- Defaults to class specific portion if None assuring that only
- refs suitable for the actual class are returned.
-
- :return:
- git.SymbolicReference[], each of them is guaranteed to be a symbolic
- ref which is not detached. 
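For example (a sketch; ``repo`` is an existing Repo instance)::

    for ref in SymbolicReference.iter_items(repo):
        print ref.path    # e.g. 'refs/heads/master'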
-
- List is lexicographically sorted
- The returned objects represent actual subclasses, such as Head or TagReference"""
- return ( r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached )
-
- @classmethod
- def from_path(cls, repo, path):
- """
- :param path: full .git-directory-relative path name to the Reference to instantiate
- :note: use to_full_path() if you only have a partial path of a known Reference Type
- :return:
- Instance of type Reference, Head, or Tag
- depending on the given path"""
- if not path:
- raise ValueError("Cannot create Reference from %r" % path)
-
- for ref_type in (HEAD, Head, RemoteReference, TagReference, Reference, SymbolicReference):
- try:
- instance = ref_type(repo, path)
- if instance.__class__ == SymbolicReference and instance.is_detached:
- raise ValueError("SymbolRef was detached, we drop it")
- return instance
- except ValueError:
- pass
- # END exception handling
- # END for each type to try
- raise ValueError("Could not find reference type suitable to handle path %r" % path)
-
-
-class Reference(SymbolicReference, LazyMixin, Iterable):
- """Represents a named reference to any object. Subclasses may apply restrictions though,
- i.e. Heads can only point to commits."""
- __slots__ = tuple()
- _common_path_default = "refs"
-
- def __init__(self, repo, path):
- """Initialize this instance
- :param repo: Our parent repository
-
- :param path:
- Path relative to the .git/ directory pointing to the ref in question, i.e.
- refs/heads/master"""
- if not path.startswith(self._common_path_default+'/'):
- raise ValueError("Cannot instantiate %r from path %s" % ( self.__class__.__name__, path ))
- super(Reference, self).__init__(repo, path)
-
-
- def __str__(self):
- return self.name
-
- def _get_object(self):
- """
- :return:
- The object our ref currently refers to. Refs can be cached, they will
- always point to the actual object as it gets re-created on each query"""
- # have to be dynamic here as we may be a tag which can point to anything
- # Our path will be resolved to the hexsha which will be used accordingly
- return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path)))
-
- def _set_object(self, ref):
- """
- Set our reference to point to the given ref. It will be converted
- to a specific hexsha.
- If the reference does not exist, it will be created.
-
- :note:
- TypeChecking is done by the git command"""
- abs_path = self._abs_path()
- existed = True
- if not isfile(abs_path):
- existed = False
- open(abs_path, 'wb').write(Object.NULL_HEX_SHA)
- # END quick create
-
- # do it safely by specifying the old value
- try:
- self.repo.git.update_ref(self.path, ref, (existed and self._get_object().hexsha) or None)
- except:
- if not existed:
- os.remove(abs_path)
- # END remove file on error if it didn't exist before
- raise
- # END exception handling
-
- object = property(_get_object, _set_object, doc="Return the object our ref currently refers to")
-
- @property
- def name(self):
- """:return: (shortest) Name of this reference - it may contain path components"""
- # first two path tokens can be removed as they are
- # refs/heads or refs/tags or refs/remotes
- tokens = self.path.split('/')
- if len(tokens) < 3:
- return self.path # could be refs/HEAD
- return '/'.join(tokens[2:])
-
-
- @classmethod
- def create(cls, repo, path, commit='HEAD', force=False ):
- """Create a new reference. 
- - :param repo: Repository to create the reference in - :param path: - The relative path of the reference, i.e. 'new_branch' or - feature/feature1. The path prefix 'refs/' is implied if not - given explicitly - - :param commit: - Commit to which the new reference should point, defaults to the - current HEAD - - :param force: - if True, force creation even if a reference with that name already exists. - Raise OSError otherwise - - :return: Newly created Reference - - :note: This does not alter the current HEAD, index or Working Tree""" - return cls._create(repo, path, True, commit, force) - - @classmethod - def iter_items(cls, repo, common_path = None): - """Equivalent to SymbolicReference.iter_items, but will return non-detached - references as well.""" - return cls._iter_items(repo, common_path) - - -class HEAD(SymbolicReference): - """Special case of a Symbolic Reference as it represents the repository's - HEAD reference.""" - _HEAD_NAME = 'HEAD' - _ORIG_HEAD_NAME = 'ORIG_HEAD' - __slots__ = tuple() - - def __init__(self, repo, path=_HEAD_NAME): - if path != self._HEAD_NAME: - raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) - super(HEAD, self).__init__(repo, path) - - def orig_head(self): - """ - :return: SymbolicReference pointing at the ORIG_HEAD, which is maintained - to contain the previous value of HEAD""" - return SymbolicReference(self.repo, self._ORIG_HEAD_NAME) - - def _set_reference(self, ref): - """If someone changes the reference through us, we must manually update - the ORIG_HEAD if we are detached. The underlying implementation can only - handle un-detached heads as it has to check whether the current head - is the checked-out one""" - if self.is_detached: - prev_commit = self.commit - super(HEAD, self)._set_reference(ref) - SymbolicReference.create(self.repo, self._ORIG_HEAD_NAME, prev_commit, force=True) - else: - super(HEAD, self)._set_reference(ref) - # END handle detached mode - - # aliased reference - reference = property(SymbolicReference._get_reference, _set_reference, doc="Returns the Reference we point to") - ref = reference - - def reset(self, commit='HEAD', index=True, working_tree = False, - paths=None, **kwargs): - """Reset our HEAD to the given commit optionally synchronizing - the index and working tree. The reference we refer to will be set to - commit as well. - - :param commit: - Commit object, Reference Object or string identifying a revision we - should reset HEAD to. - - :param index: - If True, the index will be set to match the given commit. Otherwise - it will not be touched. - - :param working_tree: - If True, the working tree will be forcefully adjusted to match the given - commit, possibly overwriting uncommitted changes without warning. - If working_tree is True, index must be true as well - - :param paths: - Single path or list of paths relative to the git root directory - that are to be reset. This allows to partially reset individual files. - - :param kwargs: - Additional arguments passed to git-reset. 
- - :return: self""" - mode = "--soft" - add_arg = None - if index: - mode = "--mixed" - - # it appears, some git-versions declare mixed and paths deprecated - # see http://github.com/Byron/GitPython/issues#issue/2 - if paths: - mode = None - # END special case - # END handle index - - if working_tree: - mode = "--hard" - if not index: - raise ValueError( "Cannot reset the working tree if the index is not reset as well") - - # END working tree handling - - if paths: - add_arg = "--" - # END nicely separate paths from rest - - try: - self.repo.git.reset(mode, commit, add_arg, paths, **kwargs) - except GitCommandError, e: - # git nowadays may use 1 as status to indicate there are still unstaged - # modifications after the reset - if e.status != 1: - raise - # END handle exception - - return self - - -class Head(Reference): - """A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. - - Examples:: - - >>> repo = Repo("/path/to/repo") - >>> head = repo.heads[0] - - >>> head.name - 'master' - - >>> head.commit - <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> - - >>> head.commit.hexsha - '1c09f116cbc2cb4100fb6935bb162daa4723f455'""" - _common_path_default = "refs/heads" - k_config_remote = "remote" - k_config_remote_ref = "merge" # branch to merge from remote - - @classmethod - def create(cls, repo, path, commit='HEAD', force=False, **kwargs): - """Create a new head. - :param repo: Repository to create the head in - :param path: - The name or path of the head, i.e. 'new_branch' or - feature/feature1. The prefix refs/heads is implied. - - :param commit: - Commit to which the new head should point, defaults to the - current HEAD - - :param force: - if True, force creation even if branch with that name already exists. - - :param kwargs: - Additional keyword arguments to be passed to git-branch, i.e. - track, no-track, l - - :return: Newly created Head - :note: This does not alter the current HEAD, index or Working Tree""" - if cls is not Head: - raise TypeError("Only Heads can be created explicitly, not objects of type %s" % cls.__name__) - - args = ( path, commit ) - if force: - kwargs['f'] = True - - repo.git.branch(*args, **kwargs) - return cls(repo, "%s/%s" % ( cls._common_path_default, path)) - - - @classmethod - def delete(cls, repo, *heads, **kwargs): - """Delete the given heads - :param force: - If True, the heads will be deleted even if they are not yet merged into - the main development stream. - Default False""" - force = kwargs.get("force", False) - flag = "-d" - if force: - flag = "-D" - repo.git.branch(flag, *heads) - - - def set_tracking_branch(self, remote_reference): - """ - Configure this branch to track the given remote reference. This will alter - this branch's configuration accordingly. 
- - :param remote_reference: The remote reference to track or None to untrack - any references - :return: self""" - if remote_reference is not None and not isinstance(remote_reference, RemoteReference): - raise ValueError("Incorrect parameter type: %r" % remote_reference) - # END handle type - - writer = self.config_writer() - if remote_reference is None: - writer.remove_option(self.k_config_remote) - writer.remove_option(self.k_config_remote_ref) - if len(writer.options()) == 0: - writer.remove_section() - # END handle remove section - else: - writer.set_value(self.k_config_remote, remote_reference.remote_name) - writer.set_value(self.k_config_remote_ref, Head.to_full_path(remote_reference.remote_head)) - # END handle ref value - - return self - - - def tracking_branch(self): - """ - :return: The remote_reference we are tracking, or None if we are - not a tracking branch""" - reader = self.config_reader() - if reader.has_option(self.k_config_remote) and reader.has_option(self.k_config_remote_ref): - ref = Head(self.repo, Head.to_full_path(reader.get_value(self.k_config_remote_ref))) - remote_refpath = RemoteReference.to_full_path(join_path(reader.get_value(self.k_config_remote), ref.name)) - return RemoteReference(self.repo, remote_refpath) - # END handle have tracking branch - - # we are not a tracking branch - return None - - def rename(self, new_path, force=False): - """Rename self to a new path - - :param new_path: - Either a simple name or a path, i.e. new_name or features/new_name. - The prefix refs/heads is implied - - :param force: - If True, the rename will succeed even if a head with the target name - already exists. - - :return: self - :note: respects the ref log as git commands are used""" - flag = "-m" - if force: - flag = "-M" - - self.repo.git.branch(flag, self, new_path) - self.path = "%s/%s" % (self._common_path_default, new_path) - return self - - def checkout(self, force=False, **kwargs): - """Checkout this head by setting the HEAD to this reference, by updating the index - to reflect the tree we point to and by updating the working tree to reflect - the latest index. - - The command will fail if changed working tree files would be overwritten. - - :param force: - If True, changes to the index and the working tree will be discarded. - If False, GitCommandError will be raised in that situation. - - :param kwargs: - Additional keyword arguments to be passed to git checkout, i.e. - b='new_branch' to create a new branch at the given spot. - - :return: - The active branch after the checkout operation, usually self unless - a new branch has been created. 
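For instance (a sketch; the branch name mirrors the kwargs example above)::

    head = repo.heads[0]
    head.checkout()                        # switch to this head
    other = head.checkout(b='new_branch')  # create 'new_branch' here and switch to it
    assert other == repo.active_branch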
-
- :note:
- By default it is only allowed to checkout heads - everything else
- will leave the HEAD detached which is allowed and possible, but remains
- a special state that some tools might not be able to handle."""
- args = list()
- kwargs['f'] = force
- if kwargs['f'] == False:
- kwargs.pop('f')
-
- self.repo.git.checkout(self, **kwargs)
- return self.repo.active_branch
-
- #{ Configuration
-
- def _config_parser(self, read_only):
- if read_only:
- parser = self.repo.config_reader()
- else:
- parser = self.repo.config_writer()
- # END handle parser instance
-
- return SectionConstraint(parser, 'branch "%s"' % self.name)
-
- def config_reader(self):
- """
- :return: A configuration parser instance constrained to only read
- this instance's values"""
- return self._config_parser(read_only=True)
-
- def config_writer(self):
- """
- :return: A configuration writer instance with read- and write access
- to options of this head"""
- return self._config_parser(read_only=False)
-
- #} END configuration
-
-
-class TagReference(Reference):
- """Class representing a lightweight tag reference which either points to a commit,
- a tag object or any other object. In the latter case additional information,
- like the signature or the tag-creator, is available.
-
- This tag object will always point to a commit object, but may carry additional
- information in a tag object::
-
- tagref = TagReference.list_items(repo)[0]
- print tagref.commit.message
- if tagref.tag is not None:
- print tagref.tag.message"""
-
- __slots__ = tuple()
- _common_path_default = "refs/tags"
-
- @property
- def commit(self):
- """:return: Commit object the tag ref points to"""
- obj = self.object
- if obj.type == "commit":
- return obj
- elif obj.type == "tag":
- # it is a tag object which carries the commit as an object - we can point to anything
- return obj.object
- else:
- raise ValueError( "Tag %s points to a Blob or Tree - have never seen that before" % self )
-
- @property
- def tag(self):
- """
- :return: Tag object this tag ref points to or None in case
- we are a lightweight tag"""
- obj = self.object
- if obj.type == "tag":
- return obj
- return None
-
- # make object read-only
- # It should be reasonably hard to adjust an existing tag
- object = property(Reference._get_object)
-
- @classmethod
- def create(cls, repo, path, ref='HEAD', message=None, force=False, **kwargs):
- """Create a new tag reference.
-
- :param path:
- The name of the tag, i.e. 1.0 or releases/1.0.
- The prefix refs/tags is implied
-
- :param ref:
- A reference to the object you want to tag. It can be a commit, tree or
- blob.
-
- :param message:
- If not None, the message will be used in your tag object. This will also
- create an additional tag object that allows to obtain that information, i.e.::
-
- tagref.tag.message
-
- :param force:
- If True, force creation of a tag even though that tag already exists. 
- - :param kwargs: - Additional keyword arguments to be passed to git-tag - - :return: A new TagReference""" - args = ( path, ref ) - if message: - kwargs['m'] = message - if force: - kwargs['f'] = True - - repo.git.tag(*args, **kwargs) - return TagReference(repo, "%s/%s" % (cls._common_path_default, path)) - - @classmethod - def delete(cls, repo, *tags): - """Delete the given existing tag or tags""" - repo.git.tag("-d", *tags) - - - - - -# provide an alias -Tag = TagReference - -class RemoteReference(Head): - """Represents a reference pointing to a remote head.""" - _common_path_default = "refs/remotes" - - - @classmethod - def iter_items(cls, repo, common_path = None, remote=None): - """Iterate remote references, and if given, constrain them to the given remote""" - common_path = common_path or cls._common_path_default - if remote is not None: - common_path = join_path(common_path, str(remote)) - # END handle remote constraint - return super(RemoteReference, cls).iter_items(repo, common_path) - - @property - def remote_name(self): - """ - :return: - Name of the remote we are a reference of, such as 'origin' for a reference - named 'origin/master'""" - tokens = self.path.split('/') - # /refs/remotes/<remote name>/<branch_name> - return tokens[2] - - @property - def remote_head(self): - """:return: Name of the remote head itself, i.e. master. - :note: The returned name is usually not qualified enough to uniquely identify - a branch""" - tokens = self.path.split('/') - return '/'.join(tokens[3:]) - - @classmethod - def delete(cls, repo, *refs, **kwargs): - """Delete the given remote references. - :note: - kwargs are given for compatability with the base class method as we - should not narrow the signature.""" - repo.git.branch("-d", "-r", *refs) - # the official deletion method will ignore remote symbolic refs - these - # are generally ignored in the refs/ folder. We don't though - # and delete remainders manually - for ref in refs: - try: - os.remove(join(repo.git_dir, ref.path)) - except OSError: - pass - # END for each ref diff --git a/lib/git/remote.py b/lib/git/remote.py deleted file mode 100644 index 3edde175..00000000 --- a/lib/git/remote.py +++ /dev/null @@ -1,719 +0,0 @@ -# remote.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php -"""Module implementing a remote object allowing easy access to git remotes""" - -from exc import GitCommandError -from objects import Commit -from ConfigParser import NoOptionError -from config import SectionConstraint - -from git.util import ( - LazyMixin, - Iterable, - IterableList - ) - -from refs import ( - Reference, - RemoteReference, - SymbolicReference, - TagReference - ) - -from git.util import join_path -from gitdb.util import join - -import re -import os -import sys - -__all__ = ('RemoteProgress', 'PushInfo', 'FetchInfo', 'Remote') - -class RemoteProgress(object): - """ - Handler providing an interface to parse progress information emitted by git-push - and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. 
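A minimal subclass sketch, using only the hooks and flags this class defines (see ``update`` below)::

    class PrintProgress(RemoteProgress):
        def line_dropped(self, line):
            print "dropped: %r" % line
        def update(self, op_code, cur_count, max_count=None, message=''):
            if op_code & self.BEGIN:
                print "operation begins"
            print cur_count, max_count, message

Such an instance can be handed to the ``progress`` parameter of ``Remote.fetch``, ``pull`` and ``push``.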
- """ - BEGIN, END, COUNTING, COMPRESSING, WRITING = [ 1 << x for x in range(5) ] - STAGE_MASK = BEGIN|END - OP_MASK = COUNTING|COMPRESSING|WRITING - - __slots__ = ("_cur_line", "_seen_ops") - re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") - re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") - - def __init__(self): - self._seen_ops = list() - - def _parse_progress_line(self, line): - """Parse progress information from the given line as retrieved by git-push - or git-fetch - - :return: list(line, ...) list of lines that could not be processed""" - # handle - # Counting objects: 4, done. - # Compressing objects: 50% (1/2) \rCompressing objects: 100% (2/2) \rCompressing objects: 100% (2/2), done. - self._cur_line = line - sub_lines = line.split('\r') - failed_lines = list() - for sline in sub_lines: - # find esacpe characters and cut them away - regex will not work with - # them as they are non-ascii. As git might expect a tty, it will send them - last_valid_index = None - for i,c in enumerate(reversed(sline)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i-1 - # END character was non-ascii - # END for each character in sline - if last_valid_index is not None: - sline = sline[:last_valid_index] - # END cut away invalid part - sline = sline.rstrip() - - cur_count, max_count = None, None - match = self.re_op_relative.match(sline) - if match is None: - match = self.re_op_absolute.match(sline) - - if not match: - self.line_dropped(sline) - failed_lines.append(sline) - continue - # END could not get match - - op_code = 0 - remote, op_name, percent, cur_count, max_count, message = match.groups() - - # get operation id - if op_name == "Counting objects": - op_code |= self.COUNTING - elif op_name == "Compressing objects": - op_code |= self.COMPRESSING - elif op_name == "Writing objects": - op_code |= self.WRITING - else: - raise ValueError("Operation name %r unknown" % op_name) - - # figure out stage - if op_code not in self._seen_ops: - self._seen_ops.append(op_code) - op_code |= self.BEGIN - # END begin opcode - - if message is None: - message = '' - # END message handling - - message = message.strip() - done_token = ', done.' - if message.endswith(done_token): - op_code |= self.END - message = message[:-len(done_token)] - # END end message handling - - self.update(op_code, cur_count, max_count, message) - # END for each sub line - return failed_lines - - def line_dropped(self, line): - """Called whenever a line could not be understood and was therefore dropped.""" - pass - - def update(self, op_code, cur_count, max_count=None, message=''): - """Called whenever the progress changes - - :param op_code: - Integer allowing to be compared against Operation IDs and stage IDs. - - Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation - ID as well as END. It may be that BEGIN and END are set at once in case only - one progress message was emitted due to the speed of the operation. - Between BEGIN and END, none of these flags will be set - - Operation IDs are all held within the OP_MASK. Only one Operation ID will - be active per call. - :param cur_count: Current absolute count of items - - :param max_count: - The maximum count of items we expect. It may be None in case there is - no maximum number of items or if it is (yet) unknown. - - :param message: - In case of the 'WRITING' operation, it contains the amount of bytes - transferred. It may possibly be used for other purposes as well. 
- - You may read the contents of the current line in self._cur_line""" - pass - - -class PushInfo(object): - """ - Carries information about the result of a push operation of a single head:: - - info = remote.push()[0] - info.flags # bitflags providing more information about the result - info.local_ref # Reference pointing to the local reference that was pushed - # It is None if the ref was deleted. - info.remote_ref_string # path to the remote reference located on the remote side - info.remote_ref # Remote Reference on the local side corresponding to - # the remote_ref_string. It can be a TagReference as well. - info.old_commit # commit at which the remote_ref was standing before we pushed - # it to local_ref.commit. Will be None if an error was indicated - info.summary # summary line providing human readable english text about the push - """ - __slots__ = ('local_ref', 'remote_ref_string', 'flags', 'old_commit', '_remote', 'summary') - - NEW_TAG, NEW_HEAD, NO_MATCH, REJECTED, REMOTE_REJECTED, REMOTE_FAILURE, DELETED, \ - FORCED_UPDATE, FAST_FORWARD, UP_TO_DATE, ERROR = [ 1 << x for x in range(11) ] - - _flag_map = { 'X' : NO_MATCH, '-' : DELETED, '*' : 0, - '+' : FORCED_UPDATE, ' ' : FAST_FORWARD, - '=' : UP_TO_DATE, '!' : ERROR } - - def __init__(self, flags, local_ref, remote_ref_string, remote, old_commit=None, - summary=''): - """ Initialize a new instance """ - self.flags = flags - self.local_ref = local_ref - self.remote_ref_string = remote_ref_string - self._remote = remote - self.old_commit = old_commit - self.summary = summary - - @property - def remote_ref(self): - """ - :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are - if self.remote_ref_string.startswith("refs/tags"): - return TagReference(self._remote.repo, self.remote_ref_string) - elif self.remote_ref_string.startswith("refs/heads"): - remote_ref = Reference(self._remote.repo, self.remote_ref_string) - return RemoteReference(self._remote.repo, "refs/remotes/%s/%s" % (str(self._remote), remote_ref.name)) - else: - raise ValueError("Could not handle remote ref: %r" % self.remote_ref_string) - # END - - @classmethod - def _from_line(cls, remote, line): - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e""" - control_character, from_to, summary = line.split('\t', 3) - flags = 0 - - # control character handling - try: - flags |= cls._flag_map[ control_character ] - except KeyError: - raise ValueError("Control Character %r unknown as parsed from line %r" % (control_character, line)) - # END handle control character - - # from_to handling - from_ref_string, to_ref_string = from_to.split(':') - if flags & cls.DELETED: - from_ref = None - else: - from_ref = Reference.from_path(remote.repo, from_ref_string) - - # commit handling, could be message or commit info - old_commit = None - if summary.startswith('['): - if "[rejected]" in summary: - flags |= cls.REJECTED - elif "[remote rejected]" in summary: - flags |= cls.REMOTE_REJECTED - elif "[remote failure]" in summary: - flags |= cls.REMOTE_FAILURE - elif "[no match]" in summary: - flags |= cls.ERROR - elif "[new tag]" in summary: - flags |= cls.NEW_TAG - elif "[new branch]" in summary: - flags |= cls.NEW_HEAD - # uptodate encoded in control character - else: - # fast-forward or forced update - was encoded in control character, - # but we parse the 
old and new commit - split_token = "..." - if control_character == " ": - split_token = ".." - old_sha, new_sha = summary.split(' ')[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated - old_commit = remote.repo.commit(old_sha) - # END message handling - - return PushInfo(flags, from_ref, to_ref_string, remote, old_commit, summary) - - -class FetchInfo(object): - """ - Carries information about the results of a fetch operation of a single head:: - - info = remote.fetch()[0] - info.ref # Symbolic Reference or RemoteReference to the changed - # remote head or FETCH_HEAD - info.flags # additional flags to be & with enumeration members, - # i.e. info.flags & info.REJECTED - # is 0 if ref is SymbolicReference - info.note # additional notes given by git-fetch intended for the user - info.old_commit # if info.flags & info.FORCED_UPDATE|info.FAST_FORWARD, - # field is set to the previous location of ref, otherwise None - """ - __slots__ = ('ref','old_commit', 'flags', 'note') - - NEW_TAG, NEW_HEAD, HEAD_UPTODATE, TAG_UPDATE, REJECTED, FORCED_UPDATE, \ - FAST_FORWARD, ERROR = [ 1 << x for x in range(8) ] - - # %c %-*s %-*s -> %s (%s) - re_fetch_result = re.compile("^\s*(.) (\[?[\w\s\.]+\]?)\s+(.+) -> ([/\w_\+\.-]+)( \(.*\)?$)?") - - _flag_map = { '!' : ERROR, '+' : FORCED_UPDATE, '-' : TAG_UPDATE, '*' : 0, - '=' : HEAD_UPTODATE, ' ' : FAST_FORWARD } - - def __init__(self, ref, flags, note = '', old_commit = None): - """ - Initialize a new instance - """ - self.ref = ref - self.flags = flags - self.note = note - self.old_commit = old_commit - - def __str__(self): - return self.name - - @property - def name(self): - """:return: Name of our remote ref""" - return self.ref.name - - @property - def commit(self): - """:return: Commit of our remote ref""" - return self.ref.commit - - @classmethod - def _from_line(cls, repo, line, fetch_line): - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows - "%c %-*s %-*s -> %s%s" - - Where c is either ' ', !, +, -, *, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" - match = cls.re_fetch_result.match(line) - if match is None: - raise ValueError("Failed to parse line: %r" % line) - - # parse lines - control_character, operation, local_remote_ref, remote_local_ref, note = match.groups() - try: - new_hex_sha, fetch_operation, fetch_note = fetch_line.split("\t") - ref_type_name, fetch_note = fetch_note.split(' ', 1) - except ValueError: # unpack error - raise ValueError("Failed to parse FETCH__HEAD line: %r" % fetch_line) - - # handle FETCH_HEAD and figure out ref type - # If we do not specify a target branch like master:refs/remotes/origin/master, - # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. 
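Putting the flag constants to use looks roughly like this (a sketch; ``remote`` is a Remote instance as implemented further below)::

    for info in remote.fetch():
        if info.flags & FetchInfo.REJECTED:
            print "rejected: %s" % info.name
        elif info.flags & FetchInfo.FORCED_UPDATE:
            print "%s was forced from %s" % (info.name, info.old_commit)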
In that case we use a symbolic reference which is detached - ref_type = None - if remote_local_ref == "FETCH_HEAD": - ref_type = SymbolicReference - elif ref_type_name == "branch": - ref_type = RemoteReference - elif ref_type_name == "tag": - ref_type = TagReference - else: - raise TypeError("Cannot handle reference type: %r" % ref_type_name) - - # create ref instance - if ref_type is SymbolicReference: - remote_local_ref = ref_type(repo, "FETCH_HEAD") - else: - remote_local_ref = Reference.from_path(repo, join_path(ref_type._common_path_default, remote_local_ref.strip())) - # END create ref instance - - note = ( note and note.strip() ) or '' - - # parse flags from control_character - flags = 0 - try: - flags |= cls._flag_map[control_character] - except KeyError: - raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) - # END control char exception hanlding - - # parse operation string for more info - makes no sense for symbolic refs - old_commit = None - if isinstance(remote_local_ref, Reference): - if 'rejected' in operation: - flags |= cls.REJECTED - if 'new tag' in operation: - flags |= cls.NEW_TAG - if 'new branch' in operation: - flags |= cls.NEW_HEAD - if '...' in operation or '..' in operation: - split_token = '...' - if control_character == ' ': - split_token = split_token[:-1] - old_commit = repo.rev_parse(operation.split(split_token)[0]) - # END handle refspec - # END reference flag handling - - return cls(remote_local_ref, flags, note, old_commit) - - -class Remote(LazyMixin, Iterable): - """Provides easy read and write access to a git remote. - - Everything not part of this interface is considered an option for the current - remote, allowing constructs like remote.pushurl to query the pushurl. - - NOTE: When querying configuration, the configuration accessor will be cached - to speed up subsequent accesses.""" - - __slots__ = ( "repo", "name", "_config_reader" ) - _id_attribute_ = "name" - - def __init__(self, repo, name): - """Initialize a remote instance - - :param repo: The repository we are a remote of - :param name: the name of the remote, i.e. 'origin'""" - self.repo = repo - self.name = name - - if os.name == 'nt': - # some oddity: on windows, python 2.5, it for some reason does not realize - # that it has the config_writer property, but instead calls __getattr__ - # which will not yield the expected results. 'pinging' the members - # with a dir call creates the config_writer property that we require - # ... bugs like these make me wonder wheter python really wants to be used - # for production. It doesn't happen on linux though. 
- dir(self) - # END windows special handling - - def __getattr__(self, attr): - """Allows to call this instance like - remote.special( *args, **kwargs) to call git-remote special self.name""" - if attr == "_config_reader": - return super(Remote, self).__getattr__(attr) - - # sometimes, probably due to a bug in python itself, we are being called - # even though a slot of the same name exists - try: - return self._config_reader.get(attr) - except NoOptionError: - return super(Remote, self).__getattr__(attr) - # END handle exception - - def _config_section_name(self): - return 'remote "%s"' % self.name - - def _set_cache_(self, attr): - if attr == "_config_reader": - self._config_reader = SectionConstraint(self.repo.config_reader(), self._config_section_name()) - else: - super(Remote, self)._set_cache_(attr) - - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.%s "%s">' % (self.__class__.__name__, self.name) - - def __eq__(self, other): - return self.name == other.name - - def __ne__(self, other): - return not ( self == other ) - - def __hash__(self): - return hash(self.name) - - @classmethod - def iter_items(cls, repo): - """:return: Iterator yielding Remote objects of the given repository""" - for section in repo.config_reader("repository").sections(): - if not section.startswith('remote'): - continue - lbound = section.find('"') - rbound = section.rfind('"') - if lbound == -1 or rbound == -1: - raise ValueError("Remote-Section has invalid format: %r" % section) - yield Remote(repo, section[lbound+1:rbound]) - # END for each configuration section - - @property - def refs(self): - """ - :return: - IterableList of RemoteReference objects. It is prefixed, allowing - you to omit the remote path portion, i.e.:: - remote.refs.master # yields RemoteReference('/refs/remotes/origin/master')""" - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - out_refs.extend(RemoteReference.list_items(self.repo, remote=self.name)) - assert out_refs, "Remote %s did not have any references" % self.name - return out_refs - - @property - def stale_refs(self): - """ - :return: - IterableList RemoteReference objects that do not have a corresponding - head in the remote reference anymore as they have been deleted on the - remote side, but are still available locally. - - The IterableList is prefixed, hence the 'origin' must be omitted. 
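For example (a sketch assuming a remote named 'origin' exists)::

    origin = Remote(repo, "origin")
    master = origin.refs.master    # RemoteReference to refs/remotes/origin/master
    for ref in origin.stale_refs:
        print "deleted on remote side: %s" % ref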
See - 'refs' property for an example.""" - out_refs = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) - for line in self.repo.git.remote("prune", "--dry-run", self).splitlines()[2:]: - # expecting - # * [would prune] origin/new_branch - token = " * [would prune] " - if not line.startswith(token): - raise ValueError("Could not parse git-remote prune result: %r" % line) - fqhn = "%s/%s" % (RemoteReference._common_path_default,line.replace(token, "")) - out_refs.append(RemoteReference(self.repo, fqhn)) - # END for each line - return out_refs - - @classmethod - def create(cls, repo, name, url, **kwargs): - """Create a new remote to the given repository - :param repo: Repository instance that is to receive the new remote - :param name: Desired name of the remote - :param url: URL which corresponds to the remote's name - :param kwargs: - Additional arguments to be passed to the git-remote add command - - :return: New Remote instance - - :raise GitCommandError: in case an origin with that name already exists""" - repo.git.remote( "add", name, url, **kwargs ) - return cls(repo, name) - - # add is an alias - add = create - - @classmethod - def remove(cls, repo, name ): - """Remove the remote with the given name""" - repo.git.remote("rm", name) - - # alias - rm = remove - - def rename(self, new_name): - """Rename self to the given new_name - :return: self """ - if self.name == new_name: - return self - - self.repo.git.remote("rename", self.name, new_name) - self.name = new_name - del(self._config_reader) # it contains cached values, section names are different now - return self - - def update(self, **kwargs): - """Fetch all changes for this remote, including new branches which will - be forced in ( in case your local remote branch is not part the new remote branches - ancestry anymore ). - - :param kwargs: - Additional arguments passed to git-remote update - - :return: self """ - self.repo.git.remote("update", self.name) - return self - - def _digest_process_messages(self, fh, progress): - """Read progress messages from file-like object fh, supplying the respective - progress messages to the progress instance. - - :return: list(line, ...) 
list of lines without linebreaks that did - not contain progress information""" - line_so_far = '' - dropped_lines = list() - while True: - char = fh.read(1) - if not char: - break - - if char in ('\r', '\n'): - dropped_lines.extend(progress._parse_progress_line(line_so_far)) - line_so_far = '' - else: - line_so_far += char - # END process parsed line - # END while file is not done reading - return dropped_lines - - - def _finalize_proc(self, proc): - """Wait for the process (fetch, pull or push) and handle its errors accordingly""" - try: - proc.wait() - except GitCommandError,e: - # if a push has rejected items, the command has non-zero return status - # a return status of 128 indicates a connection error - reraise the previous one - if proc.poll() == 128: - raise - pass - # END exception handling - - - def _get_fetch_info_from_stderr(self, proc, progress): - # skip first line as it is some remote info we are not interested in - output = IterableList('name') - - - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information - fetch_info_lines = list() - for line in self._digest_process_messages(proc.stderr, progress): - if line.startswith('From') or line.startswith('remote: Total'): - continue - elif line.startswith('warning:'): - print >> sys.stderr, line - continue - elif line.startswith('fatal:'): - raise GitCommandError("Error when fetching: %s" % line) - # END handle special messages - fetch_info_lines.append(line) - # END for each line - - # read head information - fp = open(join(self.repo.git_dir, 'FETCH_HEAD'),'r') - fetch_head_info = fp.readlines() - fp.close() - - assert len(fetch_info_lines) == len(fetch_head_info) - - output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) - for err_line,fetch_line in zip(fetch_info_lines, fetch_head_info)) - - self._finalize_proc(proc) - return output - - def _get_push_info(self, proc, progress): - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. This is why we read the bytes manually - self._digest_process_messages(proc.stderr, progress) - - output = IterableList('name') - for line in proc.stdout.readlines(): - try: - output.append(PushInfo._from_line(self, line)) - except ValueError: - # if an error happens, additional info is given which we cannot parse - pass - # END exception handling - # END for each line - - self._finalize_proc(proc) - return output - - - def fetch(self, refspec=None, progress=None, **kwargs): - """Fetch the latest changes for this remote - - :param refspec: - A "refspec" is used by fetch and push to describe the mapping - between remote ref and local ref. They are combined with a colon in - the format <src>:<dst>, preceded by an optional plus sign, +. - For example: git fetch $URL refs/heads/master:refs/heads/origin means - "grab the master branch head from the $URL and store it as my origin - branch head". And git push $URL refs/heads/master:refs/heads/to-upstream - means "publish my master branch head as to-upstream branch at $URL". - See also git-push(1). - - Taken from the git manual - :param progress: See 'push' method - :param kwargs: Additional arguments to be passed to git-fetch - :return: - IterableList(FetchInfo, ...) 
-    def fetch(self, refspec=None, progress=None, **kwargs):
-        """Fetch the latest changes for this remote
-
-        :param refspec:
-            A "refspec" is used by fetch and push to describe the mapping
-            between remote ref and local ref. They are combined with a colon in
-            the format <src>:<dst>, preceded by an optional plus sign, +.
-            For example: git fetch $URL refs/heads/master:refs/heads/origin means
-            "grab the master branch head from the $URL and store it as my origin
-            branch head". And git push $URL refs/heads/master:refs/heads/to-upstream
-            means "publish my master branch head as to-upstream branch at $URL".
-            See also git-push(1).
-
-            Taken from the git manual
-        :param progress: See 'push' method
-        :param kwargs: Additional arguments to be passed to git-fetch
-        :return:
-            IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed
-            information about the fetch results
-
-        :note:
-            As fetch does not provide progress information to non-ttys, we cannot make
-            it available here, unlike in the 'push' method."""
-        proc = self.repo.git.fetch(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
-        return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
-
-    def pull(self, refspec=None, progress=None, **kwargs):
-        """Pull changes from the given branch, being the same as a fetch followed
-        by a merge of branch with your local branch.
-
-        :param refspec: see 'fetch' method
-        :param progress: see 'push' method
-        :param kwargs: Additional arguments to be passed to git-pull
-        :return: Please see 'fetch' method"""
-        proc = self.repo.git.pull(self, refspec, with_extended_output=True, as_process=True, v=True, **kwargs)
-        return self._get_fetch_info_from_stderr(proc, progress or RemoteProgress())
-
-    def push(self, refspec=None, progress=None, **kwargs):
-        """Push changes from source branch in refspec to target branch in refspec.
-
-        :param refspec: see 'fetch' method
-        :param progress:
-            Instance of type RemoteProgress allowing the caller to receive
-            progress information until the method returns.
-            If None, progress information will be discarded
-        :param kwargs: Additional arguments to be passed to git-push
-        :return:
-            IterableList(PushInfo, ...) iterable list of PushInfo instances, each
-            one informing about an individual head which had been updated on the remote
-            side.
-            If the push contains rejected heads, these will have the PushInfo.ERROR bit set
-            in their flags.
-            If the operation fails completely, the length of the returned IterableList will
-            be zero."""
-        proc = self.repo.git.push(self, refspec, porcelain=True, as_process=True, **kwargs)
-        return self._get_push_info(proc, progress or RemoteProgress())
-
-    @property
-    def config_reader(self):
-        """
-        :return:
-            GitConfigParser compatible object able to read options for only our remote.
-            Hence you may simply call config.get("pushurl") to obtain the information"""
-        return self._config_reader
-
-    @property
-    def config_writer(self):
-        """
-        :return: GitConfigParser compatible object able to write options for this remote.
-        :note:
-            You can only own one writer at a time - delete it to release the
-            configuration file and make it usable by others.
-
-            To assure consistent results, you should only query options through the
-            writer. Once you are done writing, you are free to use the config reader
-            once again."""
-        writer = self.repo.config_writer()
-
-        # clear our cache to assure we re-read the possibly changed configuration
-        del(self._config_reader)
-        return SectionConstraint(writer, self._config_section_name())
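-    # Illustrative usage sketch (editor's addition): a typical fetch/push
-    # round-trip with the methods above. Remote and branch names are
-    # placeholders; the FetchInfo/PushInfo attributes used here (ref, commit,
-    # local_ref, flags, ERROR) are assumed from their documented roles.
-    #
-    #   origin = repo.remotes.origin
-    #   for info in origin.fetch():
-    #       print("%s -> %s" % (info.ref, info.commit))
-    #   for info in origin.push("master:master"):
-    #       if info.flags & info.ERROR:
-    #           print("push of %s was rejected" % info.local_ref)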
diff --git a/lib/git/repo/__init__.py b/lib/git/repo/__init__.py
deleted file mode 100644
index 8902a254..00000000
--- a/lib/git/repo/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-"""Initialize the Repo package"""
-
-from base import *
\ No newline at end of file
diff --git a/lib/git/repo/base.py b/lib/git/repo/base.py
deleted file mode 100644
index 6f401628..00000000
--- a/lib/git/repo/base.py
+++ /dev/null
@@ -1,752 +0,0 @@
-# repo.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from git.exc import InvalidGitRepositoryError, NoSuchPathError
-from git.cmd import Git
-from git.refs import *
-from git.index import IndexFile
-from git.objects import *
-from git.config import GitConfigParser
-from git.remote import Remote
-from git.db import (
-    GitCmdObjectDB,
-    GitDB
-    )
-
-from gitdb.util import (
-    join,
-    isfile,
-    hex_to_bin
-    )
-
-from fun import (
-    rev_parse,
-    is_git_dir,
-    touch
-    )
-
-import os
-import sys
-import re
-
-DefaultDBType = GitDB
-if sys.version_info[1] < 5:     # python 2.4 compatibility
-    DefaultDBType = GitCmdObjectDB
-# END handle python 2.4
-
-
-__all__ = ('Repo', )
-
-
-class Repo(object):
-    """Represents a git repository and allows you to query references,
-    gather commit information, generate diffs, create and clone repositories,
-    and query the log.
-
-    The following attributes are worth using:
-
-    'working_dir' is the working directory of the git command, which is the working tree
-    directory if available, or the .git directory in case of bare repositories
-
-    'working_tree_dir' is the working tree directory, but will raise AssertionError
-    if we are a bare repository.
-
-    'git_dir' is the .git repository directory, which is always set."""
-    DAEMON_EXPORT_FILE = 'git-daemon-export-ok'
-    __slots__ = ("working_dir", "_working_tree_dir", "git_dir", "_bare", "git", "odb")
-
-    # precompiled regex
-    re_whitespace = re.compile(r'\s+')
-    re_hexsha_only = re.compile('^[0-9A-Fa-f]{40}$')
-    re_hexsha_shortened = re.compile('^[0-9A-Fa-f]{4,40}$')
-    re_author_committer_start = re.compile(r'^(author|committer)')
-    re_tab_full_line = re.compile(r'^\t(.*)$')
-
-    # invariants
-    # represents the configuration level of a configuration file
-    config_level = ("system", "global", "repository")
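-    # Illustrative sketch (editor's addition): the object database backend can
-    # be chosen at construction time. GitDB reads the object database in pure
-    # python, while GitCmdObjectDB delegates object access to the git binary.
-    # The path is a hypothetical placeholder.
-    #
-    #   repo = Repo("/path/to/repo")                        # default backend
-    #   repo = Repo("/path/to/repo", odbt=GitCmdObjectDB)   # git-cat-file based backend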
-    def __init__(self, path=None, odbt=DefaultDBType):
-        """Create a new Repo instance
-
-        :param path: is the path to either the root git directory or the bare git repo::
-
-            repo = Repo("/Users/mtrier/Development/git-python")
-            repo = Repo("/Users/mtrier/Development/git-python.git")
-            repo = Repo("~/Development/git-python.git")
-            repo = Repo("$REPOSITORIES/Development/git-python.git")
-
-        :param odbt: Object DataBase type - a type which is constructed by providing
-            the directory containing the database objects, i.e. .git/objects. It will
-            be used to access all object data
-        :raise InvalidGitRepositoryError:
-        :raise NoSuchPathError:
-        :return: git.Repo"""
-        epath = os.path.abspath(os.path.expandvars(os.path.expanduser(path or os.getcwd())))
-
-        if not os.path.exists(epath):
-            raise NoSuchPathError(epath)
-
-        self.working_dir = None
-        self._working_tree_dir = None
-        self.git_dir = None
-        curpath = epath
-
-        # walk up the path to find the .git dir
-        while curpath:
-            if is_git_dir(curpath):
-                self.git_dir = curpath
-                self._working_tree_dir = os.path.dirname(curpath)
-                break
-            gitpath = join(curpath, '.git')
-            if is_git_dir(gitpath):
-                self.git_dir = gitpath
-                self._working_tree_dir = curpath
-                break
-            curpath, dummy = os.path.split(curpath)
-            if not dummy:
-                break
-        # END while curpath
-
-        if self.git_dir is None:
-            raise InvalidGitRepositoryError(epath)
-
-        self._bare = False
-        try:
-            self._bare = self.config_reader("repository").getboolean('core', 'bare')
-        except Exception:
-            # lets not assume the option exists, although it should
-            pass
-
-        # adjust the wd in case we are actually bare - we didn't know that
-        # in the first place
-        if self._bare:
-            self._working_tree_dir = None
-        # END working dir handling
-
-        self.working_dir = self._working_tree_dir or self.git_dir
-        self.git = Git(self.working_dir)
-
-        # special handling, in special times
-        args = [join(self.git_dir, 'objects')]
-        if issubclass(odbt, GitCmdObjectDB):
-            args.append(self.git)
-        self.odb = odbt(*args)
-
-    def __eq__(self, rhs):
-        if isinstance(rhs, Repo):
-            return self.git_dir == rhs.git_dir
-        return False
-
-    def __ne__(self, rhs):
-        return not self.__eq__(rhs)
-
-    def __hash__(self):
-        return hash(self.git_dir)
-
-    # Description property
-    def _get_description(self):
-        filename = join(self.git_dir, 'description')
-        return file(filename).read().rstrip()
-
-    def _set_description(self, descr):
-        filename = join(self.git_dir, 'description')
-        file(filename, 'w').write(descr + '\n')
-
-    description = property(_get_description, _set_description,
-                           doc="the project's description")
-    del _get_description
-    del _set_description
-
-    @property
-    def working_tree_dir(self):
-        """:return: The working tree directory of our git repository
-        :raise AssertionError: If we are a bare repository"""
-        if self._working_tree_dir is None:
-            raise AssertionError("Repository at %r is bare and does not have a working tree directory" % self.git_dir)
-        return self._working_tree_dir
-
-    @property
-    def bare(self):
-        """:return: True if the repository is bare"""
-        return self._bare
-
-    @property
-    def heads(self):
-        """A list of ``Head`` objects representing the branch heads in
-        this repo
-
-        :return: ``git.IterableList(Head, ...)``"""
-        return Head.list_items(self)
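-    # Illustrative sketch (editor's addition): heads are returned as an
-    # IterableList, so they can be addressed by position, by name or by
-    # attribute access. Branch names are placeholders.
-    #
-    #   repo.heads[0]               # first branch head
-    #   repo.heads['master']        # lookup by name
-    #   repo.heads.master.commit    # commit the branch currently points to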
-    @property
-    def references(self):
-        """A list of Reference objects representing tags, heads and remote references.
-
-        :return: IterableList(Reference, ...)"""
-        return Reference.list_items(self)
-
-    # alias for references
-    refs = references
-
-    # alias for heads
-    branches = heads
-
-    @property
-    def index(self):
-        """:return: IndexFile representing this repository's index."""
-        return IndexFile(self)
-
-    @property
-    def head(self):
-        """:return: HEAD object pointing to the current head reference"""
-        return HEAD(self, 'HEAD')
-
-    @property
-    def remotes(self):
-        """A list of Remote objects allowing to access and manipulate remotes
-
-        :return: ``git.IterableList(Remote, ...)``"""
-        return Remote.list_items(self)
-
-    def remote(self, name='origin'):
-        """:return: Remote with the specified name
-        :raise ValueError: if no remote with such a name exists"""
-        return Remote(self, name)
-
-    #{ Submodules
-
-    @property
-    def submodules(self):
-        """
-        :return: git.IterableList(Submodule, ...) of direct submodules
-            available from the current head"""
-        return Submodule.list_items(self)
-
-    def submodule(self, name):
-        """:return: Submodule with the given name
-        :raise ValueError: If no such submodule exists"""
-        try:
-            return self.submodules[name]
-        except IndexError:
-            raise ValueError("Didn't find submodule named %r" % name)
-        # END exception handling
-
-    def create_submodule(self, *args, **kwargs):
-        """Create a new submodule
-
-        :note: See the documentation of Submodule.add for a description of the
-            applicable parameters
-        :return: created submodule"""
-        return Submodule.add(self, *args, **kwargs)
-
-    def iter_submodules(self, *args, **kwargs):
-        """An iterator yielding Submodule instances, see the Traversable interface
-        for a description of args and kwargs
-
-        :return: Iterator"""
-        return RootModule(self).traverse(*args, **kwargs)
-
-    def submodule_update(self, *args, **kwargs):
-        """Update the submodules, keeping the repository consistent as it will
-        take the previous state into consideration. For more information, please
-        see the documentation of RootModule.update"""
-        return RootModule(self).update(*args, **kwargs)
-
-    #} END submodules
-
-    @property
-    def tags(self):
-        """A list of ``Tag`` objects that are available in this repo
-
-        :return: ``git.IterableList(TagReference, ...)``"""
-        return TagReference.list_items(self)
-
-    def tag(self, path):
-        """:return: TagReference object, reference pointing to a Commit or Tag
-        :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5"""
-        return TagReference(self, path)
-
-    def create_head(self, path, commit='HEAD', force=False, **kwargs):
-        """Create a new head within the repository.
-        For more documentation, please see the Head.create method.
-
-        :return: newly created Head Reference"""
-        return Head.create(self, path, commit, force, **kwargs)
-
-    def delete_head(self, *heads, **kwargs):
-        """Delete the given heads
-
-        :param kwargs: Additional keyword arguments to be passed to git-branch"""
-        return Head.delete(self, *heads, **kwargs)
-
-    def create_tag(self, path, ref='HEAD', message=None, force=False, **kwargs):
-        """Create a new tag reference.
-        For more documentation, please see the TagReference.create method.
-
-        :return: TagReference object"""
-        return TagReference.create(self, path, ref, message, force, **kwargs)
-
-    def delete_tag(self, *tags):
-        """Delete the given tag references"""
-        return TagReference.delete(self, *tags)
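-    # Illustrative usage sketch (editor's addition): creating and removing
-    # refs with the helpers above. Branch and tag names are placeholders, and
-    # the force keyword is assumed to be forwarded to git-branch as documented.
-    #
-    #   feature = repo.create_head('feature', 'HEAD~2')         # branch off an older commit
-    #   release = repo.create_tag('v1.0', message='first release')
-    #   repo.delete_head(feature, force=True)
-    #   repo.delete_tag(release)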
-    def create_remote(self, name, url, **kwargs):
-        """Create a new remote.
-
-        For more information, please see the documentation of the Remote.create
-        method
-
-        :return: Remote reference"""
-        return Remote.create(self, name, url, **kwargs)
-
-    def delete_remote(self, remote):
-        """Delete the given remote."""
-        return Remote.remove(self, remote)
-
-    def _get_config_path(self, config_level):
-        # we do not support an absolute path of the gitconfig on windows,
-        # use the global config instead
-        if sys.platform == "win32" and config_level == "system":
-            config_level = "global"
-
-        if config_level == "system":
-            return "/etc/gitconfig"
-        elif config_level == "global":
-            return os.path.normpath(os.path.expanduser("~/.gitconfig"))
-        elif config_level == "repository":
-            return join(self.git_dir, "config")
-
-        raise ValueError("Invalid configuration level: %r" % config_level)
-
-    def config_reader(self, config_level=None):
-        """
-        :return:
-            GitConfigParser allowing to read the full git configuration, but not to write it
-
-            The configuration will include values from the system, user and repository
-            configuration files.
-        :param config_level:
-            For possible values, see the config_writer method.
-            If None, all applicable levels will be used. Specify a level in case
-            you know which exact file you wish to read, to prevent reading multiple files
-        :note: On windows, the system configuration cannot currently be read as its path is
-            unknown; the global path will be used instead."""
-        files = None
-        if config_level is None:
-            files = [self._get_config_path(f) for f in self.config_level]
-        else:
-            files = [self._get_config_path(config_level)]
-        return GitConfigParser(files, read_only=True)
-
-    def config_writer(self, config_level="repository"):
-        """
-        :return:
-            GitConfigParser allowing to write values of the specified configuration file level.
-            Config writers should be retrieved, used to change the configuration, and written
-            right away, as they will lock the configuration file in question and prevent others
-            from writing it.
-        :param config_level:
-            One of the following values
-            system = system wide configuration file
-            global = user level configuration file
-            repository = configuration file for this repository only"""
-        return GitConfigParser(self._get_config_path(config_level), read_only=False)
-
-    def commit(self, rev=None):
-        """The Commit object for the specified revision
-
-        :param rev: revision specifier, see git-rev-parse for viable options.
-        :return: ``git.Commit``"""
-        if rev is None:
-            return self.active_branch.commit
-        else:
-            return self.rev_parse(str(rev) + "^0")
-
-    def iter_trees(self, *args, **kwargs):
-        """:return: Iterator yielding Tree objects
-        :note: Takes all arguments known to the iter_commits method"""
-        return (c.tree for c in self.iter_commits(*args, **kwargs))
-
-    def tree(self, rev=None):
-        """The Tree object for the given treeish revision
-        Examples::
-
-            repo.tree(repo.heads[0])
-
-        :param rev: is a revision pointing to a Treeish (being a commit or tree)
-        :return: ``git.Tree``
-        :note:
-            If you need a non-root level tree, find it by iterating the root tree. Otherwise
-            it cannot know about its path relative to the repository root and subsequent
-            operations might have unexpected results."""
-        if rev is None:
-            return self.active_branch.commit.tree
-        else:
-            return self.rev_parse(str(rev) + "^{tree}")
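-    # Illustrative sketch (editor's addition): obtaining a sub-tree by walking
-    # the root tree, as recommended in the note above. The 'lib' path is a
-    # placeholder, and Tree's item access and .blobs attribute are assumed.
-    #
-    #   root = repo.tree()          # root tree of the active branch
-    #   subtree = root['lib']       # sub-tree that knows its repository-relative path
-    #   for blob in subtree.blobs:
-    #       print(blob.path)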
-    def iter_commits(self, rev=None, paths='', **kwargs):
-        """A list of Commit objects representing the history of a given ref/commit
-
-        :param rev:
-            revision specifier, see git-rev-parse for viable options.
-            If None, the active branch will be used.
-        :param paths:
-            is an optional path or a list of paths to limit the returned commits to.
-            Commits that do not contain that path or the paths will not be returned.
-        :param kwargs:
-            Arguments to be passed to git-rev-list - common ones are
-            max_count and skip
-        :note: to receive only commits between two named revisions, use the
-            "revA..revB" revision specifier
-        :return: ``git.Commit[]``"""
-        if rev is None:
-            rev = self.active_branch
-
-        return Commit.iter_items(self, rev, paths, **kwargs)
-
-    def _get_daemon_export(self):
-        filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
-        return os.path.exists(filename)
-
-    def _set_daemon_export(self, value):
-        filename = join(self.git_dir, self.DAEMON_EXPORT_FILE)
-        fileexists = os.path.exists(filename)
-        if value and not fileexists:
-            touch(filename)
-        elif not value and fileexists:
-            os.unlink(filename)
-
-    daemon_export = property(_get_daemon_export, _set_daemon_export,
-                             doc="If True, git-daemon may export this repository")
-    del _get_daemon_export
-    del _set_daemon_export
-
-    def _get_alternates(self):
-        """The list of alternates for this repo from which objects can be retrieved
-
-        :return: list of strings being pathnames of alternates"""
-        alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
-
-        if os.path.exists(alternates_path):
-            f = open(alternates_path)
-            try:
-                alts = f.read()
-            finally:
-                f.close()
-            return alts.strip().splitlines()
-        else:
-            return list()
-
-    def _set_alternates(self, alts):
-        """Sets the alternates
-
-        :param alts:
-            is the array of string paths representing the alternates at which
-            git should look for objects, i.e. /home/user/repo/.git/objects
-        :raise NoSuchPathError:
-        :note:
-            The method does not check for the existence of the paths in alts,
-            as the caller is responsible for that."""
-        alternates_path = join(self.git_dir, 'objects', 'info', 'alternates')
-        if not alts:
-            if isfile(alternates_path):
-                os.remove(alternates_path)
-        else:
-            f = open(alternates_path, 'w')
-            try:
-                f.write("\n".join(alts))
-            finally:
-                f.close()
-            # END file handling
-        # END alts handling
-
-    alternates = property(_get_alternates, _set_alternates,
-                          doc="Retrieve a list of alternates paths or set a list of paths to be used as alternates")
-
-    def is_dirty(self, index=True, working_tree=True, untracked_files=False):
-        """
-        :return:
-            ``True`` if the repository is considered dirty. By default it will react
-            like a git-status without untracked files, hence it is dirty if the
-            index or the working copy have changes."""
-        if self._bare:
-            # Bare repositories with no associated working directory are
-            # always considered to be clean.
-            return False
-
-        # start from the one which is fastest to evaluate
-        default_args = ('--abbrev=40', '--full-index', '--raw')
-        if index:
-            # diff index against HEAD
-            if isfile(self.index.path) and self.head.is_valid() and \
-                    len(self.git.diff('HEAD', '--cached', *default_args)):
-                return True
-        # END index handling
-        if working_tree:
-            # diff index against working tree
-            if len(self.git.diff(*default_args)):
-                return True
-        # END working tree handling
-        if untracked_files:
-            if len(self.untracked_files):
-                return True
-        # END untracked files
-        return False
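-    # Illustrative sketch (editor's addition): the three flags above can be
-    # combined to answer different questions about the working state.
-    #
-    #   repo.is_dirty()                         # staged or unstaged changes?
-    #   repo.is_dirty(index=False)              # unstaged changes only?
-    #   repo.is_dirty(untracked_files=True)     # also count untracked files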
-    @property
-    def untracked_files(self):
-        """
-        :return:
-            list(str, ...)
-
-            Files currently untracked as they have not been staged yet. Paths
-            are relative to the current working directory of the git command.
-        :note:
-            Ignored files will not appear here, i.e. files mentioned in .gitignore"""
-        # make sure we get all files, not only untracked directories
-        proc = self.git.status(untracked_files=True, as_process=True)
-        stream = iter(proc.stdout)
-        untracked_files = list()
-        for line in stream:
-            if not line.startswith("# Untracked files:"):
-                continue
-            # skip two lines
-            stream.next()
-            stream.next()
-
-            for untracked_info in stream:
-                if not untracked_info.startswith("#\t"):
-                    break
-                untracked_files.append(untracked_info.replace("#\t", "").rstrip())
-            # END for each untracked info line
-        # END for each line
-        return untracked_files
-
-    @property
-    def active_branch(self):
-        """The name of the currently active branch.
-
-        :return: Head to the active branch"""
-        return self.head.reference
-
-    def blame(self, rev, file):
-        """The blame information for the given file at the given revision.
-
-        :param rev: revision specifier, see git-rev-parse for viable options.
-        :return:
-            list: [git.Commit, list: [<line>]]
-            A list of tuples associating a Commit object with a list of lines that
-            changed within the given commit. The Commit objects will be given in order
-            of appearance."""
-        data = self.git.blame(rev, '--', file, p=True)
-        commits = dict()
-        blames = list()
-        info = None
-
-        for line in data.splitlines(False):
-            parts = self.re_whitespace.split(line, 1)
-            firstpart = parts[0]
-            if self.re_hexsha_only.search(firstpart):
-                # handles
-                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7    - indicates blame-data start
-                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
-                digits = parts[-1].split(" ")
-                if len(digits) == 3:
-                    info = {'id': firstpart}
-                    blames.append([None, []])
-                # END blame data initialization
-            else:
-                m = self.re_author_committer_start.search(firstpart)
-                if m:
-                    # handles:
-                    # author Tom Preston-Werner
-                    # author-mail <tom@mojombo.com>
-                    # author-time 1192271832
-                    # author-tz -0700
-                    # committer Tom Preston-Werner
-                    # committer-mail <tom@mojombo.com>
-                    # committer-time 1192271832
-                    # committer-tz -0700 - IGNORED BY US
-                    role = m.group(0)
-                    if firstpart.endswith('-mail'):
-                        info["%s_email" % role] = parts[-1]
-                    elif firstpart.endswith('-time'):
-                        info["%s_date" % role] = int(parts[-1])
-                    elif role == firstpart:
-                        info[role] = parts[-1]
-                    # END distinguish mail,time,name
-                else:
-                    # handle
-                    # filename lib/grit.rb
-                    # summary add Blob
-                    # <and rest>
-                    if firstpart.startswith('filename'):
-                        info['filename'] = parts[-1]
-                    elif firstpart.startswith('summary'):
-                        info['summary'] = parts[-1]
-                    elif firstpart == '':
-                        if info:
-                            sha = info['id']
-                            c = commits.get(sha)
-                            if c is None:
-                                c = Commit(self, hex_to_bin(sha),
-                                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
-                                           authored_date=info['author_date'],
-                                           committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
-                                           committed_date=info['committer_date'],
-                                           message=info['summary'])
-                                commits[sha] = c
-                            # END if commit object needs initial creation
-                            m = self.re_tab_full_line.search(line)
-                            text, = m.groups()
-                            blames[-1][0] = c
-                            blames[-1][1].append(text)
-                            info = None
-                        # END if we collected commit info
-                    # END distinguish filename,summary,rest
-                # END distinguish author|committer vs filename,summary,rest
-            # END distinguish hexsha vs other information
-        return blames
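-    # Illustrative sketch (editor's addition): consuming the nested structure
-    # returned by blame(). The file path is a placeholder.
-    #
-    #   for commit, lines in repo.blame('HEAD', 'lib/git/cmd.py'):
-    #       print("%s touched %d lines" % (commit.hexsha, len(lines)))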
-    @classmethod
-    def init(cls, path=None, mkdir=True, **kwargs):
-        """Initialize a git repository at the given path if specified
-
-        :param path:
-            is the full path to the repo (traditionally ends with /<name>.git)
-            or None, in which case the repository will be created in the current
-            working directory
-        :param mkdir:
-            if specified, will create the repository directory if it doesn't
-            already exist. Creates the directory with a mode=0755.
-            Only effective if a path is explicitly given
-        :param kwargs:
-            keyword arguments serving as additional options to the git-init command
-        :return: ``git.Repo`` (the newly created repo)"""
-        if mkdir and path and not os.path.exists(path):
-            os.makedirs(path, 0755)
-
-        # git command automatically chdir into the directory
-        git = Git(path)
-        output = git.init(**kwargs)
-        return Repo(path)
-
-    @classmethod
-    def _clone(cls, git, url, path, odb_default_type, **kwargs):
-        # special handling for windows for path at which the clone should be
-        # created.
-        # tilde '~' will be expanded to the HOME no matter where the ~ occurs. Hence
-        # we at least give a proper error instead of letting git fail
-        prev_cwd = None
-        prev_path = None
-        odbt = kwargs.pop('odbt', odb_default_type)
-        if os.name == 'nt':
-            if '~' in path:
-                raise OSError("Git cannot handle the ~ character in path %r correctly" % path)
-
-            # on windows, git will think paths like c: are relative and prepend the
-            # current working dir (before it fails). We temporarily adjust the working
-            # dir to make this actually work
-            match = re.match("(\w:[/\\\])(.*)", path)
-            if match:
-                prev_cwd = os.getcwd()
-                prev_path = path
-                drive, rest_of_path = match.groups()
-                os.chdir(drive)
-                path = rest_of_path
-                kwargs['with_keep_cwd'] = True
-            # END cwd preparation
-        # END windows handling
-
-        try:
-            git.clone(url, path, **kwargs)
-        finally:
-            if prev_cwd is not None:
-                os.chdir(prev_cwd)
-                path = prev_path
-            # END reset previous working dir
-        # END bad windows handling
-
-        # our git command could have a different working dir than our actual
-        # environment, hence we prepend its working dir if required
-        if not os.path.isabs(path) and git.working_dir:
-            path = join(git._working_dir, path)
-
-        # adjust remotes - there may be operating systems which use backslashes.
-        # These might be given as initial paths, but when handling the config file
-        # that contains the remote from which we were cloned, git stops liking it
-        # as it will escape the backslashes. Hence we undo the escaping just to be
-        # sure
-        repo = cls(os.path.abspath(path), odbt=odbt)
-        if repo.remotes:
-            repo.remotes[0].config_writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/"))
-        # END handle remote repo
-        return repo
-
-    def clone(self, path, **kwargs):
-        """Create a clone from this repository.
-
-        :param path:
-            is the full path of the new repo (traditionally ends with ./<name>.git).
-        :param kwargs:
-            odbt = ObjectDatabase Type, allowing to determine the object database
-            implementation used by the returned Repo instance
-
-            All remaining keyword arguments are given to the git-clone command
-        :return: ``git.Repo`` (the newly cloned repo)"""
-        return self._clone(self.git, self.git_dir, path, type(self.odb), **kwargs)
-
-    @classmethod
-    def clone_from(cls, url, to_path, **kwargs):
-        """Create a clone from the given URL
-
-        :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS
-        :param to_path: Path to which the repository should be cloned
-        :param kwargs: see the ``clone`` method
-        :return: Repo instance pointing to the cloned directory"""
-        return cls._clone(Git(os.getcwd()), url, to_path, GitCmdObjectDB, **kwargs)
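-    # Illustrative usage sketch (editor's addition): creating repositories
-    # with the class methods above. Paths and the URL are placeholders, and
-    # bare=True is assumed to be forwarded to git-init via kwargs.
-    #
-    #   mirror = Repo.init('/tmp/mirror.git', bare=True)
-    #   work = Repo.clone_from('git://example.com/proj.git', '/tmp/proj')
-    #   backup = work.clone('/tmp/proj-backup.git')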
-    def archive(self, ostream, treeish=None, prefix=None, **kwargs):
-        """Archive the tree at the given revision.
-
-        :param ostream: file-compatible stream object to which the archive will be written
-        :param treeish: is the treeish name/id, defaults to active branch
-        :param prefix: is the optional prefix to prepend to each filename in the archive
-        :param kwargs:
-            Additional arguments passed to git-archive
-            NOTE: Use the 'format' argument to define the kind of format. Use
-            specialized ostreams to write any format supported by python
-        :raise GitCommandError: in case something went wrong
-        :return: self"""
-        if treeish is None:
-            treeish = self.active_branch
-        if prefix and 'prefix' not in kwargs:
-            kwargs['prefix'] = prefix
-        kwargs['output_stream'] = ostream
-
-        self.git.archive(treeish, **kwargs)
-        return self
-
-    rev_parse = rev_parse
-
-    def __repr__(self):
-        return '<git.Repo "%s">' % self.git_dir
diff --git a/lib/git/repo/fun.py b/lib/git/repo/fun.py
deleted file mode 100644
index a0f66fe5..00000000
--- a/lib/git/repo/fun.py
+++ /dev/null
@@ -1,231 +0,0 @@
-"""Package with general repository related functions"""
-
-import os
-
-from gitdb.exc import BadObject
-from git.refs import SymbolicReference
-from git.objects import Object
-from gitdb.util import (
-    join,
-    isdir,
-    isfile,
-    hex_to_bin,
-    bin_to_hex
-    )
-from string import digits
-
-__all__ = ('rev_parse', 'is_git_dir', 'touch')
-
-def touch(filename):
-    fp = open(filename, "a")
-    fp.close()
-
-def is_git_dir(d):
-    """This is taken from the git setup.c:is_git_directory
-    function."""
-    if isdir(d) and \
-            isdir(join(d, 'objects')) and \
-            isdir(join(d, 'refs')):
-        headref = join(d, 'HEAD')
-        return isfile(headref) or \
-            (os.path.islink(headref) and
-             os.readlink(headref).startswith('refs'))
-    return False
-
-
-def short_to_long(odb, hexsha):
-    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha,
-        or None if no candidate could be found.
-    :param hexsha: hexsha with less than 40 bytes"""
-    try:
-        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
-    except BadObject:
-        return None
-    # END exception handling
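-# Illustrative sketch (editor's addition): expanding an abbreviated sha1
-# against a repository's object database with the helper above. The path and
-# sha prefix are placeholders.
-#
-#   repo = Repo('/path/to/repo')
-#   full = short_to_long(repo.odb, '634396b')
-#   if full is None:
-#       print('no object with that prefix')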
-def name_to_object(repo, name):
-    """:return: object specified by the given name - hexshas (short and long)
-        as well as references are supported"""
-    hexsha = None
-
-    # is it a hexsha? Try the most common ones, which is 7 to 40
-    if repo.re_hexsha_shortened.match(name):
-        if len(name) != 40:
-            # find long sha for short sha
-            hexsha = short_to_long(repo.odb, name)
-        else:
-            hexsha = name
-        # END handle short shas
-    else:
-        for base in ('%s', 'refs/%s', 'refs/tags/%s', 'refs/heads/%s', 'refs/remotes/%s', 'refs/remotes/%s/HEAD'):
-            try:
-                hexsha = SymbolicReference.dereference_recursive(repo, base % name)
-                break
-            except ValueError:
-                pass
-        # END for each base
-    # END handle hexsha
-
-    # tried everything? fail
-    if hexsha is None:
-        raise BadObject(name)
-    # END assert hexsha was found
-
-    return Object.new_from_sha(repo, hex_to_bin(hexsha))
-
-def deref_tag(tag):
-    """Recursively dereference a tag and return the resulting object"""
-    while True:
-        try:
-            tag = tag.object
-        except AttributeError:
-            break
-    # END dereference tag
-    return tag
-
-def to_commit(obj):
-    """Convert the given object to a commit if possible and return it"""
-    if obj.type == 'tag':
-        obj = deref_tag(obj)
-
-    if obj.type != "commit":
-        raise ValueError("Cannot convert object %r to type commit" % obj)
-    # END verify type
-    return obj
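-# Illustrative sketch (editor's addition): revision specifiers handled by the
-# parser below, matching the ~, ^, : and ^{type} tokens it implements. The
-# branch, tag and path names refer to a hypothetical repository.
-#
-#   rev_parse(repo, 'master')           # -> Commit at the branch head
-#   rev_parse(repo, 'v1.0^{tree}')      # -> Tree of the tagged commit
-#   rev_parse(repo, 'HEAD~3')           # -> third ancestor, first parents only
-#   rev_parse(repo, 'HEAD^2')           # -> second parent of a merge commit
-#   rev_parse(repo, 'master:lib/git')   # -> Tree or Blob at that path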
-def rev_parse(repo, rev):
-    """
-    :return: Object at the given revision, either Commit, Tag, Tree or Blob
-    :param rev: git-rev-parse compatible revision specification, please see
-        http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html
-        for details
-    :note: Currently there is no access to the rev-log, rev-specs may only contain
-        topological tokens such as ~ and ^.
-    :raise BadObject: if the given revision could not be found"""
-    if '@' in rev:
-        raise ValueError("There is no rev-log support yet")
-
-    # colon search mode ?
-    if rev.startswith(':/'):
-        # colon search mode
-        raise NotImplementedError("commit by message search ( regex )")
-    # END handle search
-
-    obj = None
-    output_type = "commit"
-    start = 0
-    parsed_to = 0
-    lr = len(rev)
-    while start < lr:
-        if rev[start] not in "^~:":
-            start += 1
-            continue
-        # END handle start
-
-        if obj is None:
-            # token is a rev name
-            obj = name_to_object(repo, rev[:start])
-        # END initialize obj on first token
-
-        token = rev[start]
-        start += 1
-
-        # try to parse {type}
-        if start < lr and rev[start] == '{':
-            end = rev.find('}', start)
-            if end == -1:
-                raise ValueError("Missing closing brace to define type in %s" % rev)
-            output_type = rev[start+1:end]  # exclude brace
-
-            # handle type
-            if output_type == 'commit':
-                pass    # default
-            elif output_type == 'tree':
-                try:
-                    obj = to_commit(obj).tree
-                except (AttributeError, ValueError):
-                    pass    # error raised later
-                # END exception handling
-            elif output_type in ('', 'blob'):
-                if obj.type == 'tag':
-                    obj = deref_tag(obj)
-                else:
-                    # cannot do anything for non-tags
-                    pass
-                # END handle tag
-            else:
-                raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev))
-            # END handle output type
-
-            # empty output types don't require any specific type, it's just about dereferencing tags
-            if output_type and obj.type != output_type:
-                raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type))
-            # END verify output type
-
-            start = end + 1     # skip brace
-            parsed_to = start
-            continue
-        # END parse type
-
-        # try to parse a number
-        num = 0
-        if token != ":":
-            found_digit = False
-            while start < lr:
-                if rev[start] in digits:
-                    num = num * 10 + int(rev[start])
-                    start += 1
-                    found_digit = True
-                else:
-                    break
-                # END handle number
-            # END number parse loop
-
-            # no explicit number given, 1 is the default
-            # It could be 0 though
-            if not found_digit:
-                num = 1
-            # END set default num
-        # END number parsing only if non-blob mode
-
-        parsed_to = start
-        # handle hierarchy walk
-        try:
-            if token == "~":
-                obj = to_commit(obj)
-                for item in xrange(num):
-                    obj = obj.parents[0]
-                # END for each history item to walk
-            elif token == "^":
-                obj = to_commit(obj)
-                # must be n'th parent
-                if num:
-                    obj = obj.parents[num-1]
-            elif token == ":":
-                if obj.type != "tree":
-                    obj = obj.tree
-                # END get tree type
-                obj = obj[rev[start:]]
-                parsed_to = lr
-            else:
-                raise ValueError("Invalid token: %r" % token)
-            # END handle token
-        except (IndexError, AttributeError):
-            raise BadObject("Invalid Revision in %s" % rev)
-        # END exception handling
-    # END parse loop
-
-    # still no obj? It's probably a simple name
-    if obj is None:
-        obj = name_to_object(repo, rev)
-        parsed_to = lr
-    # END handle simple name
-
-    if obj is None:
-        raise ValueError("Revision specifier could not be parsed: %s" % rev)
-
-    if parsed_to != lr:
-        raise ValueError("Didn't consume complete rev spec %s, consumed part: %s" % (rev, rev[:parsed_to]))
-
-    return obj
diff --git a/lib/git/util.py b/lib/git/util.py
deleted file mode 100644
index c945e6a3..00000000
--- a/lib/git/util.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# utils.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-import os
-import sys
-import time
-import tempfile
-
-from gitdb.util import (
-    make_sha,
-    LockedFD,
-    file_contents_ro,
-    LazyMixin,
-    to_hex_sha,
-    to_bin_sha
-    )
-
-__all__ = ("stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",
-           "join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList",
-           "BlockingLockFile", "LockFile")
-
-def stream_copy(source, destination, chunk_size=512*1024):
-    """Copy all data from the source stream into the destination stream in chunks
-    of size chunk_size
-
-    :return: amount of bytes written"""
-    br = 0
-    while True:
-        chunk = source.read(chunk_size)
-        destination.write(chunk)
-        br += len(chunk)
-        if len(chunk) < chunk_size:
-            break
-    # END reading output stream
-    return br
-
-def join_path(a, *p):
-    """Join path tokens together similar to os.path.join, but always use
-    '/' instead of possibly '\' on windows."""
-    path = a
-    for b in p:
-        if b.startswith('/'):
-            path += b[1:]
-        elif path == '' or path.endswith('/'):
-            path += b
-        else:
-            path += '/' + b
-    return path
-
-def to_native_path_windows(path):
-    return path.replace('/', '\\')
-
-def to_native_path_linux(path):
-    return path.replace('\\', '/')
-
-if sys.platform.startswith('win'):
-    to_native_path = to_native_path_windows
-else:
-    # no need for any work on linux
-    def to_native_path_linux(path):
-        return path
-    to_native_path = to_native_path_linux
-
-def join_path_native(a, *p):
-    """As join_path, but makes sure an OS-native path is returned. This is only
-    needed to play it safe on windows and to assure nice paths that only
-    use '\\'"""
-    return to_native_path(join_path(a, *p))
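-# Illustrative sketch (editor's addition): the path helpers above always
-# produce '/'-separated paths, while join_path_native converts back to the
-# platform convention.
-#
-#   join_path('refs', 'heads', 'master')    # -> 'refs/heads/master'
-#   join_path('refs/', '/heads')            # -> 'refs/heads'
-#   join_path_native('lib', 'git')          # -> 'lib\\git' on windows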
-class Stats(object):
-    """
-    Represents stat information as presented by git at the end of a merge. It is
-    created from the output of a diff operation.
-
-    ``Example``::
-
-        c = Commit( sha1 )
-        s = c.stats
-        s.total         # full-stat-dict
-        s.files         # dict( filepath : stat-dict )
-
-    ``stat-dict``
-
-    A dictionary with the following keys and values::
-
-        deletions = number of deleted lines as int
-        insertions = number of inserted lines as int
-        lines = total number of lines changed as int, or deletions + insertions
-
-    ``full-stat-dict``
-
-    In addition to the items in the stat-dict, it features additional information::
-
-        files = number of changed files as int"""
-    __slots__ = ("total", "files")
-
-    def __init__(self, total, files):
-        self.total = total
-        self.files = files
-
-    @classmethod
-    def _list_from_string(cls, repo, text):
-        """Create a Stats object from output retrieved by git-diff.
-
-        :return: git.Stats"""
-        hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': dict()}
-        for line in text.splitlines():
-            (raw_insertions, raw_deletions, filename) = line.split("\t")
-            insertions = raw_insertions != '-' and int(raw_insertions) or 0
-            deletions = raw_deletions != '-' and int(raw_deletions) or 0
-            hsh['total']['insertions'] += insertions
-            hsh['total']['deletions'] += deletions
-            hsh['total']['lines'] += insertions + deletions
-            hsh['total']['files'] += 1
-            hsh['files'][filename.strip()] = {'insertions': insertions,
-                                              'deletions': deletions,
-                                              'lines': insertions + deletions}
-        return Stats(hsh['total'], hsh['files'])
-
-
-class IndexFileSHA1Writer(object):
-    """Wrapper around a file-like object that remembers the SHA1 of
-    the data written to it. It will write the sha when the stream is closed
-    or if asked for explicitly using write_sha.
-
-    Only useful to the index file
-
-    :note: Based on the dulwich project"""
-    __slots__ = ("f", "sha1")
-
-    def __init__(self, f):
-        self.f = f
-        self.sha1 = make_sha("")
-
-    def write(self, data):
-        self.sha1.update(data)
-        return self.f.write(data)
-
-    def write_sha(self):
-        sha = self.sha1.digest()
-        self.f.write(sha)
-        return sha
-
-    def close(self):
-        sha = self.write_sha()
-        self.f.close()
-        return sha
-
-    def tell(self):
-        return self.f.tell()
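-# Illustrative sketch (editor's addition): trailing-checksum files, as used by
-# git's index format. The writer appends the SHA1 of everything written when
-# the stream is closed; a StringIO stands in for a real index file here.
-#
-#   from StringIO import StringIO
-#   buf = StringIO()
-#   w = IndexFileSHA1Writer(buf)
-#   w.write("DIRC....")     # payload; the 20-byte digest is appended by close()
-#   sha = w.close()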
-class LockFile(object):
-    """Provides methods to obtain, check for, and release a file based lock which
-    should be used to handle concurrent access to the same file.
-
-    As we are a utility class to be derived from, we only use protected methods.
-
-    Locks will automatically be released on destruction"""
-    __slots__ = ("_file_path", "_owns_lock")
-
-    def __init__(self, file_path):
-        self._file_path = file_path
-        self._owns_lock = False
-
-    def __del__(self):
-        self._release_lock()
-
-    def _lock_file_path(self):
-        """:return: Path to lockfile"""
-        return "%s.lock" % (self._file_path)
-
-    def _has_lock(self):
-        """:return: True if we own the lock"""
-        if not self._owns_lock:
-            return False
-
-        return True
-
-    def _obtain_lock_or_raise(self):
-        """Create a lock file as a flag for other instances, mark our instance as lock-holder
-
-        :raise IOError: if a lock was already present or a lock file could not be written"""
-        if self._has_lock():
-            return
-        lock_file = self._lock_file_path()
-        if os.path.isfile(lock_file):
-            raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))
-
-        try:
-            fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
-            os.close(fd)
-        except OSError, e:
-            raise IOError(str(e))
-
-        self._owns_lock = True
-
-    def _obtain_lock(self):
-        """The default implementation will raise if a lock cannot be obtained.
-        Subclasses may override this method to provide a different implementation"""
-        return self._obtain_lock_or_raise()
-
-    def _release_lock(self):
-        """Release our lock if we have one"""
-        if not self._has_lock():
-            return
-
-        # if someone removed our file beforehand, let's just flag this issue
-        # instead of failing, to make it more usable.
-        lfp = self._lock_file_path()
-        try:
-            # on windows, the file needs write permissions to be removable
-            if os.name == 'nt':
-                os.chmod(lfp, 0777)
-            # END handle win32
-            os.remove(lfp)
-        except OSError:
-            pass
-        self._owns_lock = False
-
-
-class BlockingLockFile(LockFile):
-    """The lock file will block until a lock could be obtained, or fail after
-    a specified timeout.
-
-    :note: If the directory containing the lock was removed, an exception will
-        be raised during the blocking period, preventing hangs as the lock
-        can never be obtained."""
-    __slots__ = ("_check_interval", "_max_block_time")
-
-    def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
-        """Configure the instance
-
-        :param check_interval_s:
-            Period of time to sleep until the lock is checked the next time.
-            By default, it waits a nearly unlimited time
-        :param max_block_time_s: Maximum amount of seconds we may lock"""
-        super(BlockingLockFile, self).__init__(file_path)
-        self._check_interval = check_interval_s
-        self._max_block_time = max_block_time_s
-
-    def _obtain_lock(self):
-        """This method blocks until it has obtained the lock, or raises IOError if
-        it ran out of time or if the parent directory was not available anymore.
-        If this method returns, you are guaranteed to own the lock"""
-        starttime = time.time()
-        maxtime = starttime + float(self._max_block_time)
-        while True:
-            try:
-                super(BlockingLockFile, self)._obtain_lock()
-            except IOError:
-                # sanity check: if the directory leading to the lockfile is not
-                # readable anymore, raise an exception
-                curtime = time.time()
-                if not os.path.isdir(os.path.dirname(self._lock_file_path())):
-                    msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
-                    raise IOError(msg)
-                # END handle missing directory
-
-                if curtime >= maxtime:
-                    msg = "Waited %g seconds for lock at %r" % (maxtime - starttime, self._lock_file_path())
-                    raise IOError(msg)
-                # END abort if we wait too long
-                time.sleep(self._check_interval)
-            else:
-                break
-        # END endless loop
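-# Illustrative sketch (editor's addition): a derived class exposing the
-# protected locking machinery above to guard a shared file between processes.
-# The ConfigLock name and the guarded path are hypothetical.
-#
-#   class ConfigLock(BlockingLockFile):
-#       def hold(self):
-#           self._obtain_lock()     # blocks up to max_block_time_s, then raises IOError
-#       def release(self):
-#           self._release_lock()
-#
-#   lock = ConfigLock('/tmp/shared.cfg', check_interval_s=0.1, max_block_time_s=5)
-#   lock.hold()
-#   try:
-#       pass    # ... modify /tmp/shared.cfg ...
-#   finally:
-#       lock.release()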
-class IterableList(list):
-    """
-    List of iterable objects allowing to query an object by id or by named index::
-
-        heads = repo.heads
-        heads.master
-        heads['master']
-        heads[0]
-
-    It requires an id_attribute name to be set which will be queried from its
-    contained items to have a means for comparison.
-
-    A prefix can be specified which is to be used in case the id returned by the
-    items always contains a prefix that does not matter to the user, so it
-    can be left out."""
-    __slots__ = ('_id_attr', '_prefix')
-
-    def __new__(cls, id_attr, prefix=''):
-        return super(IterableList, cls).__new__(cls)
-
-    def __init__(self, id_attr, prefix=''):
-        self._id_attr = id_attr
-        self._prefix = prefix
-        if not isinstance(id_attr, basestring):
-            raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
-        # END help debugging !
-
-    def __getattr__(self, attr):
-        attr = self._prefix + attr
-        for item in self:
-            if getattr(item, self._id_attr) == attr:
-                return item
-        # END for each item
-        return list.__getattribute__(self, attr)
-
-    def __getitem__(self, index):
-        if isinstance(index, int):
-            return list.__getitem__(self, index)
-
-        try:
-            return getattr(self, index)
-        except AttributeError:
-            raise IndexError("No item found with id %r" % (self._prefix + index))
-
-
-class Iterable(object):
-    """Defines an interface for iterable items which is to assure a uniform
-    way to retrieve and iterate items within the git repository"""
-    __slots__ = tuple()
-    _id_attribute_ = "attribute that most suitably identifies your instance"
-
-    @classmethod
-    def list_items(cls, repo, *args, **kwargs):
-        """
-        Find all items of this type - subclasses can specify args and kwargs differently.
-        If no args are given, subclasses are obliged to return all items.
-
-        :note: Favor the iter_items method as it will not build the complete list in memory
-        :return: list(Item, ...) list of item instances"""
-        out_list = IterableList(cls._id_attribute_)
-        out_list.extend(cls.iter_items(repo, *args, **kwargs))
-        return out_list
-
-    @classmethod
-    def iter_items(cls, repo, *args, **kwargs):
-        """For more information about the arguments, see list_items
-
-        :return: iterator yielding Items"""
-        raise NotImplementedError("To be implemented by Subclass")
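-# Illustrative sketch (editor's addition): a minimal Iterable implementation.
-# The Thing type and its hard-coded data source are hypothetical; real
-# subclasses would derive their items from the given repo.
-#
-#   class Thing(Iterable):
-#       _id_attribute_ = 'name'
-#       def __init__(self, name):
-#           self.name = name
-#       @classmethod
-#       def iter_items(cls, repo, *args, **kwargs):
-#           for name in ('alpha', 'beta'):
-#               yield cls(name)
-#
-#   Thing.list_items(None).alpha    # lookup via IterableList attribute access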