Diffstat (limited to 'objects')
-rw-r--r-- | objects/__init__.py | 21 |
-rw-r--r-- | objects/base.py | 172 |
-rw-r--r-- | objects/blob.py | 27 |
-rw-r--r-- | objects/commit.py | 465 |
-rw-r--r-- | objects/fun.py | 199 |
-rw-r--r-- | objects/submodule/__init__.py | 2 |
-rw-r--r-- | objects/submodule/base.py | 924 |
-rw-r--r-- | objects/submodule/root.py | 315 |
-rw-r--r-- | objects/submodule/util.py | 101 |
-rw-r--r-- | objects/tag.py | 76 |
-rw-r--r-- | objects/tree.py | 280 |
-rw-r--r-- | objects/util.py | 315 |
12 files changed, 0 insertions, 2897 deletions
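The removed files implement GitPython's object model: Object and IndexObject (base.py), Blob, Commit, Tree, Tag, and the Submodule machinery. For orientation, a minimal usage sketch of that 0.3-era API, reconstructed from the docstrings in the diff below; the snippet is illustrative only (the repository path is hypothetical) and is not part of the change:

    # Illustrative only - exercises APIs defined in the deleted files.
    from git import Repo

    repo = Repo('/path/to/repo')                    # hypothetical path
    head_commit = repo.head.commit                  # Commit (objects/commit.py)
    print head_commit.hexsha, head_commit.summary   # Python 2 syntax, matching the sources
    print head_commit.author.name, head_commit.authored_date

    for blob in head_commit.tree.blobs:             # Tree/Blob (objects/tree.py, blob.py)
        print blob.path, blob.mime_type

    for sm in repo.submodules:                      # Submodule (objects/submodule/base.py)
        print sm.name, sm.url, sm.branch_path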
diff --git a/objects/__init__.py b/objects/__init__.py
deleted file mode 100644
index 77f69d29..00000000
--- a/objects/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-"""
-Import all submodules main classes into the package space
-"""
-import inspect
-from base import *
-# Fix import dependency - add IndexObject to the util module, so that it can be
-# imported by the submodule.base
-import submodule.util
-submodule.util.IndexObject = IndexObject
-submodule.util.Object = Object
-from submodule.base import *
-from submodule.root import *
-
-# must come after submodule was made available
-from tag import *
-from blob import *
-from commit import *
-from tree import *
-
-__all__ = [ name for name, obj in locals().items()
-            if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file
diff --git a/objects/base.py b/objects/base.py
deleted file mode 100644
index 5f2f7809..00000000
--- a/objects/base.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# base.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from git.util import LazyMixin, join_path_native, stream_copy
-from util import get_object_type_by_name
-from gitdb.util import (
-    hex_to_bin,
-    bin_to_hex,
-    basename
-    )
-
-import gitdb.typ as dbtyp
-
-_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r"
-
-__all__ = ("Object", "IndexObject")
-
-class Object(LazyMixin):
-    """Implements an Object which may be Blobs, Trees, Commits and Tags"""
-    NULL_HEX_SHA = '0'*40
-    NULL_BIN_SHA = '\0'*20
-
-    TYPES = (dbtyp.str_blob_type, dbtyp.str_tree_type, dbtyp.str_commit_type, dbtyp.str_tag_type)
-    __slots__ = ("repo", "binsha", "size")
-    type = None         # to be set by subclass
-
-    def __init__(self, repo, binsha):
-        """Initialize an object by identifying it by its binary sha.
-        All keyword arguments will be set on demand if None.
-
-        :param repo: repository this object is located in
-        :param binsha: 20 byte SHA1"""
-        super(Object, self).__init__()
-        self.repo = repo
-        self.binsha = binsha
-        assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
-
-    @classmethod
-    def new(cls, repo, id):
-        """
-        :return: New Object instance of a type appropriate to the object type behind
-            id. The id of the newly created object will be a binsha even though
-            the input id may have been a Reference or Rev-Spec
-
-        :param id: reference, rev-spec, or hexsha
-
-        :note: This cannot be a __new__ method as it would always call __init__
-            with the input id which is not necessarily a binsha."""
-        return repo.rev_parse(str(id))
-
-    @classmethod
-    def new_from_sha(cls, repo, sha1):
-        """
-        :return: new object instance of a type appropriate to represent the given
-            binary sha1
-        :param sha1: 20 byte binary sha1"""
-        if sha1 == cls.NULL_BIN_SHA:
-            # the NULL binsha is always the root commit
-            return get_object_type_by_name('commit')(repo, sha1)
-        # END handle special case
-        oinfo = repo.odb.info(sha1)
-        inst = get_object_type_by_name(oinfo.type)(repo, oinfo.binsha)
-        inst.size = oinfo.size
-        return inst
-
-    def _set_cache_(self, attr):
-        """Retrieve object information"""
-        if attr == "size":
-            oinfo = self.repo.odb.info(self.binsha)
-            self.size = oinfo.size
-            # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
-        else:
-            super(Object, self)._set_cache_(attr)
-
-    def __eq__(self, other):
-        """:return: True if the objects have the same SHA1"""
-        return self.binsha == other.binsha
-
-    def __ne__(self, other):
-        """:return: True if the objects do not have the same SHA1"""
-        return self.binsha != other.binsha
-
-    def __hash__(self):
-        """:return: Hash of our id allowing objects to be used in dicts and sets"""
-        return hash(self.binsha)
-
-    def __str__(self):
-        """:return: string of our SHA1 as understood by all git commands"""
-        return bin_to_hex(self.binsha)
-
-    def __repr__(self):
-        """:return: string with pythonic representation of our object"""
-        return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
-
-    @property
-    def hexsha(self):
-        """:return: 40 byte hex version of our 20 byte binary sha"""
-        return bin_to_hex(self.binsha)
-
-    @property
-    def data_stream(self):
-        """:return: File Object compatible stream to the uncompressed raw data of the object
-        :note: returned streams must be read in order"""
-        return self.repo.odb.stream(self.binsha)
-
-    def stream_data(self, ostream):
-        """Writes our data directly to the given output stream
-        :param ostream: File object compatible stream object.
-        :return: self"""
-        istream = self.repo.odb.stream(self.binsha)
-        stream_copy(istream, ostream)
-        return self
-
-
-class IndexObject(Object):
-    """Base for all objects that can be part of the index file, namely Tree, Blob and
-    SubModule objects"""
-    __slots__ = ("path", "mode")
-
-    # for compatibility with iterable lists
-    _id_attribute_ = 'path'
-
-    def __init__(self, repo, binsha, mode=None, path=None):
-        """Initialize a newly instanced IndexObject
-        :param repo: is the Repo we are located in
-        :param binsha: 20 byte sha1
-        :param mode: is the stat compatible file mode as int, use the stat module
-            to evaluate the information
-        :param path:
-            is the path to the file in the file system, relative to the git repository root, i.e.
-            file.ext or folder/other.ext
-        :note:
-            Path may not be set if the index object has been created directly, as it cannot
-            be retrieved without knowing the parent tree."""
-        super(IndexObject, self).__init__(repo, binsha)
-        if mode is not None:
-            self.mode = mode
-        if path is not None:
-            self.path = path
-
-    def __hash__(self):
-        """:return:
-            Hash of our path as index items are uniquely identifiable by path, not
-            by their data!"""
-        return hash(self.path)
-
-    def _set_cache_(self, attr):
-        if attr in IndexObject.__slots__:
-            # they cannot be retrieved later on (not without searching for them)
-            raise AttributeError("path and mode attributes must have been set during %s object creation" % type(self).__name__)
-        else:
-            super(IndexObject, self)._set_cache_(attr)
-        # END handle slot attribute
-
-    @property
-    def name(self):
-        """:return: Name portion of the path, effectively being the basename"""
-        return basename(self.path)
-
-    @property
-    def abspath(self):
-        """
-        :return:
-            Absolute path to this index object in the file system (as opposed to the
-            .path field which is a path relative to the git repository).
-
-            The returned path will be native to the system and contains '\' on windows."""
-        return join_path_native(self.repo.working_tree_dir, self.path)
-
diff --git a/objects/blob.py b/objects/blob.py
deleted file mode 100644
index 32f8c61c..00000000
--- a/objects/blob.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# blob.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-
-from mimetypes import guess_type
-import base
-
-__all__ = ('Blob', )
-
-class Blob(base.IndexObject):
-    """A Blob encapsulates a git blob object"""
-    DEFAULT_MIME_TYPE = "text/plain"
-    type = "blob"
-
-    __slots__ = tuple()
-
-    @property
-    def mime_type(self):
-        """
-        :return: String describing the mime type of this file (based on the filename)
-        :note: Defaults to 'text/plain' in case the actual file type is unknown.
""" - guesses = None - if self.path: - guesses = guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE diff --git a/objects/commit.py b/objects/commit.py deleted file mode 100644 index 69a3adc4..00000000 --- a/objects/commit.py +++ /dev/null @@ -1,465 +0,0 @@ -# commit.py -# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors -# -# This module is part of GitPython and is released under -# the BSD License: http://www.opensource.org/licenses/bsd-license.php - -from git.util import ( - Actor, - Iterable, - Stats, - ) -from git.diff import Diffable -from tree import Tree -from gitdb import IStream -from cStringIO import StringIO - -import base -from gitdb.util import ( - hex_to_bin - ) -from util import ( - Traversable, - Serializable, - parse_date, - altz_to_utctz_str, - parse_actor_and_date - ) -from time import ( - time, - altzone - ) -import os -import sys - -__all__ = ('Commit', ) - -class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): - """Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_date = "GIT_AUTHOR_DATE" - env_committer_date = "GIT_COMMITTER_DATE" - - # CONFIGURATION KEYS - conf_encoding = 'i18n.commitencoding' - - # INVARIANTS - default_encoding = "UTF-8" - - - # object configuration - type = "commit" - __slots__ = ("tree", - "author", "authored_date", "author_tz_offset", - "committer", "committed_date", "committer_tz_offset", - "message", "parents", "encoding") - _id_attribute_ = "binsha" - - def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, - committer=None, committed_date=None, committer_tz_offset=None, - message=None, parents=None, encoding=None): - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree - Tree object - :param author: Actor - is the author string ( will be implicitly converted into an Actor object ) - :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string - :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param message: string - is the commit message - :param encoding: string - encoding of the message, defaults to UTF-8 - :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit - - :note: Timezone information is in the same format and in the same sign - as what time.altzone returns. 
The sign is inverted compared to git's - UTC timezone.""" - super(Commit,self).__init__(repo, binsha) - if tree is not None: - assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) - if tree is not None: - self.tree = tree - if author is not None: - self.author = author - if authored_date is not None: - self.authored_date = authored_date - if author_tz_offset is not None: - self.author_tz_offset = author_tz_offset - if committer is not None: - self.committer = committer - if committed_date is not None: - self.committed_date = committed_date - if committer_tz_offset is not None: - self.committer_tz_offset = committer_tz_offset - if message is not None: - self.message = message - if parents is not None: - self.parents = parents - if encoding is not None: - self.encoding = encoding - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper - binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) - self._deserialize(StringIO(stream.read())) - else: - super(Commit, self)._set_cache_(attr) - # END handle attrs - - @property - def summary(self): - """:return: First line of the commit message""" - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """Count the number of commits reachable from this commit - - :param paths: - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - :param kwargs: - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - :return: int defining the number of reachable commits""" - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - :return: - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes""" - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """Find all commits matching the given criteria. - - :param repo: is the Repo - :param rev: revision specifier, see git-rev-parse for viable options - :param paths: - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - :param kwargs: - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - :return: iterator yielding Commit items""" - if 'pretty' in kwargs: - raise ValueError("--pretty cannot be used as parsing expects single sha's only") - # END handle pretty - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) - return cls._iter_from_process_or_stream(repo, proc) - - def iter_parents(self, paths='', **kwargs): - """Iterate _all_ parents of this commit. 
- - :param paths: - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - :param kwargs: All arguments allowed by git-rev-list - :return: Iterator yielding Commit objects which are parents of self """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items(self.repo, self, paths, **kwargs) - - @property - def stats(self): - """Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - :return: git.Stats""" - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) - return Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream): - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database - - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" - stream = proc_or_stream - if not hasattr(stream,'readline'): - stream = proc_or_stream.stdout - - readline = stream.readline - while True: - line = readline() - if not line: - break - hexsha = line.strip() - if len(hexsha) > 40: - # split additional information, as returned by bisect for instance - hexsha, rest = line.split(None, 1) - # END handle extra info - - assert len(hexsha) == 40, "Invalid line: %s" % hexsha - yield Commit(repo, hex_to_bin(hexsha)) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """Commit the given tree, creating a commit object. - - :param repo: Repo object the commit should be part of - :param tree: Tree object or hex or bin sha - the tree of the new commit - :param message: Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - :param parent_commits: - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - :param head: - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. 
- - :return: Commit object representing the new commit - - :note: - Additional information about the committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information""" - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - # retrieve all additional information, create a commit object, and - # serialize it - # Generally: - # * Environment variables override configuration values - # * Sensible defaults are set according to the git documentation - - # COMMITER AND AUTHOR INFO - cr = repo.config_reader() - env = os.environ - - committer = Actor.committer(cr) - author = Actor.author(cr) - - # PARSE THE DATES - unix_time = int(time()) - offset = altzone - - author_date_str = env.get(cls.env_author_date, '') - if author_date_str: - author_time, author_offset = parse_date(author_date_str) - else: - author_time, author_offset = unix_time, offset - # END set author time - - committer_date_str = env.get(cls.env_committer_date, '') - if committer_date_str: - committer_time, committer_offset = parse_date(committer_date_str) - else: - committer_time, committer_offset = unix_time, offset - # END set committer time - - # assume utf8 encoding - enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) - - - # if the tree is no object, make sure we create one - otherwise - # the created commit object is invalid - if isinstance(tree, str): - tree = repo.tree(tree) - # END tree conversion - - # CREATE NEW COMMIT - new_commit = cls(repo, cls.NULL_BIN_SHA, tree, - author, author_time, author_offset, - committer, committer_time, committer_offset, - message, parent_commits, conf_encoding) - - stream = StringIO() - new_commit._serialize(stream) - streamlen = stream.tell() - stream.seek(0) - - istream = repo.odb.store(IStream(cls.type, streamlen, stream)) - new_commit.binsha = istream.binsha - - if head: - # need late import here, importing git at the very beginning throws - # as well ... 
- import git.refs - try: - repo.head.set_commit(new_commit, logmsg="commit: %s" % message) - except ValueError: - # head is not yet set to the ref our HEAD points to - # Happens on first commit - import git.refs - master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit, logmsg="commit (initial): %s" % message) - repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) - # END handle empty repositories - # END advance head handling - - return new_commit - - #{ Serializable Implementation - - def _serialize(self, stream): - write = stream.write - write("tree %s\n" % self.tree) - for p in self.parents: - write("parent %s\n" % p) - - a = self.author - aname = a.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - - c = self.committer - fmt = "%s %s <%s> %s %s\n" - write(fmt % ("author", aname, a.email, - self.authored_date, - altz_to_utctz_str(self.author_tz_offset))) - - # encode committer - aname = c.name - if isinstance(aname, unicode): - aname = aname.encode(self.encoding) - # END handle unicode in name - write(fmt % ("committer", aname, c.email, - self.committed_date, - altz_to_utctz_str(self.committer_tz_offset))) - - if self.encoding != self.default_encoding: - write("encoding %s\n" % self.encoding) - - write("\n") - - # write plain bytes, be sure its encoded according to our encoding - if isinstance(self.message, unicode): - write(self.message.encode(self.encoding)) - else: - write(self.message) - # END handle encoding - return self - - def _deserialize(self, stream): - """:param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object""" - readline = stream.readline - self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') - - self.parents = list() - next_line = None - while True: - parent_line = readline() - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) - # END for each parent line - self.parents = tuple(self.parents) - - self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) - self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) - - - # now we can have the encoding line, or an empty line followed by the optional - # message. 
- self.encoding = self.default_encoding - # read encoding or empty line to separate message - enc = readline() - enc = enc.strip() - if enc: - self.encoding = enc[enc.find(' ')+1:] - # now comes the message separator - readline() - # END handle encoding - - # decode the authors name - try: - self.author.name = self.author.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) - # END handle author's encoding - - # decode committer name - try: - self.committer.name = self.committer.name.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding) - # END handle author's encoding - - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip - self.message = stream.read() - try: - self.message = self.message.decode(self.encoding) - except UnicodeDecodeError: - print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) - # END exception handling - return self - - #} END serializable implementation diff --git a/objects/fun.py b/objects/fun.py deleted file mode 100644 index 9b0a377c..00000000 --- a/objects/fun.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Module with functions which are supposed to be as fast as possible""" -from stat import S_ISDIR - -__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive', - 'traverse_tree_recursive') - - - - -def tree_to_stream(entries, write): - """Write the give list of entries into a stream using its write method - :param entries: **sorted** list of tuples with (binsha, mode, name) - :param write: write method which takes a data string""" - ord_zero = ord('0') - bit_mask = 7 # 3 bits set - - for binsha, mode, name in entries: - mode_str = '' - for i in xrange(6): - mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str - # END for each 8 octal value - - # git slices away the first octal if its zero - if mode_str[0] == '0': - mode_str = mode_str[1:] - # END save a byte - - # here it comes: if the name is actually unicode, the replacement below - # will not work as the binsha is not part of the ascii unicode encoding - - # hence we must convert to an utf8 string for it to work properly. - # According to my tests, this is exactly what git does, that is it just - # takes the input literally, which appears to be utf8 on linux. 
-        if isinstance(name, unicode):
-            name = name.encode("utf8")
-        write("%s %s\0%s" % (mode_str, name, binsha))
-    # END for each item
-
-
-def tree_entries_from_data(data):
-    """Reads the binary representation of a tree and returns tuples of Tree items
-    :param data: data block with tree data
-    :return: list(tuple(binsha, mode, tree_relative_path), ...)"""
-    ord_zero = ord('0')
-    len_data = len(data)
-    i = 0
-    out = list()
-    while i < len_data:
-        mode = 0
-
-        # read mode
-        # Some git versions truncate the leading 0, some don't
-        # The type will be extracted from the mode later
-        while data[i] != ' ':
-            # move existing mode integer up one level being 3 bits
-            # and add the actual ordinal value of the character
-            mode = (mode << 3) + (ord(data[i]) - ord_zero)
-            i += 1
-        # END while reading mode
-
-        # byte is space now, skip it
-        i += 1
-
-        # parse name, it is NULL separated
-        ns = i
-        while data[i] != '\0':
-            i += 1
-        # END while not reached NULL
-
-        # default encoding for strings in git is utf8
-        # Only use the respective unicode object if the byte stream was encoded
-        name = data[ns:i]
-        name_enc = name.decode("utf-8")
-        if len(name) > len(name_enc):
-            name = name_enc
-        # END handle encoding
-
-        # byte is NULL, get next 20
-        i += 1
-        sha = data[i:i+20]
-        i = i + 20
-        out.append((sha, mode, name))
-    # END for each byte in data stream
-    return out
-
-
-def _find_by_name(tree_data, name, is_dir, start_at):
-    """return data entry matching the given name and tree mode
-    or None.
-    Before the item is returned, the respective data item is set
-    None in the tree_data list to mark it done"""
-    try:
-        item = tree_data[start_at]
-        if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
-            tree_data[start_at] = None
-            return item
-    except IndexError:
-        pass
-    # END exception handling
-    for index, item in enumerate(tree_data):
-        if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
-            tree_data[index] = None
-            return item
-        # END if item matches
-    # END for each item
-    return None
-
-def _to_full_path(item, path_prefix):
-    """Rebuild entry with given path prefix"""
-    if not item:
-        return item
-    return (item[0], item[1], path_prefix+item[2])
-
-def traverse_trees_recursive(odb, tree_shas, path_prefix):
-    """
-    :return: list with entries according to the given binary tree-shas.
-        The result is encoded in a list
-        of n tuple|None per blob/commit, (n == len(tree_shas)), where
-        * [0] == 20 byte sha
-        * [1] == mode as int
-        * [2] == path relative to working tree root
-        The entry tuple is None if the respective blob/commit did not
-        exist in the given tree.
-    :param tree_shas: iterable of shas pointing to trees. All trees must
-        be on the same level. A tree-sha may be None, in which case None
-        entries are returned in its place.
-    :param path_prefix: a prefix to be added to the returned paths on this level,
-        set it '' for the first iteration
-    :note: The ordering of the returned items will be partially lost"""
-    trees_data = list()
-    nt = len(tree_shas)
-    for tree_sha in tree_shas:
-        if tree_sha is None:
-            data = list()
-        else:
-            data = tree_entries_from_data(odb.stream(tree_sha).read())
-        # END handle muted trees
-        trees_data.append(data)
-    # END for each sha to get data for
-
-    out = list()
-    out_append = out.append
-
-    # find all matching entries and recursively process them together if the match
-    # is a tree. If the match is a non-tree item, put it into the result.
-    # Processed items will be set None
-    for ti, tree_data in enumerate(trees_data):
-        for ii, item in enumerate(tree_data):
-            if not item:
-                continue
-            # END skip already done items
-            entries = [None for n in range(nt)]
-            entries[ti] = item
-            sha, mode, name = item          # it's faster to unpack
-            is_dir = S_ISDIR(mode)          # type mode bits
-
-            # find this item in all other tree data items
-            # wrap around, but stop one before our current index, hence
-            # ti+nt, not ti+1+nt
-            for tio in range(ti+1, ti+nt):
-                tio = tio % nt
-                entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
-            # END for each other item data
-
-            # if we are a directory, enter recursion
-            if is_dir:
-                out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
-            else:
-                out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
-            # END handle recursion
-
-            # finally mark it done
-            tree_data[ii] = None
-        # END for each item
-
-        # we are done with one tree, set all its data empty
-        del(tree_data[:])
-    # END for each tree_data chunk
-    return out
-
-def traverse_tree_recursive(odb, tree_sha, path_prefix):
-    """
-    :return: list of entries of the tree pointed to by the binary tree_sha. An entry
-        has the following format:
-        * [0] 20 byte sha
-        * [1] mode as int
-        * [2] path relative to the repository
-    :param path_prefix: prefix to prepend to the front of all returned paths"""
-    entries = list()
-    data = tree_entries_from_data(odb.stream(tree_sha).read())
-
-    # unpacking/packing is faster than accessing individual items
-    for sha, mode, name in data:
-        if S_ISDIR(mode):
-            entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
-        else:
-            entries.append((sha, mode, path_prefix+name))
-    # END for each item
-
-    return entries
diff --git a/objects/submodule/__init__.py b/objects/submodule/__init__.py
deleted file mode 100644
index 82df59b0..00000000
--- a/objects/submodule/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# NOTE: Cannot import anything here as the top-level __init__ has to handle
-# our dependencies
diff --git a/objects/submodule/base.py b/objects/submodule/base.py
deleted file mode 100644
index fd6c9396..00000000
--- a/objects/submodule/base.py
+++ /dev/null
@@ -1,924 +0,0 @@
-import util
-from util import (
-    mkhead,
-    sm_name,
-    sm_section,
-    unbare_repo,
-    SubmoduleConfigParser,
-    find_first_remote_branch
-    )
-from git.objects.util import Traversable
-from StringIO import StringIO       # need a dict to set bloody .name field
-from git.util import (
-    Iterable,
-    join_path_native,
-    to_native_path_linux,
-    RemoteProgress
-    )
-
-from git.config import SectionConstraint
-from git.exc import (
-    InvalidGitRepositoryError,
-    NoSuchPathError
-    )
-
-import stat
-import git
-
-import os
-import sys
-import time
-
-import shutil
-
-__all__ = ["Submodule", "UpdateProgress"]
-
-
-class UpdateProgress(RemoteProgress):
-    """Class providing detailed progress information to the caller who should
-    derive from it and implement the ``update(...)`` message"""
-    CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes+3)]
-    _num_op_codes = RemoteProgress._num_op_codes + 3
-
-    __slots__ = tuple()
-
-
-BEGIN = UpdateProgress.BEGIN
-END = UpdateProgress.END
-CLONE = UpdateProgress.CLONE
-FETCH = UpdateProgress.FETCH
-UPDWKTREE = UpdateProgress.UPDWKTREE
-
-
-# IndexObject comes via the util module; it's a 'hacky' fix thanks to python's import
-# mechanism, which causes plenty of trouble if the only reason for packages and
-# modules is refactoring - subpackages shouldn't depend on parent packages
refactoring - subpackages shoudn't depend on parent packages -class Submodule(util.IndexObject, Iterable, Traversable): - """Implements access to a git submodule. They are special in that their sha - represents a commit in the submodule's repository which is to be checked out - at the path of this instance. - The submodule type does not have a string type associated with it, as it exists - solely as a marker in the tree and index. - - All methods work in bare and non-bare repositories.""" - - _id_attribute_ = "name" - k_modules_file = '.gitmodules' - k_head_option = 'branch' - k_head_default = 'master' - k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status - - # this is a bogus type for base class compatability - type = 'submodule' - - __slots__ = ('_parent_commit', '_url', '_branch_path', '_name', '__weakref__') - _cache_attrs = ('path', '_url', '_branch_path') - - def __init__(self, repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, branch_path=None): - """Initialize this instance with its attributes. We only document the ones - that differ from ``IndexObject`` - - :param repo: Our parent repository - :param binsha: binary sha referring to a commit in the remote repository, see url parameter - :param parent_commit: see set_parent_commit() - :param url: The url to the remote repository which is the submodule - :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" - super(Submodule, self).__init__(repo, binsha, mode, path) - self.size = 0 - if parent_commit is not None: - self._parent_commit = parent_commit - if url is not None: - self._url = url - if branch_path is not None: - assert isinstance(branch_path, basestring) - self._branch_path = branch_path - if name is not None: - self._name = name - - def _set_cache_(self, attr): - if attr == '_parent_commit': - # set a default value, which is the root tree of the current head - self._parent_commit = self.repo.commit() - elif attr in ('path', '_url', '_branch_path'): - reader = self.config_reader() - # default submodule values - self.path = reader.get_value('path') - self._url = reader.get_value('url') - # git-python extension values - optional - self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) - elif attr == '_name': - raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") - else: - super(Submodule, self)._set_cache_(attr) - # END handle attribute name - - def _get_intermediate_items(self, item): - """:return: all the submodules of our module repository""" - try: - return type(self).list_items(item.module()) - except InvalidGitRepositoryError: - return list() - # END handle intermeditate items - - def __eq__(self, other): - """Compare with another submodule""" - # we may only compare by name as this should be the ID they are hashed with - # Otherwise this type wouldn't be hashable - # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) - return self._name == other._name - - def __ne__(self, other): - """Compare with another submodule for inequality""" - return not (self == other) - - def __hash__(self): - """Hash this instance using its logical id, not the sha""" - return hash(self._name) - - def __str__(self): - return self._name - - def __repr__(self): - return "git.%s(name=%s, path=%s, url=%s, branch_path=%s)" % (type(self).__name__, self._name, self.path, self.url, self.branch_path) - - 
@classmethod - def _config_parser(cls, repo, parent_commit, read_only): - """:return: Config Parser constrained to our submodule in read or write mode - :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository - at the given parent commit. Otherwise the exception would be delayed until the first - access of the config parser""" - parent_matches_head = repo.head.commit == parent_commit - if not repo.bare and parent_matches_head: - fp_module = cls.k_modules_file - fp_module_path = os.path.join(repo.working_tree_dir, fp_module) - if not os.path.isfile(fp_module_path): - raise IOError("%s file was not accessible" % fp_module_path) - # END handle existance - fp_module = fp_module_path - else: - try: - fp_module = cls._sio_modules(parent_commit) - except KeyError: - raise IOError("Could not find %s file in the tree of parent commit %s" % (cls.k_modules_file, parent_commit)) - # END handle exceptions - # END handle non-bare working tree - - if not read_only and (repo.bare or not parent_matches_head): - raise ValueError("Cannot write blobs of 'historical' submodule configurations") - # END handle writes of historical submodules - - return SubmoduleConfigParser(fp_module, read_only = read_only) - - def _clear_cache(self): - # clear the possibly changed values - for name in self._cache_attrs: - try: - delattr(self, name) - except AttributeError: - pass - # END try attr deletion - # END for each name to delete - - @classmethod - def _sio_modules(cls, parent_commit): - """:return: Configuration file as StringIO - we only access it through the respective blob's data""" - sio = StringIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) - sio.name = cls.k_modules_file - return sio - - def _config_parser_constrained(self, read_only): - """:return: Config Parser constrained to our submodule in read or write mode""" - parser = self._config_parser(self.repo, self._parent_commit, read_only) - parser.set_submodule(self) - return SectionConstraint(parser, sm_section(self.name)) - - #{ Edit Interface - - @classmethod - def add(cls, repo, name, path, url=None, branch=None, no_checkout=False): - """Add a new submodule to the given repository. This will alter the index - as well as the .gitmodules file, but will not create a new commit. - If the submodule already exists, no matter if the configuration differs - from the one provided, the existing submodule will be returned. - - :param repo: Repository instance which should receive the submodule - :param name: The name/identifier for the submodule - :param path: repository-relative or absolute path at which the submodule - should be located - It will be created as required during the repository initialization. - :param url: git-clone compatible URL, see git-clone reference for more information - If None, the repository is assumed to exist, and the url of the first - remote is taken instead. This is useful if you want to make an existing - repository a submodule of anotherone. - :param branch: branch at which the submodule should (later) be checked out. - The given branch must exist in the remote repository, and will be checked - out locally as a tracking branch. - It will only be written into the configuration if it not None, which is - when the checked out branch will be the one the remote HEAD pointed to. 
- The result you get in these situation is somewhat fuzzy, and it is recommended - to specify at least 'master' here - :param no_checkout: if True, and if the repository has to be cloned manually, - no checkout will be performed - :return: The newly created submodule instance - :note: works atomically, such that no change will be done if the repository - update fails for instance""" - if repo.bare: - raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") - # END handle bare repos - - path = to_native_path_linux(path) - if path.endswith('/'): - path = path[:-1] - # END handle trailing slash - - # assure we never put backslashes into the url, as some operating systems - # like it ... - if url != None: - url = to_native_path_linux(url) - #END assure url correctness - - # INSTANTIATE INTERMEDIATE SM - sm = cls(repo, cls.NULL_BIN_SHA, cls.k_default_mode, path, name) - if sm.exists(): - # reretrieve submodule from tree - try: - return repo.head.commit.tree[path] - except KeyError: - # could only be in index - index = repo.index - entry = index.entries[index.entry_key(path, 0)] - sm.binsha = entry.binsha - return sm - # END handle exceptions - # END handle existing - - br = git.Head.to_full_path(str(branch) or cls.k_head_default) - has_module = sm.module_exists() - branch_is_default = branch is None - if has_module and url is not None: - if url not in [r.url for r in sm.module().remotes]: - raise ValueError("Specified URL '%s' does not match any remote url of the repository at '%s'" % (url, sm.abspath)) - # END check url - # END verify urls match - - mrepo = None - if url is None: - if not has_module: - raise ValueError("A URL was not given and existing repository did not exsit at %s" % path) - # END check url - mrepo = sm.module() - urls = [r.url for r in mrepo.remotes] - if not urls: - raise ValueError("Didn't find any remote url in repository at %s" % sm.abspath) - # END verify we have url - url = urls[0] - else: - # clone new repo - kwargs = {'n' : no_checkout} - if not branch_is_default: - kwargs['b'] = br - # END setup checkout-branch - mrepo = git.Repo.clone_from(url, path, **kwargs) - # END verify url - - # update configuration and index - index = sm.repo.index - writer = sm.config_writer(index=index, write=False) - writer.set_value('url', url) - writer.set_value('path', path) - - sm._url = url - if not branch_is_default: - # store full path - writer.set_value(cls.k_head_option, br) - sm._branch_path = br - # END handle path - del(writer) - - # we deliberatly assume that our head matches our index ! - pcommit = repo.head.commit - sm._parent_commit = pcommit - sm.binsha = mrepo.head.commit.binsha - index.add([sm], write=True) - - return sm - - def update(self, recursive=False, init=True, to_latest_revision=False, progress=None, - dry_run=False): - """Update the repository of this submodule to point to the checkout - we point at with the binsha of this instance. - - :param recursive: if True, we will operate recursively and update child- - modules as well. - :param init: if True, the module repository will be cloned into place if necessary - :param to_latest_revision: if True, the submodule's sha will be ignored during checkout. - Instead, the remote will be fetched, and the local tracking branch updated. 
- This only works if we have a local tracking branch, which is the case - if the remote repository had a master branch, or of the 'branch' option - was specified for this submodule and the branch existed remotely - :param progress: UpdateProgress instance or None of no progress should be shown - :param dry_run: if True, the operation will only be simulated, but not performed. - All performed operations are read-only - :note: does nothing in bare repositories - :note: method is definitely not atomic if recurisve is True - :return: self""" - if self.repo.bare: - return self - #END pass in bare mode - - if progress is None: - progress = UpdateProgress() - #END handle progress - prefix = '' - if dry_run: - prefix = "DRY-RUN: " - #END handle prefix - - # to keep things plausible in dry-run mode - if dry_run: - mrepo = None - #END init mrepo - - # ASSURE REPO IS PRESENT AND UPTODATE - ##################################### - try: - mrepo = self.module() - rmts = mrepo.remotes - len_rmts = len(rmts) - for i, remote in enumerate(rmts): - op = FETCH - if i == 0: - op |= BEGIN - #END handle start - - progress.update(op, i, len_rmts, prefix+"Fetching remote %s of submodule %r" % (remote, self.name)) - #=============================== - if not dry_run: - remote.fetch(progress=progress) - #END handle dry-run - #=============================== - if i == len_rmts-1: - op |= END - #END handle end - progress.update(op, i, len_rmts, prefix+"Done fetching remote of submodule %r" % self.name) - #END fetch new data - except InvalidGitRepositoryError: - if not init: - return self - # END early abort if init is not allowed - import git - - # there is no git-repository yet - but delete empty paths - module_path = join_path_native(self.repo.working_tree_dir, self.path) - if not dry_run and os.path.isdir(module_path): - try: - os.rmdir(module_path) - except OSError: - raise OSError("Module directory at %r does already exist and is non-empty" % module_path) - # END handle OSError - # END handle directory removal - - # don't check it out at first - nonetheless it will create a local - # branch according to the remote-HEAD if possible - progress.update(BEGIN|CLONE, 0, 1, prefix+"Cloning %s to %s in submodule %r" % (self.url, module_path, self.name)) - if not dry_run: - mrepo = git.Repo.clone_from(self.url, module_path, n=True) - #END handle dry-run - progress.update(END|CLONE, 0, 1, prefix+"Done cloning to %s" % module_path) - - - if not dry_run: - # see whether we have a valid branch to checkout - try: - # find a remote which has our branch - we try to be flexible - remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name) - local_branch = mkhead(mrepo, self.branch_path) - - # have a valid branch, but no checkout - make sure we can figure - # that out by marking the commit with a null_sha - local_branch.set_object(util.Object(mrepo, self.NULL_BIN_SHA)) - # END initial checkout + branch creation - - # make sure HEAD is not detached - mrepo.head.set_reference(local_branch, logmsg="submodule: attaching head to %s" % local_branch) - mrepo.head.ref.set_tracking_branch(remote_branch) - except IndexError: - print >> sys.stderr, "Warning: Failed to checkout tracking branch %s" % self.branch_path - #END handle tracking branch - - # NOTE: Have to write the repo config file as well, otherwise - # the default implementation will be offended and not update the repository - # Maybe this is a good way to assure it doesn't get into our way, but - # we want to stay backwards compatible too ... . Its so redundant ! 
- self.repo.config_writer().set_value(sm_section(self.name), 'url', self.url) - #END handle dry_run - #END handle initalization - - - # DETERMINE SHAS TO CHECKOUT - ############################ - binsha = self.binsha - hexsha = self.hexsha - if mrepo is not None: - # mrepo is only set if we are not in dry-run mode or if the module existed - is_detached = mrepo.head.is_detached - #END handle dry_run - - if mrepo is not None and to_latest_revision: - msg_base = "Cannot update to latest revision in repository at %r as " % mrepo.working_dir - if not is_detached: - rref = mrepo.head.ref.tracking_branch() - if rref is not None: - rcommit = rref.commit - binsha = rcommit.binsha - hexsha = rcommit.hexsha - else: - print >> sys.stderr, "%s a tracking branch was not set for local branch '%s'" % (msg_base, mrepo.head.ref) - # END handle remote ref - else: - print >> sys.stderr, "%s there was no local tracking branch" % msg_base - # END handle detached head - # END handle to_latest_revision option - - # update the working tree - # handles dry_run - if mrepo is not None and mrepo.head.commit.binsha != binsha: - progress.update(BEGIN|UPDWKTREE, 0, 1, prefix+"Updating working tree at %s for submodule %r to revision %s" % (self.path, self.name, hexsha)) - if not dry_run: - if is_detached: - # NOTE: for now we force, the user is no supposed to change detached - # submodules anyway. Maybe at some point this becomes an option, to - # properly handle user modifications - see below for future options - # regarding rebase and merge. - mrepo.git.checkout(hexsha, force=True) - else: - # TODO: allow to specify a rebase, merge, or reset - # TODO: Warn if the hexsha forces the tracking branch off the remote - # branch - this should be prevented when setting the branch option - mrepo.head.reset(hexsha, index=True, working_tree=True) - # END handle checkout - #END handle dry_run - progress.update(END|UPDWKTREE, 0, 1, prefix+"Done updating working tree for submodule %r" % self.name) - # END update to new commit only if needed - - # HANDLE RECURSION - ################## - if recursive: - # in dry_run mode, the module might not exist - if mrepo is not None: - for submodule in self.iter_items(self.module()): - submodule.update(recursive, init, to_latest_revision, progress=progress, dry_run=dry_run) - # END handle recursive update - #END handle dry run - # END for each submodule - - return self - - @unbare_repo - def move(self, module_path, configuration=True, module=True): - """Move the submodule to a another module path. This involves physically moving - the repository at our current path, changing the configuration, as well as - adjusting our index entry accordingly. - - :param module_path: the path to which to move our module, given as - repository-relative path. Intermediate directories will be created - accordingly. If the path already exists, it must be empty. - Trailling (back)slashes are removed automatically - :param configuration: if True, the configuration will be adjusted to let - the submodule point to the given path. - :param module: if True, the repository managed by this submodule - will be moved, not the configuration. This will effectively - leave your repository in an inconsistent state unless the configuration - and index already point to the target location. 
- :return: self - :raise ValueError: if the module path existed and was not empty, or was a file - :note: Currently the method is not atomic, and it could leave the repository - in an inconsistent state if a sub-step fails for some reason - """ - if module + configuration < 1: - raise ValueError("You must specify to move at least the module or the configuration of the submodule") - #END handle input - - module_path = to_native_path_linux(module_path) - if module_path.endswith('/'): - module_path = module_path[:-1] - # END handle trailing slash - - # VERIFY DESTINATION - if module_path == self.path: - return self - #END handle no change - - dest_path = join_path_native(self.repo.working_tree_dir, module_path) - if os.path.isfile(dest_path): - raise ValueError("Cannot move repository onto a file: %s" % dest_path) - # END handle target files - - index = self.repo.index - tekey = index.entry_key(module_path, 0) - # if the target item already exists, fail - if configuration and tekey in index.entries: - raise ValueError("Index entry for target path did alredy exist") - #END handle index key already there - - # remove existing destination - if module: - if os.path.exists(dest_path): - if len(os.listdir(dest_path)): - raise ValueError("Destination module directory was not empty") - #END handle non-emptyness - - if os.path.islink(dest_path): - os.remove(dest_path) - else: - os.rmdir(dest_path) - #END handle link - else: - # recreate parent directories - # NOTE: renames() does that now - pass - #END handle existance - # END handle module - - # move the module into place if possible - cur_path = self.abspath - renamed_module = False - if module and os.path.exists(cur_path): - os.renames(cur_path, dest_path) - renamed_module = True - #END move physical module - - - # rename the index entry - have to manipulate the index directly as - # git-mv cannot be used on submodules ... yeah - try: - if configuration: - try: - ekey = index.entry_key(self.path, 0) - entry = index.entries[ekey] - del(index.entries[ekey]) - nentry = git.IndexEntry(entry[:3]+(module_path,)+entry[4:]) - index.entries[tekey] = nentry - except KeyError: - raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path)) - #END handle submodule doesn't exist - - # update configuration - writer = self.config_writer(index=index) # auto-write - writer.set_value('path', module_path) - self.path = module_path - del(writer) - # END handle configuration flag - except Exception: - if renamed_module: - os.renames(dest_path, cur_path) - # END undo module renaming - raise - #END handle undo rename - - return self - - @unbare_repo - def remove(self, module=True, force=False, configuration=True, dry_run=False): - """Remove this submodule from the repository. This will remove our entry - from the .gitmodules file and the entry in the .git/config file. - - :param module: If True, the module we point to will be deleted - as well. If the module is currently on a commit which is not part - of any branch in the remote, if the currently checked out branch - working tree, or untracked files, - is ahead of its tracking branch, if you have modifications in the - In case the removal of the repository fails for these reasons, the - submodule status will not have been altered. - If this submodule has child-modules on its own, these will be deleted - prior to touching the own module. - :param force: Enforces the deletion of the module even though it contains - modifications. This basically enforces a brute-force file system based - deletion. 
- :param configuration: if True, the submodule is deleted from the configuration, - otherwise it isn't. Although this should be enabled most of the times, - this flag enables you to safely delete the repository of your submodule. - :param dry_run: if True, we will not actually do anything, but throw the errors - we would usually throw - :return: self - :note: doesn't work in bare repositories - :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted - :raise OSError: if directories or files could not be removed""" - if not (module + configuration): - raise ValueError("Need to specify to delete at least the module, or the configuration") - # END handle params - - # DELETE MODULE REPOSITORY - ########################## - if module and self.module_exists(): - if force: - # take the fast lane and just delete everything in our module path - # TODO: If we run into permission problems, we have a highly inconsistent - # state. Delete the .git folders last, start with the submodules first - mp = self.abspath - method = None - if os.path.islink(mp): - method = os.remove - elif os.path.isdir(mp): - method = shutil.rmtree - elif os.path.exists(mp): - raise AssertionError("Cannot forcibly delete repository as it was neither a link, nor a directory") - #END handle brutal deletion - if not dry_run: - assert method - method(mp) - #END apply deletion method - else: - # verify we may delete our module - mod = self.module() - if mod.is_dirty(untracked_files=True): - raise InvalidGitRepositoryError("Cannot delete module at %s with any modifications, unless force is specified" % mod.working_tree_dir) - # END check for dirt - - # figure out whether we have new commits compared to the remotes - # NOTE: If the user pulled all the time, the remote heads might - # not have been updated, so commits coming from the remote look - # as if they come from us. But we stay strictly read-only and - # don't fetch beforhand. 
- for remote in mod.remotes: - num_branches_with_new_commits = 0 - rrefs = remote.refs - for rref in rrefs: - num_branches_with_new_commits = len(mod.git.cherry(rref)) != 0 - # END for each remote ref - # not a single remote branch contained all our commits - if num_branches_with_new_commits == len(rrefs): - raise InvalidGitRepositoryError("Cannot delete module at %s as there are new commits" % mod.working_tree_dir) - # END handle new commits - # have to manually delete references as python's scoping is - # not existing, they could keep handles open ( on windows this is a problem ) - if len(rrefs): - del(rref) - #END handle remotes - del(rrefs) - del(remote) - # END for each remote - - # gently remove all submodule repositories - for sm in self.children(): - sm.remove(module=True, force=False, configuration=False, dry_run=dry_run) - del(sm) - # END for each child-submodule - - # finally delete our own submodule - if not dry_run: - wtd = mod.working_tree_dir - del(mod) # release file-handles (windows) - shutil.rmtree(wtd) - # END delete tree if possible - # END handle force - # END handle module deletion - - # DELETE CONFIGURATION - ###################### - if configuration and not dry_run: - # first the index-entry - index = self.repo.index - try: - del(index.entries[index.entry_key(self.path, 0)]) - except KeyError: - pass - #END delete entry - index.write() - - # now git config - need the config intact, otherwise we can't query - # inforamtion anymore - self.repo.config_writer().remove_section(sm_section(self.name)) - self.config_writer().remove_section() - # END delete configuration - - # void our data not to delay invalid access - self._clear_cache() - - return self - - def set_parent_commit(self, commit, check=True): - """Set this instance to use the given commit whose tree is supposed to - contain the .gitmodules blob. - - :param commit: Commit'ish reference pointing at the root_tree - :param check: if True, relatively expensive checks will be performed to verify - validity of the submodule. - :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. - :raise ValueError: if the parent commit didn't store this submodule under the - current path - :return: self""" - pcommit = self.repo.commit(commit) - pctree = pcommit.tree - if self.k_modules_file not in pctree: - raise ValueError("Tree of commit %s did not contain the %s file" % (commit, self.k_modules_file)) - # END handle exceptions - - prev_pc = self._parent_commit - self._parent_commit = pcommit - - if check: - parser = self._config_parser(self.repo, self._parent_commit, read_only=True) - if not parser.has_section(sm_section(self.name)): - self._parent_commit = prev_pc - raise ValueError("Submodule at path %r did not exist in parent commit %s" % (self.path, commit)) - # END handle submodule did not exist - # END handle checking mode - - # update our sha, it could have changed - self.binsha = pctree[self.path].binsha - - self._clear_cache() - - return self - - @unbare_repo - def config_writer(self, index=None, write=True): - """:return: a config writer instance allowing you to read and write the data - belonging to this submodule into the .gitmodules file. - - :param index: if not None, an IndexFile instance which should be written. - defaults to the index of the Submodule's parent repository. - :param write: if True, the index will be written each time a configuration - value changes. 
-    @unbare_repo
-    def config_writer(self, index=None, write=True):
-        """:return: a config writer instance allowing you to read and write the data
-        belonging to this submodule into the .gitmodules file.
-
-        :param index: if not None, an IndexFile instance which should be written.
-            defaults to the index of the Submodule's parent repository.
-        :param write: if True, the index will be written each time a configuration
-            value changes.
-        :note: the parameters allow for a more efficient writing of the index,
-            as you can pass in a modified index on your own, prevent automatic writing,
-            and write yourself once the whole operation is complete
-        :raise ValueError: if trying to get a writer on a parent_commit which does not
-            match the current head commit
-        :raise IOError: If the .gitmodules file/blob could not be read"""
-        writer = self._config_parser_constrained(read_only=False)
-        if index is not None:
-            writer.config._index = index
-        writer.config._auto_write = write
-        return writer
-
-    #} END edit interface
-
-    #{ Query Interface
-
-    @unbare_repo
-    def module(self):
-        """:return: Repo instance initialized from the repository at our submodule path
-        :raise InvalidGitRepositoryError: if a repository was not available. This could
-            also mean that it was not yet initialized"""
-        # late import to work around circular dependencies
-        module_path = self.abspath
-        try:
-            repo = git.Repo(module_path)
-            if repo != self.repo:
-                return repo
-            # END handle repo uninitialized
-        except (InvalidGitRepositoryError, NoSuchPathError):
-            raise InvalidGitRepositoryError("No valid repository at %s" % self.path)
-        else:
-            raise InvalidGitRepositoryError("Repository at %r was not yet checked out" % module_path)
-        # END handle exceptions
-
-    def module_exists(self):
-        """:return: True if our module exists and is a valid git repository. See module() method"""
-        try:
-            self.module()
-            return True
-        except Exception:
-            return False
-        # END handle exception
-
-    def exists(self):
-        """
-        :return: True if the submodule exists, False otherwise. Please note that
-            a submodule may exist (in the .gitmodules file) even though its module
-            doesn't exist"""
-        # keep attributes for later, and restore them if we have no valid data
-        # this way we do not actually alter the state of the object
-        loc = locals()
-        for attr in self._cache_attrs:
-            if hasattr(self, attr):
-                loc[attr] = getattr(self, attr)
-            # END if we have the attribute cache
-        # END for each attr
-        self._clear_cache()
-
-        try:
-            try:
-                self.path
-                return True
-            except Exception:
-                return False
-            # END handle exceptions
-        finally:
-            for attr in self._cache_attrs:
-                if attr in loc:
-                    setattr(self, attr, loc[attr])
-                # END if we have a cache
-            # END reapply each attribute
-        # END handle object state consistency
-
-    @property
-    def branch(self):
-        """:return: The branch instance that we are to checkout
-        :raise InvalidGitRepositoryError: if our module is not yet checked out"""
-        return mkhead(self.module(), self._branch_path)
-
-    @property
-    def branch_path(self):
-        """
-        :return: full (relative) path as string to the branch we would checkout
-            from the remote and track"""
-        return self._branch_path
-
-    @property
-    def branch_name(self):
-        """:return: the name of the branch, which is the shortest possible branch name"""
-        # use an instance method, for this we create a temporary Head instance
-        # which uses a repository that is available at least (it makes no difference)
-        return git.Head(self.repo, self._branch_path).name
-
-    @property
-    def url(self):
-        """:return: The url to the repository which our module-repository refers to"""
-        return self._url
-
-    @property
-    def parent_commit(self):
-        """:return: Commit instance with the tree containing the .gitmodules file
-        :note: will always point to the current head's commit if it was not set explicitly"""
-        return self._parent_commit
-    @property
-    def name(self):
-        """:return: The name of this submodule. It is used to identify it within the
-            .gitmodules file.
-        :note: by default, the name is the path at which to find the submodule, but
-            in git-python it should be a unique identifier similar to the identifiers
-            used for remotes, which allows changing the path of the submodule easily"""
-        return self._name
-
-    def config_reader(self):
-        """
-        :return: ConfigReader instance which allows you to query the configuration values
-            of this submodule, as provided by the .gitmodules file
-        :note: The config reader will actually read the data directly from the repository
-            and thus does not need nor care about your working tree.
-        :note: Should be cached by the caller and only kept as long as needed
-        :raise IOError: If the .gitmodules file/blob could not be read"""
-        return self._config_parser_constrained(read_only=True)
-
-    def children(self):
-        """
-        :return: IterableList(Submodule, ...) an iterable list of submodule instances
-            which are children of this submodule, or an empty list if the submodule
-            is not checked out"""
-        return self._get_intermediate_items(self)
-
-    #} END query interface
-
-    #{ Iterable Interface
-
-    @classmethod
-    def iter_items(cls, repo, parent_commit='HEAD'):
-        """:return: iterator yielding Submodule instances available in the given repository"""
-        pc = repo.commit(parent_commit)        # parent commit instance
-        try:
-            parser = cls._config_parser(repo, pc, read_only=True)
-        except IOError:
-            raise StopIteration
-        # END handle empty iterator
-
-        rt = pc.tree        # root tree
-
-        for sms in parser.sections():
-            n = sm_name(sms)
-            p = parser.get_value(sms, 'path')
-            u = parser.get_value(sms, 'url')
-            b = cls.k_head_default
-            if parser.has_option(sms, cls.k_head_option):
-                b = parser.get_value(sms, cls.k_head_option)
-            # END handle optional information
-
-            # get the binsha
-            index = repo.index
-            try:
-                sm = rt[p]
-            except KeyError:
-                # try the index, maybe it was just added
-                try:
-                    entry = index.entries[index.entry_key(p, 0)]
-                    sm = cls(repo, entry.binsha, entry.mode, entry.path)
-                except KeyError:
-                    raise InvalidGitRepositoryError("Gitmodule path %r did not exist in revision of parent commit %s" % (p, parent_commit))
-                # END handle keyerror
-            # END handle critical error
-
-            # fill in remaining info - saves time as it doesn't have to be parsed again
-            sm._name = n
-            sm._parent_commit = pc
-            sm._branch_path = git.Head.to_full_path(b)
-            sm._url = u
-
-            yield sm
-        # END for each section
-
-    #} END iterable interface
-
diff --git a/objects/submodule/root.py b/objects/submodule/root.py
deleted file mode 100644
index 36cd7209..00000000
--- a/objects/submodule/root.py
+++ /dev/null
@@ -1,315 +0,0 @@
-from base import Submodule, UpdateProgress
-from util import (
-    find_first_remote_branch
-    )
-from git.exc import InvalidGitRepositoryError
-import git
-
-import sys
-
-__all__ = ["RootModule", "RootUpdateProgress"]
-
-
-class RootUpdateProgress(UpdateProgress):
-    """Utility class which adds more opcodes to the UpdateProgress"""
-    REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes+4)]
-    _num_op_codes = UpdateProgress._num_op_codes + 4
-
-    __slots__ = tuple()
-
-BEGIN = RootUpdateProgress.BEGIN
-END = RootUpdateProgress.END
-REMOVE = RootUpdateProgress.REMOVE
-BRANCHCHANGE = RootUpdateProgress.BRANCHCHANGE
-URLCHANGE = RootUpdateProgress.URLCHANGE
-PATHCHANGE = RootUpdateProgress.PATHCHANGE
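An aside, not part of the deleted file: a minimal sketch of a progress handler built on the opcodes above. It assumes the RemoteProgress-style update() signature that RootModule.update() (below) feeds with (op_code, cur_count, max_count, message):

    import sys

    class PrintProgress(RootUpdateProgress):
        """Hypothetical handler writing every progress message to stderr."""
        def update(self, op_code, cur_count, max_count=None, message=''):
            sys.stderr.write("%s (%s/%s)\n" % (message, cur_count, max_count))

    # usage sketch: RootModule(repo).update(progress=PrintProgress())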
-class RootModule(Submodule):
-    """A (virtual) Root of all submodules in the given repository. It can be used
-    to more easily traverse all submodules of the master repository"""
-
-    __slots__ = tuple()
-
-    k_root_name = '__ROOT__'
-
-    def __init__(self, repo):
-        # repo, binsha, mode=None, path=None, name=None, parent_commit=None, url=None, ref=None)
-        super(RootModule, self).__init__(
-            repo,
-            binsha = self.NULL_BIN_SHA,
-            mode = self.k_default_mode,
-            path = '',
-            name = self.k_root_name,
-            parent_commit = repo.head.commit,
-            url = '',
-            branch_path = git.Head.to_full_path(self.k_head_default)
-            )
-
-    def _clear_cache(self):
-        """May not do anything"""
-        pass
-
-    #{ Interface
-
-    def update(self, previous_commit=None, recursive=True, force_remove=False, init=True,
-               to_latest_revision=False, progress=None, dry_run=False):
-        """Update the submodules of this repository to the current HEAD commit.
-        This method behaves smartly by determining changes of the path of a submodule's
-        repository, next to changes to the to-be-checked-out commit or the branch to be
-        checked out. This works if the submodule's ID does not change.
-        Additionally it will detect addition and removal of submodules, which will be handled
-        gracefully.
-
-        :param previous_commit: If set to a commit'ish, the commit we should use
-            as the previous commit the HEAD pointed to before it was set to the commit it points to now.
-            If None, it defaults to HEAD@{1}
-        :param recursive: if True, the children of submodules will be updated as well
-            using the same technique
-        :param force_remove: If submodules have been deleted, they will be forcibly removed.
-            Otherwise the update may fail if a submodule's repository cannot be deleted as
-            changes have been made to it (see Submodule.update() for more information)
-        :param init: If we encounter a new module which would need to be initialized, then do it.
-        :param to_latest_revision: If True, instead of checking out the revision pointed to
-            by this submodule's sha, the checked out tracking branch will be merged with the
-            newest remote branch fetched from the repository's origin
-        :param progress: RootUpdateProgress instance or None if no progress should be sent
-        :param dry_run: if True, operations will not actually be performed. Progress messages
-            will change accordingly to indicate the WOULD DO state of the operation."""
-        if self.repo.bare:
-            raise InvalidGitRepositoryError("Cannot update submodules in bare repositories")
-        # END handle bare
-
-        if progress is None:
-            progress = RootUpdateProgress()
-        # END assure progress is set
-
-        prefix = ''
-        if dry_run:
-            prefix = 'DRY-RUN: '
-
-        repo = self.repo
-
-        # SETUP BASE COMMIT
-        ###################
-        cur_commit = repo.head.commit
-        if previous_commit is None:
-            try:
-                previous_commit = repo.commit(repo.head.log_entry(-1).oldhexsha)
-                if previous_commit.binsha == previous_commit.NULL_BIN_SHA:
-                    raise IndexError
-                # END handle initial commit
-            except IndexError:
-                # in new repositories, there is no previous commit
-                previous_commit = cur_commit
-            # END exception handling
-        else:
-            previous_commit = repo.commit(previous_commit)    # obtain commit object
-        # END handle previous commit
-
-        psms = self.list_items(repo, parent_commit=previous_commit)
-        sms = self.list_items(self.module())
-        spsms = set(psms)
-        ssms = set(sms)
-
-        # HANDLE REMOVALS
-        ###################
-        rrsm = (spsms - ssms)
-        len_rrsm = len(rrsm)
-        for i, rsm in enumerate(rrsm):
-            op = REMOVE
-            if i == 0:
-                op |= BEGIN
-            # END handle begin
-
-            # fake it into thinking it's at the current commit to allow deletion
-            # of the previous module. Trigger the cache to be updated before that
-            progress.update(op, i, len_rrsm, prefix+"Removing submodule %r at %s" % (rsm.name, rsm.abspath))
-            rsm._parent_commit = repo.head.commit
-            if not dry_run:
-                rsm.remove(configuration=False, module=True, force=force_remove)
-            # END handle dry-run
-
-            if i == len_rrsm-1:
-                op |= END
-            # END handle end
-            progress.update(op, i, len_rrsm, prefix+"Done removing submodule %r" % rsm.name)
-        # END for each removed submodule
-
-        # HANDLE PATH RENAMES
-        #####################
-        # url changes + branch changes
-        csms = (spsms & ssms)
-        len_csms = len(csms)
-        for i, csm in enumerate(csms):
-            psm = psms[csm.name]
-            sm = sms[csm.name]
-
-            # PATH CHANGES
-            ##############
-            if sm.path != psm.path and psm.module_exists():
-                progress.update(BEGIN|PATHCHANGE, i, len_csms, prefix+"Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath))
-                # move the module to the new path
-                if not dry_run:
-                    psm.move(sm.path, module=True, configuration=False)
-                # END handle dry_run
-                progress.update(END|PATHCHANGE, i, len_csms, prefix+"Done moving repository of submodule %r" % sm.name)
-            # END handle path changes
-
-            if sm.module_exists():
-                # HANDLE URL CHANGE
-                ###################
-                if sm.url != psm.url:
-                    # Add the new remote, remove the old one
-                    # This way, if the url just changes, the commits will not
-                    # have to be re-retrieved
-                    nn = '__new_origin__'
-                    smm = sm.module()
-                    rmts = smm.remotes
-
-                    # don't do anything if the url we want is already in place
-                    if len([r for r in rmts if r.url == sm.url]) == 0:
-                        progress.update(BEGIN|URLCHANGE, i, len_csms, prefix+"Changing url of submodule %r from %s to %s" % (sm.name, psm.url, sm.url))
-
-                        if not dry_run:
-                            assert nn not in [r.name for r in rmts]
-                            smr = smm.create_remote(nn, sm.url)
-                            smr.fetch(progress=progress)
-
-                            # If we have a tracking branch, it should be available
-                            # in the new remote as well.
-                            if len([r for r in smr.refs if r.remote_head == sm.branch_name]) == 0:
-                                raise ValueError("Submodule branch named %r was not available in new submodule remote at %r" % (sm.branch_name, sm.url))
-                            # END head is not detached
-
-                            # now delete the changed one
-                            rmt_for_deletion = None
-                            for remote in rmts:
-                                if remote.url == psm.url:
-                                    rmt_for_deletion = remote
-                                    break
-                                # END if urls match
-                            # END for each remote
-
-                            # if we didn't find a matching remote, but have exactly one,
-                            # we can safely use this one
-                            if rmt_for_deletion is None:
-                                if len(rmts) == 1:
-                                    rmt_for_deletion = rmts[0]
-                                else:
-                                    # if we have not found any remote with the original url
-                                    # we may not have a name. This is a special case,
-                                    # and it's okay to fail here
-                                    # Alternatively we could just generate a unique name and leave all
-                                    # existing ones in place
-                                    raise InvalidGitRepositoryError("Couldn't find original remote-repo at url %r" % psm.url)
-                                # END handle one single remote
-                            # END handle check we found a remote
-
-                            orig_name = rmt_for_deletion.name
-                            smm.delete_remote(rmt_for_deletion)
-                            # NOTE: Currently we leave tags from the deleted remotes
-                            # as well as separate tracking branches in the possibly totally
-                            # changed repository (someone could have changed the url to
-                            # another project). At some point, one might want to clean
-                            # it up, but the danger is high to remove stuff the user
-                            # has added explicitly
-
-                            # rename the new remote back to what it was
-                            smr.rename(orig_name)
-
-                            # early on, we verified that our current tracking branch
-                            # exists in the remote. Now we have to assure that the
-                            # sha we point to is still contained in the new remote
-                            # tracking branch.
-                            smsha = sm.binsha
-                            found = False
-                            rref = smr.refs[sm.branch_name]
-                            for c in rref.commit.traverse():
-                                if c.binsha == smsha:
-                                    found = True
-                                    break
-                                # END traverse all commits in search for sha
-                            # END for each commit
-
-                            if not found:
-                                # adjust our internal binsha to use the one of the remote
-                                # this way, it will be checked out in the next step
-                                # This will change the submodule relative to us, so
-                                # the user will be able to commit the change easily
-                                print >> sys.stderr, "WARNING: Current sha %s was not contained in the tracking branch at the new remote, setting it to the remote's tracking branch" % sm.hexsha
-                                sm.binsha = rref.commit.binsha
-                            # END reset binsha
-
-                            # NOTE: All checkout is performed by the base implementation of update
-                        # END handle dry_run
-                        progress.update(END|URLCHANGE, i, len_csms, prefix+"Done adjusting url of submodule %r" % (sm.name))
-                    # END skip remote handling if new url already exists in module
-                # END handle url
-
-                # HANDLE BRANCH CHANGES
-                #######################
-                if sm.branch_path != psm.branch_path:
-                    # finally, create a new tracking branch which tracks the
-                    # new remote branch
-                    progress.update(BEGIN|BRANCHCHANGE, i, len_csms, prefix+"Changing branch of submodule %r from %s to %s" % (sm.name, psm.branch_path, sm.branch_path))
-                    if not dry_run:
-                        smm = sm.module()
-                        smmr = smm.remotes
-                        try:
-                            tbr = git.Head.create(smm, sm.branch_name, logmsg='branch: Created from HEAD')
-                        except OSError:
-                            # ... or reuse the existing one
-                            tbr = git.Head(smm, sm.branch_path)
-                        # END assure tracking branch exists
-
-                        tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name))
-                        # figure out whether the previous tracking branch contains
-                        # new commits compared to the other one, if not we can
-                        # delete it.
-                        try:
-                            tbr = find_first_remote_branch(smmr, psm.branch_name)
-                            if len(smm.git.cherry(tbr, psm.branch)) == 0:
-                                psm.branch.delete(smm, psm.branch)
-                            # END delete original tracking branch if there are no changes
-                        except InvalidGitRepositoryError:
-                            # ignore it if the previous branch couldn't be found in the
-                            # current remotes, this just means we can't handle it
-                            pass
-                        # END exception handling
-
-                        # NOTE: All checkout is done in the base implementation of update
-                    # END handle dry_run
-
-                    progress.update(END|BRANCHCHANGE, i, len_csms, prefix+"Done changing branch of submodule %r" % sm.name)
-                # END handle branch
-            # END handle
-        # END for each common submodule
-
-        # FINALLY UPDATE ALL ACTUAL SUBMODULES
-        ######################################
-        for sm in sms:
-            # update the submodule using the default method
-            sm.update(recursive=False, init=init, to_latest_revision=to_latest_revision,
-                      progress=progress, dry_run=dry_run)
-
-            # update recursively depth first - question is which inconsistent
-            # state will be better in case it fails somewhere. Defective branch
-            # or defective depth. The RootSubmodule type will never process itself,
-            # which was done in the previous expression
-            if recursive:
-                # the module would exist by now if we are not in dry_run mode
-                if sm.module_exists():
-                    type(self)(sm.module()).update(recursive=True, force_remove=force_remove,
-                                                   init=init, to_latest_revision=to_latest_revision,
-                                                   progress=progress, dry_run=dry_run)
-                # END handle dry_run
-            # END handle recursive
-        # END for each submodule to update
-
-    def module(self):
-        """:return: the actual repository containing the submodules"""
-        return self.repo
-    #} END interface
-#} END classes
diff --git a/objects/submodule/util.py b/objects/submodule/util.py
deleted file mode 100644
index 9b32807a..00000000
--- a/objects/submodule/util.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import git
-from git.exc import InvalidGitRepositoryError
-from git.config import GitConfigParser
-from StringIO import StringIO
-import weakref
-
-__all__ = ('sm_section', 'sm_name', 'mkhead', 'unbare_repo', 'find_first_remote_branch',
-           'SubmoduleConfigParser')
-
-#{ Utilities
-
-def sm_section(name):
-    """:return: section title used in .gitmodules configuration file"""
-    return 'submodule "%s"' % name
-
-def sm_name(section):
-    """:return: name of the submodule as parsed from the section name"""
-    section = section.strip()
-    return section[11:-1]
-
-def mkhead(repo, path):
-    """:return: New branch/head instance"""
-    return git.Head(repo, git.Head.to_full_path(path))
-
-def unbare_repo(func):
-    """Methods with this decorator raise InvalidGitRepositoryError if they
-    encounter a bare repository"""
-    def wrapper(self, *args, **kwargs):
-        if self.repo.bare:
-            raise InvalidGitRepositoryError("Method '%s' cannot operate on bare repositories" % func.__name__)
-        # END bare method
-        return func(self, *args, **kwargs)
-    # END wrapper
-    wrapper.__name__ = func.__name__
-    return wrapper
-
-def find_first_remote_branch(remotes, branch_name):
-    """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError"""
-    for remote in remotes:
-        try:
-            return remote.refs[branch_name]
-        except IndexError:
-            continue
-        # END exception handling
-    # END for remote
-    raise InvalidGitRepositoryError("Didn't find remote branch %r in any of the given remotes" % branch_name)
-
-#} END utilities
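A doctest-style round-trip through the section-name helpers above; the submodule name 'lib' is illustrative:

    >>> sm_section('lib')
    'submodule "lib"'
    >>> sm_name(sm_section('lib'))
    'lib'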
-
-#{ Classes
-
-class SubmoduleConfigParser(GitConfigParser):
-    """
-    Catches calls to _write, and updates the .gitmodules blob in the index
-    with the new data, if we have written into a stream. Otherwise it will
-    add the local file to the index to make it correspond with the working tree.
-    Additionally, the cache must be cleared
-
-    Please note that no mutating method will work in bare mode
-    """
-
-    def __init__(self, *args, **kwargs):
-        self._smref = None
-        self._index = None
-        self._auto_write = True
-        super(SubmoduleConfigParser, self).__init__(*args, **kwargs)
-
-    #{ Interface
-    def set_submodule(self, submodule):
-        """Set this instance's submodule. It must be called before
-        the first write operation begins"""
-        self._smref = weakref.ref(submodule)
-
-    def flush_to_index(self):
-        """Flush changes in our configuration file to the index"""
-        assert self._smref is not None
-        # should always have a file here
-        assert not isinstance(self._file_or_files, StringIO)
-
-        sm = self._smref()
-        if sm is not None:
-            index = self._index
-            if index is None:
-                index = sm.repo.index
-            # END handle index
-            index.add([sm.k_modules_file], write=self._auto_write)
-            sm._clear_cache()
-        # END handle weakref
-
-    #} END interface
-
-    #{ Overridden Methods
-    def write(self):
-        rval = super(SubmoduleConfigParser, self).write()
-        self.flush_to_index()
-        return rval
-    #} END overridden methods
-
-
-#} END classes
diff --git a/objects/tag.py b/objects/tag.py
deleted file mode 100644
index c7d02abe..00000000
--- a/objects/tag.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# objects.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-""" Module containing all object based types. """
-import base
-from gitdb.util import hex_to_bin
-from util import (
-    get_object_type_by_name,
-    parse_actor_and_date
-    )
-
-__all__ = ("TagObject", )
-
-class TagObject(base.Object):
-    """Non-lightweight tag carrying additional information about an object we are pointing to."""
-    type = "tag"
-    __slots__ = ("object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message")
-
-    def __init__(self, repo, binsha, object=None, tag=None,
-                 tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
-        """Initialize a tag object with additional data
-
-        :param repo: repository this object is located in
-        :param binsha: 20 byte SHA1
-        :param object: Object instance of object we are pointing to
-        :param tag: name of this tag
-        :param tagger: Actor identifying the tagger
-        :param tagged_date: int_seconds_since_epoch
-            is the DateTime of the tag creation - use time.gmtime to convert
-            it into a different format
-        :param tagger_tz_offset: int_seconds_west_of_utc is the timezone that the
-            tagged_date is in, in a format similar to time.altzone"""
-        super(TagObject, self).__init__(repo, binsha)
-        if object is not None:
-            self.object = object
-        if tag is not None:
-            self.tag = tag
-        if tagger is not None:
-            self.tagger = tagger
-        if tagged_date is not None:
-            self.tagged_date = tagged_date
-        if tagger_tz_offset is not None:
-            self.tagger_tz_offset = tagger_tz_offset
-        if message is not None:
-            self.message = message
-
-    def _set_cache_(self, attr):
-        """Cache all our attributes at once"""
-        if attr in TagObject.__slots__:
-            ostream = self.repo.odb.stream(self.binsha)
-            lines = ostream.read().splitlines()
-
-            obj, hexsha = lines[0].split(" ")            # object <hexsha>
-            type_token, type_name = lines[1].split(" ")  # type <type_name>
-            self.object = get_object_type_by_name(type_name)(self.repo, hex_to_bin(hexsha))
-
-            self.tag = lines[2][4:]          # tag <tag name>
-
-            tagger_info = lines[3][7:]       # tagger <actor> <date>
-            self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
-
-            # line 4 empty - it could mark the beginning of the next header
-            # in case there really is no message, it would not exist. Otherwise
-            # a newline separates header from message
-            if len(lines) > 5:
-                self.message = "\n".join(lines[5:])
-            else:
-                self.message = ''
-        # END check our attributes
-        else:
-            super(TagObject, self)._set_cache_(attr)
diff --git a/objects/tree.py b/objects/tree.py
deleted file mode 100644
index 67431686..00000000
--- a/objects/tree.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# tree.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
-from base import IndexObject
-from git.util import join_path
-from blob import Blob
-from submodule.base import Submodule
-import git.diff as diff
-
-from fun import (
-    tree_entries_from_data,
-    tree_to_stream
-    )
-
-from gitdb.util import (
-    to_bin_sha,
-    )
-
-__all__ = ("TreeModifier", "Tree")
-
-class TreeModifier(object):
-    """A utility class providing methods to alter the underlying cache in a list-like fashion.
-
-    Once all adjustments are complete, the _cache, which really is a reference to
-    the cache of a tree, will be sorted, assuring it will be in a serializable state"""
-    __slots__ = '_cache'
-
-    def __init__(self, cache):
-        self._cache = cache
-
-    def _index_by_name(self, name):
-        """:return: index of an item with name, or -1 if not found"""
-        for i, t in enumerate(self._cache):
-            if t[2] == name:
-                return i
-            # END found item
-        # END for each item in cache
-        return -1
-
-    #{ Interface
-    def set_done(self):
-        """Call this method once you are done modifying the tree information.
-        It may be called several times, but be aware that each call will cause
-        a sort operation
-        :return: self"""
-        self._cache.sort(key=lambda t: t[2])        # sort by name
-        return self
-    #} END interface
-
-    #{ Mutators
-    def add(self, sha, mode, name, force=False):
-        """Add the given item to the tree. If an item with the given name already
-        exists, nothing will be done, but a ValueError will be raised if the
-        sha and mode of the existing item do not match the one you add, unless
-        force is True
-
-        :param sha: The 20 or 40 byte sha of the item to add
-        :param mode: int representing the stat compatible mode of the item
-        :param force: If True, an item with your name and information will overwrite
-            any existing item with the same name, no matter which information it has
-        :return: self"""
-        if '/' in name:
-            raise ValueError("Name must not contain '/' characters")
-        if (mode >> 12) not in Tree._map_id_to_type:
-            raise ValueError("Invalid object type according to mode %o" % mode)
-
-        sha = to_bin_sha(sha)
-        index = self._index_by_name(name)
-        item = (sha, mode, name)
-        if index == -1:
-            self._cache.append(item)
-        else:
-            if force:
-                self._cache[index] = item
-            else:
-                ex_item = self._cache[index]
-                if ex_item[0] != sha or ex_item[1] != mode:
-                    raise ValueError("Item %r existed with different properties" % name)
-                # END handle mismatch
-            # END handle force
-        # END handle name exists
-        return self
-
-    def add_unchecked(self, binsha, mode, name):
-        """Add the given item to the tree. Its correctness is assumed, which
-        makes the caller responsible for assuring the input is correct.
-        For more information on the parameters, see ``add``
-        :param binsha: 20 byte binary sha"""
-        self._cache.append((binsha, mode, name))
-
-    def __delitem__(self, name):
-        """Deletes an item with the given name if it exists"""
-        index = self._index_by_name(name)
-        if index > -1:
-            del(self._cache[index])
-
-    #} END mutators
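A hedged usage sketch of the TreeModifier above; `tree` is assumed to be an existing git.Tree, and the sha is a placeholder:

    mod = tree.cache                          # TreeModifier over the tree's entry cache
    mod.add("f" * 40, 0100644, "new_file")    # hex sha is illustrative; 0100644 = regular blob
    mod.set_done()                            # sort entries so serialization stays correct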
-class Tree(IndexObject, diff.Diffable, util.Traversable, util.Serializable):
-    """Tree objects represent an ordered list of Blobs and other Trees.
-
-    ``Tree as a list``::
-
-        Access a specific blob using the
-        tree['filename'] notation.
-
-        You may as well access by index
-        blob = tree[0]
-    """
-
-    type = "tree"
-    __slots__ = "_cache"
-
-    # actual integer ids for comparison
-    commit_id = 016        # equals stat.S_IFDIR | stat.S_IFLNK - a directory link
-    blob_id = 010
-    symlink_id = 012
-    tree_id = 004
-
-    _map_id_to_type = {
-        commit_id : Submodule,
-        blob_id : Blob,
-        symlink_id : Blob
-        # tree id added once Tree is defined
-        }
-
-    def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
-        super(Tree, self).__init__(repo, binsha, mode, path)
-
-    @classmethod
-    def _get_intermediate_items(cls, index_object):
-        if index_object.type == "tree":
-            return tuple(index_object._iter_convert_to_object(index_object._cache))
-        return tuple()
-
-    def _set_cache_(self, attr):
-        if attr == "_cache":
-            # Set the data when we need it
-            ostream = self.repo.odb.stream(self.binsha)
-            self._cache = tree_entries_from_data(ostream.read())
-        else:
-            super(Tree, self)._set_cache_(attr)
-        # END handle attribute
-
-    def _iter_convert_to_object(self, iterable):
-        """Iterable yields tuples of (binsha, mode, name), which will be converted
-        to the respective object representation"""
-        for binsha, mode, name in iterable:
-            path = join_path(self.path, name)
-            try:
-                yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
-            except KeyError:
-                raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
-        # END for each item
-
-    def __div__(self, file):
-        """Find the named object in this tree's contents
-        :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
-
-        :raise KeyError: if given file or tree does not exist in tree"""
-        msg = "Blob or Tree named %r not found"
-        if '/' in file:
-            tree = self
-            item = self
-            tokens = file.split('/')
-            for i, token in enumerate(tokens):
-                item = tree[token]
-                if item.type == 'tree':
-                    tree = item
-                else:
-                    # safety assertion - blobs are at the end of the path
-                    if i != len(tokens)-1:
-                        raise KeyError(msg % file)
-                    return item
-                # END handle item type
-            # END for each token of split path
-            if item == self:
-                raise KeyError(msg % file)
-            return item
-        else:
-            for info in self._cache:
-                if info[2] == file:        # [2] == name
-                    return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-            # END for each obj
-            raise KeyError(msg % file)
-        # END handle long paths
-
-    @property
-    def trees(self):
-        """:return: list(Tree, ...) list of trees directly below this tree"""
-        return [i for i in self if i.type == "tree"]
-
-    @property
-    def blobs(self):
-        """:return: list(Blob, ...) list of blobs directly below this tree"""
-        return [i for i in self if i.type == "blob"]
-
-    @property
-    def cache(self):
-        """
-        :return: An object allowing to modify the internal cache. This can be used
-            to change the tree's contents. When done, make sure you call ``set_done``
-            on the tree modifier, or serialization behaviour will be incorrect.
-            See the ``TreeModifier`` for more information on how to alter the cache"""
-        return TreeModifier(self._cache)
-
-    def traverse(self, predicate=lambda i, d: True,
-                 prune=lambda i, d: False, depth=-1, branch_first=True,
-                 visit_once=False, ignore_self=1):
-        """For documentation, see util.Traversable.traverse
-        Trees are set to visit_once = False to gain more performance in the traversal"""
-        return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
-
-    # List protocol
-    def __getslice__(self, i, j):
-        return list(self._iter_convert_to_object(self._cache[i:j]))
-
-    def __iter__(self):
-        return self._iter_convert_to_object(self._cache)
-
-    def __len__(self):
-        return len(self._cache)
-
-    def __getitem__(self, item):
-        if isinstance(item, int):
-            info = self._cache[item]
-            return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-
-        if isinstance(item, basestring):
-            # compatibility
-            return self.__div__(item)
-        # END index is basestring
-
-        raise TypeError("Invalid index type: %r" % item)
-
-    def __contains__(self, item):
-        if isinstance(item, IndexObject):
-            for info in self._cache:
-                if item.binsha == info[0]:
-                    return True
-                # END compare sha
-            # END for each entry
-        # END handle item is index object
-        # compatibility
-
-        # treat item as repo-relative path
-        path = self.path
-        for info in self._cache:
-            if item == join_path(path, info[2]):
-                return True
-        # END for each item
-        return False
-
-    def __reversed__(self):
-        return reversed(self._iter_convert_to_object(self._cache))
-
-    def _serialize(self, stream):
-        """Serialize this tree into the stream. Please note that we will assume
-        our tree data to be in a sorted state. If this is not the case, serialization
-        will not generate a correct tree representation as these are assumed to be sorted
-        by algorithms"""
-        tree_to_stream(self._cache, stream.write)
-        return self
-
-    def _deserialize(self, stream):
-        self._cache = tree_entries_from_data(stream.read())
-        return self
-
-
-# END tree
-
-# finalize map definition
-Tree._map_id_to_type[Tree.tree_id] = Tree
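A short sketch of the list- and dict-like access Tree provides; the repository path is assumed:

    import git
    repo = git.Repo('/path/to/repo')              # assumed repository
    tree = repo.head.commit.tree
    readme = tree['README']                       # dict-like lookup, equivalent to tree / 'README'
    first_entry = tree[0]                         # list-like access
    blob_names = [b.name for b in tree.blobs]     # blobs directly below the root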
diff --git a/objects/util.py b/objects/util.py
deleted file mode 100644
index 4c9323b8..00000000
--- a/objects/util.py
+++ /dev/null
@@ -1,315 +0,0 @@
-# util.py
-# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
-#
-# This module is part of GitPython and is released under
-# the BSD License: http://www.opensource.org/licenses/bsd-license.php
-"""Module for general utility functions"""
-from git.util import (
-    IterableList,
-    Actor
-    )
-
-import re
-from collections import deque as Deque
-
-from string import digits
-import time
-import os
-
-__all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date',
-           'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
-           'verify_utctz', 'Actor')
-
-#{ Functions
-
-def mode_str_to_int(modestr):
-    """
-    :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
-    :return:
-        Integer identifying a mode compatible to the mode method ids of the
-        stat module regarding the rwx permissions for user, group and other,
-        special flags and file system flags, i.e. whether it is a symlink
-        for example."""
-    mode = 0
-    for iteration, char in enumerate(reversed(modestr[-6:])):
-        mode += int(char) << iteration*3
-    # END for each char
-    return mode
-
-def get_object_type_by_name(object_type_name):
-    """
-    :return: type suitable to handle the given object type name.
-        Use the type to create new instances.
-
-    :param object_type_name: Member of TYPES
-
-    :raise ValueError: In case object_type_name is unknown"""
-    if object_type_name == "commit":
-        import commit
-        return commit.Commit
-    elif object_type_name == "tag":
-        import tag
-        return tag.TagObject
-    elif object_type_name == "blob":
-        import blob
-        return blob.Blob
-    elif object_type_name == "tree":
-        import tree
-        return tree.Tree
-    else:
-        raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-
-def utctz_to_altz(utctz):
-    """we convert utctz to the timezone in seconds, it is the format time.altzone
-    returns. Git stores it as UTC timezone which has the opposite sign as well,
-    which explains the -1 * (that was made explicit here)
-    :param utctz: git utc timezone string, i.e. +0200"""
-    return -1 * int(float(utctz)/100*3600)
-
-def altz_to_utctz_str(altz):
-    """As above, but inverses the operation, returning a string that can be used
-    in commit objects"""
-    utci = -1 * int((altz / 3600)*100)
-    utcs = str(abs(utci))
-    utcs = "0"*(4-len(utcs)) + utcs
-    prefix = (utci < 0 and '-') or '+'
-    return prefix + utcs
-
-def verify_utctz(offset):
-    """:raise ValueError: if offset is incorrect
-    :return: offset"""
-    fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
-    if len(offset) != 5:
-        raise fmt_exc
-    if offset[0] not in "+-":
-        raise fmt_exc
-    if offset[1] not in digits or \
-       offset[2] not in digits or \
-       offset[3] not in digits or \
-       offset[4] not in digits:
-        raise fmt_exc
-    # END for each char
-    return offset
-
-def parse_date(string_date):
-    """
-    Parse the given date as one of the following
-
-    * Git internal format: timestamp offset
-    * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200
-    * ISO 8601: 2005-04-07T22:13:13 - the T can be a space as well
-
-    :return: Tuple(int(timestamp), int(offset)), both in seconds since epoch
-    :raise ValueError: If the format could not be understood
-    :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY"""
-    # git time
-    try:
-        if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
-            timestamp, offset = string_date.split()
-            timestamp = int(timestamp)
-            return timestamp, utctz_to_altz(verify_utctz(offset))
-        else:
-            offset = "+0000"                        # local time by default
-            if string_date[-5] in '-+':
-                offset = verify_utctz(string_date[-5:])
-                string_date = string_date[:-6]      # skip space as well
-            # END split timezone info
-
-            # now figure out the date and time portion - split time
-            date_formats = list()
-            splitter = -1
-            if ',' in string_date:
-                date_formats.append("%a, %d %b %Y")
-                splitter = string_date.rfind(' ')
-            else:
-                # iso plus additional
-                date_formats.append("%Y-%m-%d")
-                date_formats.append("%Y.%m.%d")
-                date_formats.append("%m/%d/%Y")
-                date_formats.append("%d.%m.%Y")
-
-                splitter = string_date.rfind('T')
-                if splitter == -1:
-                    splitter = string_date.rfind(' ')
-                # END handle 'T' and ' '
-            # END handle rfc or iso
-
-            assert splitter > -1
-
-            # split date and time
-            time_part = string_date[splitter+1:]    # skip space
-            date_part = string_date[:splitter]
-
-            # parse time
-            tstruct = time.strptime(time_part, "%H:%M:%S")
-
-            for fmt in date_formats:
-                try:
-                    dtstruct = time.strptime(date_part, fmt)
-                    fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
-                                                tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
-                                                dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
-                    return int(time.mktime(fstruct)), utctz_to_altz(offset)
-                except ValueError:
-                    continue
-                # END exception handling
-            # END for each fmt
-
-            # still here ? fail
-            raise ValueError("no format matched")
-        # END handle format
-    except Exception:
-        raise ValueError("Unsupported date format: %s" % string_date)
-    # END handle exceptions
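Illustrative doctest-style values for the conversion helpers above (not part of the original module):

    >>> mode_str_to_int("100644") == 0100644    # regular file mode
    True
    >>> utctz_to_altz("+0200")                  # two hours east of UTC
    -7200
    >>> altz_to_utctz_str(-7200)
    '+0200'
    >>> parse_date("1191999972 -0700")          # git internal format
    (1191999972, 25200)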
-
-
-# precompiled regex
-_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
-
-def parse_actor_and_date(line):
-    """Parse out the actor (author or committer) info from a line like::
-
-        author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-
-    :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
-    m = _re_actor_epoch.search(line)
-    actor, epoch, offset = m.groups()
-    return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
-
-
-#} END functions
-
-
-#{ Classes
-
-class ProcessStreamAdapter(object):
-    """Class wiring all calls to the contained Process instance.
-
-    Use this type to hide the underlying process to provide access only to a specified
-    stream. The process is usually wrapped into an AutoInterrupt class to kill
-    it if the instance goes out of scope."""
-    __slots__ = ("_proc", "_stream")
-
-    def __init__(self, process, stream_name):
-        self._proc = process
-        self._stream = getattr(process, stream_name)
-
-    def __getattr__(self, attr):
-        return getattr(self._stream, attr)
-
-
-class Traversable(object):
-    """Simple interface to perform depth-first or breadth-first traversals
-    in one direction.
-    Subclasses only need to implement one function.
-    Instances of the Subclass must be hashable"""
-    __slots__ = tuple()
-
-    @classmethod
-    def _get_intermediate_items(cls, item):
-        """
-        Returns:
-            List of items connected to the given item.
-            Must be implemented in subclass
-        """
-        raise NotImplementedError("To be implemented in subclass")
-
-    def list_traverse(self, *args, **kwargs):
-        """
-        :return: IterableList with the results of the traversal as produced by
-            traverse()"""
-        out = IterableList(self._id_attribute_)
-        out.extend(self.traverse(*args, **kwargs))
-        return out
-
-    def traverse(self, predicate=lambda i, d: True,
-                 prune=lambda i, d: False, depth=-1, branch_first=True,
-                 visit_once=True, ignore_self=1, as_edge=False):
-        """:return: iterator yielding items found when traversing self
-
-        :param predicate: f(i,d) returns False if item i at depth d should not be included in the result
-
-        :param prune:
-            f(i,d) returns True if the search should stop at item i at depth d.
-            Item i will not be returned.
-
-        :param depth:
-            defines at which level the iteration should not go deeper
-            if -1, there is no limit
-            if 0, you would effectively only get self, the root of the iteration
-            i.e. if 1, you would only get the first level of predecessors/successors
-
-        :param branch_first:
-            if True, items will be returned branch first, otherwise depth first
-
-        :param visit_once:
-            if True, items will only be returned once, although they might be encountered
-            several times. Loops are prevented that way.
-
-        :param ignore_self:
-            if True, self will be ignored and automatically pruned from
-            the result. Otherwise it will be the first item to be returned.
-            If as_edge is True, the source of the first edge is None
-
-        :param as_edge:
-            if True, return a pair of items, first being the source, second the
-            destination, i.e. tuple(src, dest) with the edge spanning from
-            source to destination"""
-        visited = set()
-        stack = Deque()
-        stack.append((0, self, None))        # self is always depth level 0
-
-        def addToStack(stack, item, branch_first, depth):
-            lst = self._get_intermediate_items(item)
-            if not lst:
-                return
-            if branch_first:
-                stack.extendleft((depth, i, item) for i in lst)
-            else:
-                reviter = ((depth, lst[i], item) for i in range(len(lst)-1, -1, -1))
-                stack.extend(reviter)
-        # END addToStack local method
-
-        while stack:
-            d, item, src = stack.pop()        # depth of item, item, item_source
-
-            if visit_once and item in visited:
-                continue
-
-            if visit_once:
-                visited.add(item)
-
-            rval = (as_edge and (src, item)) or item
-            if prune(rval, d):
-                continue
-
-            skipStartItem = ignore_self and (item is self)
-            if not skipStartItem and predicate(rval, d):
-                yield rval
-
-            # only continue to next level if this is appropriate !
-            nd = d + 1
-            if depth > -1 and nd > depth:
-                continue
-
-            addToStack(stack, item, branch_first, nd)
-        # END for each item on work stack
-
-
-class Serializable(object):
-    """Defines methods to serialize and deserialize objects from and into a data stream"""
-    __slots__ = tuple()
-
-    def _serialize(self, stream):
-        """Serialize the data of this object into the given data stream
-        :note: a serialized object would ``_deserialize`` into the same object
-        :param stream: a file-like object
-        :return: self"""
-        raise NotImplementedError("To be implemented in subclass")
-
-    def _deserialize(self, stream):
-        """Deserialize all information regarding this object from the stream
-        :param stream: a file-like object
-        :return: self"""
-        raise NotImplementedError("To be implemented in subclass")
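Finally, a sketch of the Traversable interface as exposed through Tree.traverse(); the repository path is assumed:

    import git
    repo = git.Repo('/path/to/repo')    # assumed repository
    tree = repo.head.commit.tree
    # depth-first traversal, yielding only blobs at most two levels deep
    for blob in tree.traverse(predicate=lambda i, d: i.type == 'blob',
                              depth=2, branch_first=False):
        print blob.path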