diff options
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- | lib/git/objects/__init__.py | 2 | ||||
-rw-r--r-- | lib/git/objects/base.py | 416 | ||||
-rw-r--r-- | lib/git/objects/blob.py | 42 | ||||
-rw-r--r-- | lib/git/objects/commit.py | 698 | ||||
-rw-r--r-- | lib/git/objects/tag.py | 124 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 454 | ||||
-rw-r--r-- | lib/git/objects/utils.py | 296 |
7 files changed, 1016 insertions, 1016 deletions
diff --git a/lib/git/objects/__init__.py b/lib/git/objects/__init__.py index 192750e3..717fa808 100644 --- a/lib/git/objects/__init__.py +++ b/lib/git/objects/__init__.py @@ -9,4 +9,4 @@ from tree import * from commit import * __all__ = [ name for name, obj in locals().items() - if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file + if not (name.startswith('_') or inspect.ismodule(obj)) ]
\ No newline at end of file diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 8d6860de..6a51eed3 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,222 +6,222 @@ import os from git.utils import LazyMixin, join_path_native import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '<git.%s "%s">' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. + + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + diff --git a/lib/git/objects/blob.py b/lib/git/objects/blob.py index 11dee323..3f91d078 100644 --- a/lib/git/objects/blob.py +++ b/lib/git/objects/blob.py @@ -8,29 +8,29 @@ import mimetypes import base class Blob(base.IndexObject): - """A Blob encapsulates a git blob object""" - DEFAULT_MIME_TYPE = "text/plain" - type = "blob" + """A Blob encapsulates a git blob object""" + DEFAULT_MIME_TYPE = "text/plain" + type = "blob" - __slots__ = tuple() + __slots__ = tuple() - - @property - def mime_type(self): - """ - The mime type of this file (based on the filename) + + @property + def mime_type(self): + """ + The mime type of this file (based on the filename) - Returns - str - - NOTE - Defaults to 'text/plain' in case the actual file type is unknown. - """ - guesses = None - if self.path: - guesses = mimetypes.guess_type(self.path) - return guesses and guesses[0] or self.DEFAULT_MIME_TYPE + Returns + str + + NOTE + Defaults to 'text/plain' in case the actual file type is unknown. + """ + guesses = None + if self.path: + guesses = mimetypes.guess_type(self.path) + return guesses and guesses[0] or self.DEFAULT_MIME_TYPE - def __repr__(self): - return '<git.Blob "%s">' % self.sha + def __repr__(self): + return '<git.Blob "%s">' % self.sha diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 4d0f808d..d1bbb889 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -14,353 +14,353 @@ import tempfile import os class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): - """ - Wraps a git Commit object. - - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary. - """ - - # object configuration - type = "commit" - __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", - "message", "parents") - _id_attribute_ = "sha" - - def __init__(self, repo, sha, tree=None, author=None, authored_date=None, - committer=None, committed_date=None, message=None, parents=None): - """ - Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set if id names a valid sha. - - The parameter documentation indicates the type of the argument after a colon ':'. - - ``sha`` - is the sha id of the commit or a ref - - ``parents`` : tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - - ``tree`` : Tree - is the corresponding tree id or an actual Tree - - ``author`` : Actor - is the author string ( will be implicitly converted into an Actor object ) - - ``authored_date`` : int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format - - ``committer`` : Actor - is the committer string - - ``committed_date`` : int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format - - ``message`` : string - is the commit message - - Returns - git.Commit - """ - super(Commit,self).__init__(repo, sha) - self._set_self_from_args_(locals()) - - if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) - # END for each parent to convert - - if self.sha and tree is not None: - self.tree = Tree(repo, tree, path='') - # END id to tree conversion - - @classmethod - def _get_intermediate_items(cls, commit): - return commit.parents - - def _set_cache_(self, attr): - """ - Called by LazyMixin superclass when the given uninitialized member needs - to be set. - We set all values at once. - """ - if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.committer = temp.committer - self.committed_date = temp.committed_date - self.message = temp.message - else: - super(Commit, self)._set_cache_(attr) - - @property - def summary(self): - """ - Returns - First line of the commit message. - """ - return self.message.split('\n', 1)[0] - - def count(self, paths='', **kwargs): - """ - Count the number of commits reachable from this commit - - ``paths`` - is an optinal path or a list of paths restricting the return value - to commits actually containing the paths - - ``kwargs`` - Additional options to be passed to git-rev-list. They must not alter - the ouput style of the command, or parsing will yield incorrect results - Returns - int - """ - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. - if paths: - return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) - else: - return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) - - - @property - def name_rev(self): - """ - Returns - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes - """ - return self.repo.git.name_rev(self) - - @classmethod - def iter_items(cls, repo, rev, paths='', **kwargs): - """ - Find all commits matching the given criteria. - - ``repo`` - is the Repo - - ``rev`` - revision specifier, see git-rev-parse for viable options - - ``paths`` - is an optinal path or list of paths, if set only Commits that include the path - or paths will be considered - - ``kwargs`` - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - - Returns - iterator yielding Commit items - """ - options = {'pretty': 'raw', 'as_process' : True } - options.update(kwargs) - - args = list() - if paths: - args.extend(('--', paths)) - # END if paths - - proc = repo.git.rev_list(rev, args, **options) - return cls._iter_from_process_or_stream(repo, proc, True) - - def iter_parents(self, paths='', **kwargs): - """ - Iterate _all_ parents of this commit. - - ``paths`` - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - - ``kwargs`` - All arguments allowed by git-rev-list - - Return: - Iterator yielding Commit objects which are parents of self - """ - # skip ourselves - skip = kwargs.get("skip", 1) - if skip == 0: # skip ourselves - skip = 1 - kwargs['skip'] = skip - - return self.iter_items( self.repo, self, paths, **kwargs ) - - @property - def stats(self): - """ - Create a git stat from changes between this commit and its first parent - or from all changes done if this is the very first commit. - - Return - git.Stats - """ - if not self.parents: - text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: - (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 - else: - text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) - return stats.Stats._list_from_string(self.repo, text) - - @classmethod - def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): - """ - Parse out commit information into a list of Commit objects - - ``repo`` - is the Repo - - ``proc`` - git-rev-list process instance (raw format) - - ``from_rev_list`` - If True, the stream was created by rev-list in which case we parse - the message differently - Returns - iterator returning Commit objects - """ - stream = proc_or_stream - if not hasattr(stream,'next'): - stream = proc_or_stream.stdout - - for line in stream: - commit_tokens = line.split() - id = commit_tokens[1] - assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date = utils.parse_actor_and_date(next_line) - committer, committed_date = utils.parse_actor_and_date(stream.next()) - - # empty line - stream.next() - - message_lines = [] - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message) - # END for each line in stream - - - @classmethod - def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): - """ - Commit the given tree, creating a commit object. - - ``repo`` - is the Repo - - ``tree`` - Sha of a tree or a tree object to become the tree of the new commit - - ``message`` - Commit message. It may be an empty string if no message is provided. - It will be converted to a string in any case. - - ``parent_commits`` - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object - - ``head`` - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could - lead to undesired results when diffing files. - - Returns - Commit object representing the new commit - - Note: - Additional information about hte committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information - """ - parents = parent_commits - if parent_commits is None: - try: - parent_commits = [ repo.head.commit ] - except ValueError: - # empty repositories have no head commit - parent_commits = list() - # END handle parent commits - # END if parent commits are unset - - parent_args = [ ("-p", str(commit)) for commit in parent_commits ] - - # create message stream - tmp_file_path = tempfile.mktemp() - fp = open(tmp_file_path,"wb") - fp.write(str(message)) - fp.close() - fp = open(tmp_file_path,"rb") - fp.seek(0) - - try: - # write the current index as tree - commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) - new_commit = cls(repo, commit_sha) - - if head: - try: - repo.head.commit = new_commit - except ValueError: - # head is not yet set to master - create it and set it - import git.refs - master = git.refs.Head.create(repo, 'master', commit=new_commit) - repo.head.reference = master - # END handle empty repositories - # END advance head handling - - return new_commit - finally: - fp.close() - os.remove(tmp_file_path) - - def __str__(self): - """ Convert commit to string which is SHA1 """ - return self.sha - - def __repr__(self): - return '<git.Commit "%s">' % self.sha + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + + # object configuration + type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") + _id_attribute_ = "sha" + + def __init__(self, repo, sha, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``sha`` + is the sha id of the commit or a ref + + ``parents`` : tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + + ``tree`` : Tree + is the corresponding tree id or an actual Tree + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : int_seconds_since_epoch + is the authored DateTime - use time.gmtime() to convert it into a + different format + + ``committer`` : Actor + is the committer string + + ``committed_date`` : int_seconds_since_epoch + is the committed DateTime - use time.gmtime() to convert it into a + different format + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, sha) + self._set_self_from_args_(locals()) + + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.sha and tree is not None: + self.tree = Tree(repo, tree, path='') + # END id to tree conversion + + @classmethod + def _get_intermediate_items(cls, commit): + return commit.parents + + def _set_cache_(self, attr): + """ + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. + """ + if attr in Commit.__slots__: + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.sha) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + def count(self, paths='', **kwargs): + """ + Count the number of commits reachable from this commit + + ``paths`` + is an optinal path or a list of paths restricting the return value + to commits actually containing the paths + + ``kwargs`` + Additional options to be passed to git-rev-list. They must not alter + the ouput style of the command, or parsing will yield incorrect results + Returns + int + """ + # yes, it makes a difference whether empty paths are given or not in our case + # as the empty paths version will ignore merge commits for some reason. + if paths: + return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines()) + else: + return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines()) + + + @property + def name_rev(self): + """ + Returns + String describing the commits hex sha based on the closest Reference. + Mostly useful for UI purposes + """ + return self.repo.git.name_rev(self) + + @classmethod + def iter_items(cls, repo, rev, paths='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``rev`` + revision specifier, see git-rev-parse for viable options + + ``paths`` + is an optinal path or list of paths, if set only Commits that include the path + or paths will be considered + + ``kwargs`` + optional keyword arguments to git rev-list where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + ``since`` all commits since i.e. '1970-01-01' + + Returns + iterator yielding Commit items + """ + options = {'pretty': 'raw', 'as_process' : True } + options.update(kwargs) + + args = list() + if paths: + args.extend(('--', paths)) + # END if paths + + proc = repo.git.rev_list(rev, args, **options) + return cls._iter_from_process_or_stream(repo, proc, True) + + def iter_parents(self, paths='', **kwargs): + """ + Iterate _all_ parents of this commit. + + ``paths`` + Optional path or list of paths limiting the Commits to those that + contain at least one of the paths + + ``kwargs`` + All arguments allowed by git-rev-list + + Return: + Iterator yielding Commit objects which are parents of self + """ + # skip ourselves + skip = kwargs.get("skip", 1) + if skip == 0: # skip ourselves + skip = 1 + kwargs['skip'] = skip + + return self.iter_items( self.repo, self, paths, **kwargs ) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True) + return stats.Stats._list_from_string(self.repo, text) + + @classmethod + def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``proc`` + git-rev-list process instance (raw format) + + ``from_rev_list`` + If True, the stream was created by rev-list in which case we parse + the message differently + Returns + iterator returning Commit objects + """ + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + + for line in stream: + commit_tokens = line.split() + id = commit_tokens[1] + assert commit_tokens[0] == "commit" + tree = stream.next().split()[1] + + parents = [] + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = utils.parse_actor_and_date(next_line) + committer, committed_date = utils.parse_actor_and_date(stream.next()) + + # empty line + stream.next() + + message_lines = [] + if from_rev_list: + for msg_line in stream: + if not msg_line.startswith(' '): + # and forget about this empty marker + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + else: + # a stream from our data simply gives us the plain message + for msg_line in stream: + message_lines.append(msg_line) + # END message parsing + message = '\n'.join(message_lines) + + yield Commit(repo, id, parents=tuple(parents), tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream + + + @classmethod + def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): + """ + Commit the given tree, creating a commit object. + + ``repo`` + is the Repo + + ``tree`` + Sha of a tree or a tree object to become the tree of the new commit + + ``message`` + Commit message. It may be an empty string if no message is provided. + It will be converted to a string in any case. + + ``parent_commits`` + Optional Commit objects to use as parents for the new commit. + If empty list, the commit will have no parents at all and become + a root commit. + If None , the current head commit will be the parent of the + new commit object + + ``head`` + If True, the HEAD will be advanced to the new commit automatically. + Else the HEAD will remain pointing on the previous commit. This could + lead to undesired results when diffing files. + + Returns + Commit object representing the new commit + + Note: + Additional information about hte committer and Author are taken from the + environment or from the git configuration, see git-commit-tree for + more information + """ + parents = parent_commits + if parent_commits is None: + try: + parent_commits = [ repo.head.commit ] + except ValueError: + # empty repositories have no head commit + parent_commits = list() + # END handle parent commits + # END if parent commits are unset + + parent_args = [ ("-p", str(commit)) for commit in parent_commits ] + + # create message stream + tmp_file_path = tempfile.mktemp() + fp = open(tmp_file_path,"wb") + fp.write(str(message)) + fp.close() + fp = open(tmp_file_path,"rb") + fp.seek(0) + + try: + # write the current index as tree + commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp) + new_commit = cls(repo, commit_sha) + + if head: + try: + repo.head.commit = new_commit + except ValueError: + # head is not yet set to master - create it and set it + import git.refs + master = git.refs.Head.create(repo, 'master', commit=new_commit) + repo.head.reference = master + # END handle empty repositories + # END advance head handling + + return new_commit + finally: + fp.close() + os.remove(tmp_file_path) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.sha + + def __repr__(self): + return '<git.Commit "%s">' % self.sha diff --git a/lib/git/objects/tag.py b/lib/git/objects/tag.py index c329edf7..e880bbe5 100644 --- a/lib/git/objects/tag.py +++ b/lib/git/objects/tag.py @@ -10,66 +10,66 @@ import base import utils class TagObject(base.Object): - """ - Non-Lightweight tag carrying additional information about an object we are pointing - to. - """ - type = "tag" - __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) - - def __init__(self, repo, sha, object=None, tag=None, - tagger=None, tagged_date=None, message=None): - """ - Initialize a tag object with additional data - - ``repo`` - repository this object is located in - - ``sha`` - SHA1 or ref suitable for git-rev-parse - - ``object`` - Object instance of object we are pointing to - - ``tag`` - name of this tag - - ``tagger`` - Actor identifying the tagger - - ``tagged_date`` : int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - """ - super(TagObject, self).__init__(repo, sha ) - self._set_self_from_args_(locals()) - - def _set_cache_(self, attr): - """ - Cache all our attributes at once - """ - if attr in TagObject.__slots__: - lines = self.data.splitlines() - - obj, hexsha = lines[0].split(" ") # object <hexsha> - type_token, type_name = lines[1].split(" ") # type <type_name> - self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) - - self.tag = lines[2][4:] # tag <tag name> - - tagger_info = lines[3][7:]# tagger <actor> <date> - self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) - - # line 4 empty - it could mark the beginning of the next header - # in csse there really is no message, it would not exist. Otherwise - # a newline separates header from message - if len(lines) > 5: - self.message = "\n".join(lines[5:]) - else: - self.message = '' - # END check our attributes - else: - super(TagObject, self)._set_cache_(attr) - - + """ + Non-Lightweight tag carrying additional information about an object we are pointing + to. + """ + type = "tag" + __slots__ = ( "object", "tag", "tagger", "tagged_date", "message" ) + + def __init__(self, repo, sha, object=None, tag=None, + tagger=None, tagged_date=None, message=None): + """ + Initialize a tag object with additional data + + ``repo`` + repository this object is located in + + ``sha`` + SHA1 or ref suitable for git-rev-parse + + ``object`` + Object instance of object we are pointing to + + ``tag`` + name of this tag + + ``tagger`` + Actor identifying the tagger + + ``tagged_date`` : int_seconds_since_epoch + is the DateTime of the tag creation - use time.gmtime to convert + it into a different format + """ + super(TagObject, self).__init__(repo, sha ) + self._set_self_from_args_(locals()) + + def _set_cache_(self, attr): + """ + Cache all our attributes at once + """ + if attr in TagObject.__slots__: + lines = self.data.splitlines() + + obj, hexsha = lines[0].split(" ") # object <hexsha> + type_token, type_name = lines[1].split(" ") # type <type_name> + self.object = utils.get_object_type_by_name(type_name)(self.repo, hexsha) + + self.tag = lines[2][4:] # tag <tag name> + + tagger_info = lines[3][7:]# tagger <actor> <date> + self.tagger, self.tagged_date = utils.parse_actor_and_date(tagger_info) + + # line 4 empty - it could mark the beginning of the next header + # in csse there really is no message, it would not exist. Otherwise + # a newline separates header from message + if len(lines) > 5: + self.message = "\n".join(lines[5:]) + else: + self.message = '' + # END check our attributes + else: + super(TagObject, self)._set_cache_(attr) + + diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index a8ad0ced..5bd29a2a 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -20,230 +20,230 @@ def sha_to_hex(sha): class Tree(base.IndexObject, diff.Diffable, utils.Traversable): - """ - Tress represent a ordered list of Blobs and other Trees. Hence it can be - accessed like a list. - - Tree's will cache their contents after first retrieval to improve efficiency. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. - - You may as well access by index - blob = tree[0] - - - """ - - type = "tree" - __slots__ = "_cache" - - # using ascii codes for comparison - commit_id = 016 - blob_id = 010 - symlink_id = 012 - tree_id = 004 - - - def __init__(self, repo, sha, mode=0, path=None): - super(Tree, self).__init__(repo, sha, mode, path) - - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return index_object._cache - return tuple() - - - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - self._cache = self._get_tree_cache() - else: - super(Tree, self)._set_cache_(attr) - - def _get_tree_cache(self): - """ - Return - list(object_instance, ...) - - ``treeish`` - sha or ref identifying a tree - """ - out = list() - for obj in self._iter_from_data(): - if obj is not None: - out.append(obj) - # END if object was handled - # END for each line from ls-tree - return out - - - def _iter_from_data(self): - """ - Reads the binary non-pretty printed representation of a tree and converts - it into Blob, Tree or Commit objects. - - Note: This method was inspired by the parse_tree method in dulwich. - - Returns - list(IndexObject, ...) - """ - ord_zero = ord('0') - data = self.data - len_data = len(data) - i = 0 - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - type_id = mode >> 12 - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - name = data[ns:i] - path = join_path(self.path, name) - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - - hexsha = sha_to_hex(sha) - if type_id == self.blob_id or type_id == self.symlink_id: - yield blob.Blob(self.repo, hexsha, mode, path) - elif type_id == self.tree_id: - yield Tree(self.repo, hexsha, mode, path) - elif type_id == self.commit_id: - # todo - yield None - else: - raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) - # END for each byte in data stream - - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> - >>> Repo('/path/to/python-git').tree/'README.txt' - <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> - - Returns - ``git.Blob`` or ``git.Tree`` - - Raise - KeyError if given file or tree does not exist in tree - """ - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for obj in self._cache: - if obj.name == file: - return obj - # END for each obj - raise KeyError( msg % file ) - # END handle long paths - - - def __repr__(self): - return '<git.Tree "%s">' % self.sha - - @property - def trees(self): - """ - Returns - list(Tree, ...) list of trees directly below this tree - """ - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """ - Returns - list(Blob, ...) list of blobs directly below this tree - """ - return [ i for i in self if i.type == "blob" ] - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see utils.Traversable.traverse - - Trees are set to visist_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - - # List protocol - def __getslice__(self,i,j): - return self._cache[i:j] - - def __iter__(self): - return iter(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self,item): - if isinstance(item, int): - return self._cache[item] - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self,item): - if isinstance(item, base.IndexObject): - return item in self._cache - - # compatability - for obj in self._cache: - if item == obj.path: - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._cache) + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ + + type = "tree" + __slots__ = "_cache" + + # using ascii codes for comparison + commit_id = 016 + blob_id = 010 + symlink_id = 012 + tree_id = 004 + + + def __init__(self, repo, sha, mode=0, path=None): + super(Tree, self).__init__(repo, sha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return index_object._cache + return tuple() + + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() + else: + super(Tree, self)._set_cache_(attr) + + def _get_tree_cache(self): + """ + Return + list(object_instance, ...) + + ``treeish`` + sha or ref identifying a tree + """ + out = list() + for obj in self._iter_from_data(): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + + Returns + list(IndexObject, ...) + """ + ord_zero = ord('0') + data = self.data + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + type_id = mode >> 12 + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + path = join_path(self.path, name) + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.blob_id or type_id == self.symlink_id: + yield blob.Blob(self.repo, hexsha, mode, path) + elif type_id == self.tree_id: + yield Tree(self.repo, hexsha, mode, path) + elif type_id == self.commit_id: + # todo + yield None + else: + raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) + # END for each byte in data stream + + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> + >>> Repo('/path/to/python-git').tree/'README.txt' + <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> + + Returns + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree + """ + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for obj in self._cache: + if obj.name == file: + return obj + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + def __repr__(self): + return '<git.Tree "%s">' % self.sha + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see utils.Traversable.traverse + + Trees are set to visist_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + + def __iter__(self): + return iter(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py index ada34cc0..ec5453f1 100644 --- a/lib/git/objects/utils.py +++ b/lib/git/objects/utils.py @@ -11,157 +11,157 @@ from collections import deque as Deque from git.actor import Actor def get_object_type_by_name(object_type_name): - """ - Returns - type suitable to handle the given object type name. - Use the type to create new instances. - - ``object_type_name`` - Member of TYPES - - Raises - ValueError: In case object_type_name is unknown - """ - if object_type_name == "commit": - import commit - return commit.Commit - elif object_type_name == "tag": - import tag - return tag.TagObject - elif object_type_name == "blob": - import blob - return blob.Blob - elif object_type_name == "tree": - import tree - return tree.Tree - else: - raise ValueError("Cannot handle unknown object type: %s" % object_type_name) - - + """ + Returns + type suitable to handle the given object type name. + Use the type to create new instances. + + ``object_type_name`` + Member of TYPES + + Raises + ValueError: In case object_type_name is unknown + """ + if object_type_name == "commit": + import commit + return commit.Commit + elif object_type_name == "tag": + import tag + return tag.TagObject + elif object_type_name == "blob": + import blob + return blob.Blob + elif object_type_name == "tree": + import tree + return tree.Tree + else: + raise ValueError("Cannot handle unknown object type: %s" % object_type_name) + + # precompiled regex _re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') def parse_actor_and_date(line): - """ - Parse out the actor (author or committer) info from a line like:: - - author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - - Returns - [Actor, int_seconds_since_epoch] - """ - m = _re_actor_epoch.search(line) - actor, epoch = m.groups() - return (Actor._from_string(actor), int(epoch)) - - - + """ + Parse out the actor (author or committer) info from a line like:: + + author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 + + Returns + [Actor, int_seconds_since_epoch] + """ + m = _re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor._from_string(actor), int(epoch)) + + + class ProcessStreamAdapter(object): - """ - Class wireing all calls to the contained Process instance. - - Use this type to hide the underlying process to provide access only to a specified - stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope. - """ - __slots__ = ("_proc", "_stream") - def __init__(self, process, stream_name): - self._proc = process - self._stream = getattr(process, stream_name) - - def __getattr__(self, attr): - return getattr(self._stream, attr) - - + """ + Class wireing all calls to the contained Process instance. + + Use this type to hide the underlying process to provide access only to a specified + stream. The process is usually wrapped into an AutoInterrupt class to kill + it if the instance goes out of scope. + """ + __slots__ = ("_proc", "_stream") + def __init__(self, process, stream_name): + self._proc = process + self._stream = getattr(process, stream_name) + + def __getattr__(self, attr): + return getattr(self._stream, attr) + + class Traversable(object): - """Simple interface to perforam depth-first or breadth-first traversals - into one direction. - Subclasses only need to implement one function. - Instances of the Subclass must be hashable""" - __slots__ = tuple() - - @classmethod - def _get_intermediate_items(cls, item): - """ - Returns: - List of items connected to the given item. - Must be implemented in subclass - """ - raise NotImplementedError("To be implemented in subclass") - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = True, ignore_self=1, as_edge = False ): - """ - ``Returns`` - iterator yieling of items found when traversing self - - ``predicate`` - f(i,d) returns False if item i at depth d should not be included in the result - - ``prune`` - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. - - ``depth`` - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predessessors/successors - - ``branch_first`` - if True, items will be returned branch first, otherwise depth first - - ``visit_once`` - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. - - ``ignore_self`` - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None - - ``as_edge`` - if True, return a pair of items, first being the source, second the - destinatination, i.e. tuple(src, dest) with the edge spanning from - source to destination""" - visited = set() - stack = Deque() - stack.append( ( 0 ,self, None ) ) # self is always depth level 0 - - def addToStack( stack, item, branch_first, depth ): - lst = self._get_intermediate_items( item ) - if not lst: - return - if branch_first: - stack.extendleft( ( depth , i, item ) for i in lst ) - else: - reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) - stack.extend( reviter ) - # END addToStack local method - - while stack: - d, item, src = stack.pop() # depth of item, item, item_source - - if visit_once and item in visited: - continue - - if visit_once: - visited.add(item) - - rval = ( as_edge and (src, item) ) or item - if prune( rval, d ): - continue - - skipStartItem = ignore_self and ( item == self ) - if not skipStartItem and predicate( rval, d ): - yield rval - - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: - continue - - addToStack( stack, item, branch_first, nd ) - # END for each item on work stack + """Simple interface to perforam depth-first or breadth-first traversals + into one direction. + Subclasses only need to implement one function. + Instances of the Subclass must be hashable""" + __slots__ = tuple() + + @classmethod + def _get_intermediate_items(cls, item): + """ + Returns: + List of items connected to the given item. + Must be implemented in subclass + """ + raise NotImplementedError("To be implemented in subclass") + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = True, ignore_self=1, as_edge = False ): + """ + ``Returns`` + iterator yieling of items found when traversing self + + ``predicate`` + f(i,d) returns False if item i at depth d should not be included in the result + + ``prune`` + f(i,d) return True if the search should stop at item i at depth d. + Item i will not be returned. + + ``depth`` + define at which level the iteration should not go deeper + if -1, there is no limit + if 0, you would effectively only get self, the root of the iteration + i.e. if 1, you would only get the first level of predessessors/successors + + ``branch_first`` + if True, items will be returned branch first, otherwise depth first + + ``visit_once`` + if True, items will only be returned once, although they might be encountered + several times. Loops are prevented that way. + + ``ignore_self`` + if True, self will be ignored and automatically pruned from + the result. Otherwise it will be the first item to be returned. + If as_edge is True, the source of the first edge is None + + ``as_edge`` + if True, return a pair of items, first being the source, second the + destinatination, i.e. tuple(src, dest) with the edge spanning from + source to destination""" + visited = set() + stack = Deque() + stack.append( ( 0 ,self, None ) ) # self is always depth level 0 + + def addToStack( stack, item, branch_first, depth ): + lst = self._get_intermediate_items( item ) + if not lst: + return + if branch_first: + stack.extendleft( ( depth , i, item ) for i in lst ) + else: + reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) ) + stack.extend( reviter ) + # END addToStack local method + + while stack: + d, item, src = stack.pop() # depth of item, item, item_source + + if visit_once and item in visited: + continue + + if visit_once: + visited.add(item) + + rval = ( as_edge and (src, item) ) or item + if prune( rval, d ): + continue + + skipStartItem = ignore_self and ( item == self ) + if not skipStartItem and predicate( rval, d ): + yield rval + + # only continue to next level if this is appropriate ! + nd = d + 1 + if depth > -1 and nd > depth: + continue + + addToStack( stack, item, branch_first, nd ) + # END for each item on work stack |