summary | refs | log | tree | commit | diff
path: root/lib/git/objects
diff options
context:
space:
mode:
Diffstat (limited to 'lib/git/objects')
-rw-r--r-- lib/git/objects/base.py 414
-rw-r--r-- lib/git/objects/commit.py 788
-rw-r--r-- lib/git/objects/tree.py 2
-rw-r--r-- lib/git/objects/utils.py 439
4 files changed, 922 insertions, 721 deletions
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 6a51eed3..5a3a15a7 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -4,224 +4,220 @@
# This module is part of GitPython and is released under
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
import os
-from git.utils import LazyMixin, join_path_native
+from git.utils import LazyMixin, join_path_native, stream_copy
import utils
-
+
_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
class Object(LazyMixin):
- """
- Implements an Object which may be Blobs, Trees, Commits and Tags
-
- This Object also serves as a constructor for instances of the correct type::
-
- inst = Object.new(repo,id)
- inst.sha # objects sha in hex
- inst.size # objects uncompressed data size
- inst.data # byte string containing the whole data of the object
- """
- NULL_HEX_SHA = '0'*40
- TYPES = ("blob", "tree", "commit", "tag")
- __slots__ = ("repo", "sha", "size", "data" )
- type = None # to be set by subclass
-
- def __init__(self, repo, id):
- """
- Initialize an object by identifying it by its id. All keyword arguments
- will be set on demand if None.
-
- ``repo``
- repository this object is located in
-
- ``id``
- SHA1 or ref suitable for git-rev-parse
- """
- super(Object,self).__init__()
- self.repo = repo
- self.sha = id
+ """
+ Implements an Object which may be Blobs, Trees, Commits and Tags
+
+ This Object also serves as a constructor for instances of the correct type::
+
+ inst = Object.new(repo,id)
+ inst.sha # objects sha in hex
+ inst.size # objects uncompressed data size
+ inst.data # byte string containing the whole data of the object
+ """
+ NULL_HEX_SHA = '0'*40
+ TYPES = ("blob", "tree", "commit", "tag")
+ __slots__ = ("repo", "sha", "size", "data" )
+ type = None # to be set by subclass
+
+ def __init__(self, repo, id):
+ """
+ Initialize an object by identifying it by its id. All keyword arguments
+ will be set on demand if None.
+
+ ``repo``
+ repository this object is located in
+
+ ``id``
+ SHA1 or ref suitable for git-rev-parse
+ """
+ super(Object,self).__init__()
+ self.repo = repo
+ self.sha = id
- @classmethod
- def new(cls, repo, id):
- """
- Return
- New Object instance of a type appropriate to the object type behind
- id. The id of the newly created object will be a hexsha even though
- the input id may have been a Reference or Rev-Spec
-
- Note
- This cannot be a __new__ method as it would always call __init__
- with the input id which is not necessarily a hexsha.
- """
- hexsha, typename, size = repo.git.get_object_header(id)
- obj_type = utils.get_object_type_by_name(typename)
- inst = obj_type(repo, hexsha)
- inst.size = size
- return inst
-
- def _set_self_from_args_(self, args_dict):
- """
- Initialize attributes on self from the given dict that was retrieved
- from locals() in the calling method.
-
- Will only set an attribute on self if the corresponding value in args_dict
- is not None
- """
- for attr, val in args_dict.items():
- if attr != "self" and val is not None:
- setattr( self, attr, val )
- # END set all non-None attributes
-
- def _set_cache_(self, attr):
- """
- Retrieve object information
- """
- if attr == "size":
- hexsha, typename, self.size = self.repo.git.get_object_header(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- elif attr == "data":
- hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha)
- assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
- else:
- super(Object,self)._set_cache_(attr)
-
- def __eq__(self, other):
- """
- Returns
- True if the objects have the same SHA1
- """
- return self.sha == other.sha
-
- def __ne__(self, other):
- """
- Returns
- True if the objects do not have the same SHA1
- """
- return self.sha != other.sha
-
- def __hash__(self):
- """
- Returns
- Hash of our id allowing objects to be used in dicts and sets
- """
- return hash(self.sha)
-
- def __str__(self):
- """
- Returns
- string of our SHA1 as understood by all git commands
- """
- return self.sha
-
- def __repr__(self):
- """
- Returns
- string with pythonic representation of our object
- """
- return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
+ @classmethod
+ def new(cls, repo, id):
+ """
+ Return
+ New Object instance of a type appropriate to the object type behind
+ id. The id of the newly created object will be a hexsha even though
+ the input id may have been a Reference or Rev-Spec
+
+ Note
+ This cannot be a __new__ method as it would always call __init__
+ with the input id which is not necessarily a hexsha.
+ """
+ hexsha, typename, size = repo.git.get_object_header(id)
+ obj_type = utils.get_object_type_by_name(typename)
+ inst = obj_type(repo, hexsha)
+ inst.size = size
+ return inst
+
+ def _set_self_from_args_(self, args_dict):
+ """
+ Initialize attributes on self from the given dict that was retrieved
+ from locals() in the calling method.
+
+ Will only set an attribute on self if the corresponding value in args_dict
+ is not None
+ """
+ for attr, val in args_dict.items():
+ if attr != "self" and val is not None:
+ setattr( self, attr, val )
+ # END set all non-None attributes
+
+ def _set_cache_(self, attr):
+ """
+ Retrieve object information
+ """
+ if attr == "size":
+ oinfo = self.repo.odb.info(self.sha)
+ self.size = oinfo.size
+ assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
+ elif attr == "data":
+ ostream = self.repo.odb.stream(self.sha)
+ self.size = ostream.size
+ self.data = ostream.read()
+ assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
+ else:
+ super(Object,self)._set_cache_(attr)
+
+ def __eq__(self, other):
+ """
+ Returns
+ True if the objects have the same SHA1
+ """
+ return self.sha == other.sha
+
+ def __ne__(self, other):
+ """
+ Returns
+ True if the objects do not have the same SHA1
+ """
+ return self.sha != other.sha
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our id allowing objects to be used in dicts and sets
+ """
+ return hash(self.sha)
+
+ def __str__(self):
+ """
+ Returns
+ string of our SHA1 as understood by all git commands
+ """
+ return self.sha
+
+ def __repr__(self):
+ """
+ Returns
+ string with pythonic representation of our object
+ """
+ return '<git.%s "%s">' % (self.__class__.__name__, self.sha)
- @property
- def data_stream(self):
- """
- Returns
- File Object compatible stream to the uncompressed raw data of the object
- """
- proc = self.repo.git.cat_file(self.type, self.sha, as_process=True)
- return utils.ProcessStreamAdapter(proc, "stdout")
+ @property
+ def data_stream(self):
+ """ :return: File Object compatible stream to the uncompressed raw data of the object
+ :note: returned streams must be read in order"""
+ return self.repo.odb.stream(self.sha)
- def stream_data(self, ostream):
- """
- Writes our data directly to the given output stream
-
- ``ostream``
- File object compatible stream object.
-
- Returns
- self
- """
- self.repo.git.cat_file(self.type, self.sha, output_stream=ostream)
- return self
+ def stream_data(self, ostream):
+ """Writes our data directly to the given output stream
+ :param ostream: File object compatible stream object.
+ :return: self"""
+ istream = self.repo.odb.stream(self.sha)
+ stream_copy(istream, ostream)
+ return self
+
class IndexObject(Object):
- """
- Base for all objects that can be part of the index file , namely Tree, Blob and
- SubModule objects
- """
- __slots__ = ("path", "mode")
-
- def __init__(self, repo, sha, mode=None, path=None):
- """
- Initialize a newly instanced IndexObject
- ``repo``
- is the Repo we are located in
+ """
+ Base for all objects that can be part of the index file , namely Tree, Blob and
+ SubModule objects
+ """
+ __slots__ = ("path", "mode")
+
+ def __init__(self, repo, sha, mode=None, path=None):
+ """
+ Initialize a newly instanced IndexObject
+ ``repo``
+ is the Repo we are located in
- ``sha`` : string
- is the git object id as hex sha
+ ``sha`` : string
+ is the git object id as hex sha
- ``mode`` : int
- is the file mode as int, use the stat module to evaluate the infomration
+ ``mode`` : int
+ is the file mode as int, use the stat module to evaluate the information
- ``path`` : str
- is the path to the file in the file system, relative to the git repository root, i.e.
- file.ext or folder/other.ext
-
- NOTE
- Path may not be set of the index object has been created directly as it cannot
- be retrieved without knowing the parent tree.
- """
- super(IndexObject, self).__init__(repo, sha)
- self._set_self_from_args_(locals())
- if isinstance(mode, basestring):
- self.mode = self._mode_str_to_int(mode)
-
- def __hash__(self):
- """
- Returns
- Hash of our path as index items are uniquely identifyable by path, not
- by their data !
- """
- return hash(self.path)
-
- def _set_cache_(self, attr):
- if attr in IndexObject.__slots__:
- # they cannot be retrieved lateron ( not without searching for them )
- raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
- else:
- super(IndexObject, self)._set_cache_(attr)
-
- @classmethod
- def _mode_str_to_int(cls, modestr):
- """
- ``modestr``
- string like 755 or 644 or 100644 - only the last 6 chars will be used
-
- Returns
- String identifying a mode compatible to the mode methods ids of the
- stat module regarding the rwx permissions for user, group and other,
- special flags and file system flags, i.e. whether it is a symlink
- for example.
- """
- mode = 0
- for iteration,char in enumerate(reversed(modestr[-6:])):
- mode += int(char) << iteration*3
- # END for each char
- return mode
-
- @property
- def name(self):
- """
- Returns
- Name portion of the path, effectively being the basename
- """
- return os.path.basename(self.path)
-
- @property
- def abspath(self):
- """
- Returns
- Absolute path to this index object in the file system ( as opposed to the
- .path field which is a path relative to the git repository ).
-
- The returned path will be native to the system and contains '\' on windows.
- """
- return join_path_native(self.repo.working_tree_dir, self.path)
-
+ ``path`` : str
+ is the path to the file in the file system, relative to the git repository root, i.e.
+ file.ext or folder/other.ext
+
+ NOTE
+ Path may not be set if the index object has been created directly as it cannot
+ be retrieved without knowing the parent tree.
+ """
+ super(IndexObject, self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+ if isinstance(mode, basestring):
+ self.mode = self._mode_str_to_int(mode)
+
+ def __hash__(self):
+ """
+ Returns
+ Hash of our path as index items are uniquely identifiable by path, not
+ by their data !
+ """
+ return hash(self.path)
+
+ def _set_cache_(self, attr):
+ if attr in IndexObject.__slots__:
+ # they cannot be retrieved later on ( not without searching for them )
+ raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
+ else:
+ super(IndexObject, self)._set_cache_(attr)
+
+ @classmethod
+ def _mode_str_to_int(cls, modestr):
+ """
+ ``modestr``
+ string like 755 or 644 or 100644 - only the last 6 chars will be used
+
+ Returns
+ Integer identifying a mode compatible to the mode methods ids of the
+ stat module regarding the rwx permissions for user, group and other,
+ special flags and file system flags, i.e. whether it is a symlink
+ for example.
+ """
+ mode = 0
+ for iteration,char in enumerate(reversed(modestr[-6:])):
+ mode += int(char) << iteration*3
+ # END for each char
+ return mode
+
+ @property
+ def name(self):
+ """
+ Returns
+ Name portion of the path, effectively being the basename
+ """
+ return os.path.basename(self.path)
+
+ @property
+ def abspath(self):
+ """
+ Returns
+ Absolute path to this index object in the file system ( as opposed to the
+ .path field which is a path relative to the git repository ).
+
+ The returned path will be native to the system and contains '\' on windows.
+ """
+ return join_path_native(self.repo.working_tree_dir, self.path)
+
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 826f684c..9a3c2c95 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -7,372 +7,434 @@
from git.utils import Iterable
import git.diff as diff
import git.stats as stats
+from git.actor import Actor
from tree import Tree
+from git.odb import IStream
+from cStringIO import StringIO
import base
import utils
-import tempfile
+import time
import os
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
- """
- Wraps a git Commit object.
-
- This class will act lazily on some of its attributes and will query the
- value on demand only if it involves calling the git binary.
- """
-
- # object configuration
- type = "commit"
- __slots__ = ("tree",
- "author", "authored_date", "author_tz_offset",
- "committer", "committed_date", "committer_tz_offset",
- "message", "parents")
- _id_attribute_ = "sha"
-
- def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
- committer=None, committed_date=None, committer_tz_offset=None, message=None, parents=None):
- """
- Instantiate a new Commit. All keyword arguments taking None as default will
- be implicitly set if id names a valid sha.
-
- The parameter documentation indicates the type of the argument after a colon ':'.
-
- ``sha``
- is the sha id of the commit or a ref
-
- ``parents`` : tuple( Commit, ... )
- is a tuple of commit ids or actual Commits
-
- ``tree`` : Tree
- is the corresponding tree id or an actual Tree
-
- ``author`` : Actor
- is the author string ( will be implicitly converted into an Actor object )
-
- ``authored_date`` : int_seconds_since_epoch
- is the authored DateTime - use time.gmtime() to convert it into a
- different format
-
- ``author_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``committer`` : Actor
- is the committer string
-
- ``committed_date`` : int_seconds_since_epoch
- is the committed DateTime - use time.gmtime() to convert it into a
- different format
-
- ``committer_tz_offset``: int_seconds_west_of_utc
- is the timezone that the authored_date is in
-
- ``message`` : string
- is the commit message
-
- Returns
- git.Commit
- """
- super(Commit,self).__init__(repo, sha)
- self._set_self_from_args_(locals())
-
- if parents is not None:
- self.parents = tuple( self.__class__(repo, p) for p in parents )
- # END for each parent to convert
-
- if self.sha and tree is not None:
- self.tree = Tree(repo, tree, path='')
- # END id to tree conversion
-
- @classmethod
- def _get_intermediate_items(cls, commit):
- return commit.parents
-
- def _set_cache_(self, attr):
- """
- Called by LazyMixin superclass when the given uninitialized member needs
- to be set.
- We set all values at once.
- """
- if attr in Commit.__slots__:
- # prepare our data lines to match rev-list
- data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.sha)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.author_tz_offset = temp.author_tz_offset
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.committer_tz_offset = temp.committer_tz_offset
- self.message = temp.message
- else:
- super(Commit, self)._set_cache_(attr)
-
- @property
- def summary(self):
- """
- Returns
- First line of the commit message.
- """
- return self.message.split('\n', 1)[0]
-
- def count(self, paths='', **kwargs):
- """
- Count the number of commits reachable from this commit
-
- ``paths``
- is an optinal path or a list of paths restricting the return value
- to commits actually containing the paths
-
- ``kwargs``
- Additional options to be passed to git-rev-list. They must not alter
- the ouput style of the command, or parsing will yield incorrect results
- Returns
- int
- """
- # yes, it makes a difference whether empty paths are given or not in our case
- # as the empty paths version will ignore merge commits for some reason.
- if paths:
- return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
- else:
- return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
-
-
- @property
- def name_rev(self):
- """
- Returns
- String describing the commits hex sha based on the closest Reference.
- Mostly useful for UI purposes
- """
- return self.repo.git.name_rev(self)
-
- @classmethod
- def iter_items(cls, repo, rev, paths='', **kwargs):
- """
- Find all commits matching the given criteria.
-
- ``repo``
- is the Repo
-
- ``rev``
- revision specifier, see git-rev-parse for viable options
-
- ``paths``
- is an optinal path or list of paths, if set only Commits that include the path
- or paths will be considered
-
- ``kwargs``
- optional keyword arguments to git rev-list where
- ``max_count`` is the maximum number of commits to fetch
- ``skip`` is the number of commits to skip
- ``since`` all commits since i.e. '1970-01-01'
-
- Returns
- iterator yielding Commit items
- """
- options = {'pretty': 'raw', 'as_process' : True }
- options.update(kwargs)
-
- args = list()
- if paths:
- args.extend(('--', paths))
- # END if paths
-
- proc = repo.git.rev_list(rev, args, **options)
- return cls._iter_from_process_or_stream(repo, proc, True)
-
- def iter_parents(self, paths='', **kwargs):
- """
- Iterate _all_ parents of this commit.
-
- ``paths``
- Optional path or list of paths limiting the Commits to those that
- contain at least one of the paths
-
- ``kwargs``
- All arguments allowed by git-rev-list
-
- Return:
- Iterator yielding Commit objects which are parents of self
- """
- # skip ourselves
- skip = kwargs.get("skip", 1)
- if skip == 0: # skip ourselves
- skip = 1
- kwargs['skip'] = skip
-
- return self.iter_items( self.repo, self, paths, **kwargs )
-
- @property
- def stats(self):
- """
- Create a git stat from changes between this commit and its first parent
- or from all changes done if this is the very first commit.
-
- Return
- git.Stats
- """
- if not self.parents:
- text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
- text2 = ""
- for line in text.splitlines()[1:]:
- (insertions, deletions, filename) = line.split("\t")
- text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
- text = text2
- else:
- text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
- return stats.Stats._list_from_string(self.repo, text)
-
- @classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``proc``
- git-rev-list process instance (raw format)
-
- ``from_rev_list``
- If True, the stream was created by rev-list in which case we parse
- the message differently
- Returns
- iterator returning Commit objects
- """
- stream = proc_or_stream
- if not hasattr(stream,'next'):
- stream = proc_or_stream.stdout
-
- for line in stream:
- commit_tokens = line.split()
- id = commit_tokens[1]
- assert commit_tokens[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
- committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-
- # empty line
- stream.next()
-
- message_lines = []
- if from_rev_list:
- for msg_line in stream:
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- else:
- # a stream from our data simply gives us the plain message
- for msg_line in stream:
- message_lines.append(msg_line)
- # END message parsing
- message = '\n'.join(message_lines)
-
- yield Commit(repo, id, parents=tuple(parents), tree=tree,
- author=author, authored_date=authored_date, author_tz_offset=author_tz_offset,
- committer=committer, committed_date=committed_date, committer_tz_offset=committer_tz_offset,
- message=message)
- # END for each line in stream
-
-
- @classmethod
- def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
- """
- Commit the given tree, creating a commit object.
-
- ``repo``
- is the Repo
-
- ``tree``
- Sha of a tree or a tree object to become the tree of the new commit
-
- ``message``
- Commit message. It may be an empty string if no message is provided.
- It will be converted to a string in any case.
-
- ``parent_commits``
- Optional Commit objects to use as parents for the new commit.
- If empty list, the commit will have no parents at all and become
- a root commit.
- If None , the current head commit will be the parent of the
- new commit object
-
- ``head``
- If True, the HEAD will be advanced to the new commit automatically.
- Else the HEAD will remain pointing on the previous commit. This could
- lead to undesired results when diffing files.
-
- Returns
- Commit object representing the new commit
-
- Note:
- Additional information about hte committer and Author are taken from the
- environment or from the git configuration, see git-commit-tree for
- more information
- """
- parents = parent_commits
- if parent_commits is None:
- try:
- parent_commits = [ repo.head.commit ]
- except ValueError:
- # empty repositories have no head commit
- parent_commits = list()
- # END handle parent commits
- # END if parent commits are unset
-
- parent_args = [ ("-p", str(commit)) for commit in parent_commits ]
-
- # create message stream
- tmp_file_path = tempfile.mktemp()
- fp = open(tmp_file_path,"wb")
- fp.write(str(message))
- fp.close()
- fp = open(tmp_file_path,"rb")
- fp.seek(0)
-
- try:
- # write the current index as tree
- commit_sha = repo.git.commit_tree(tree, parent_args, istream=fp)
- new_commit = cls(repo, commit_sha)
-
- if head:
- try:
- repo.head.commit = new_commit
- except ValueError:
- # head is not yet set to the ref our HEAD points to.
- import git.refs
- master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
- repo.head.reference = master
- # END handle empty repositories
- # END advance head handling
-
- return new_commit
- finally:
- fp.close()
- os.remove(tmp_file_path)
-
- def __str__(self):
- """ Convert commit to string which is SHA1 """
- return self.sha
-
- def __repr__(self):
- return '<git.Commit "%s">' % self.sha
+class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
+ """
+ Wraps a git Commit object.
+
+ This class will act lazily on some of its attributes and will query the
+ value on demand only if it involves calling the git binary.
+ """
+
+ # ENVIRONMENT VARIABLES
+ # read when creating new commits
+ env_author_name = "GIT_AUTHOR_NAME"
+ env_author_email = "GIT_AUTHOR_EMAIL"
+ env_author_date = "GIT_AUTHOR_DATE"
+ env_committer_name = "GIT_COMMITTER_NAME"
+ env_committer_email = "GIT_COMMITTER_EMAIL"
+ env_committer_date = "GIT_COMMITTER_DATE"
+ env_email = "EMAIL"
+
+ # CONFIGURATION KEYS
+ conf_email = 'email'
+ conf_name = 'name'
+ conf_encoding = 'i18n.commitencoding'
+
+ # INVARIANTS
+ default_encoding = "UTF-8"
+
+
+ # object configuration
+ type = "commit"
+ __slots__ = ("tree",
+ "author", "authored_date", "author_tz_offset",
+ "committer", "committed_date", "committer_tz_offset",
+ "message", "parents", "encoding")
+ _id_attribute_ = "sha"
+
+ def __init__(self, repo, sha, tree=None, author=None, authored_date=None, author_tz_offset=None,
+ committer=None, committed_date=None, committer_tz_offset=None,
+ message=None, parents=None, encoding=None):
+ """
+ Instantiate a new Commit. All keyword arguments taking None as default will
+ be implicitly set if id names a valid sha.
+
+ The parameter documentation indicates the type of the argument after a colon ':'.
+
+ :param sha: is the sha id of the commit or a ref
+ :param parents: tuple( Commit, ... )
+ is a tuple of commit ids or actual Commits
+ :param tree: Tree
+ is the corresponding tree id or an actual Tree
+ :param author: Actor
+ is the author string ( will be implicitly converted into an Actor object )
+ :param authored_date: int_seconds_since_epoch
+ is the authored DateTime - use time.gmtime() to convert it into a
+ different format
+ :param author_tz_offset: int_seconds_west_of_utc
+ is the timezone that the authored_date is in
+ :param committer: Actor
+ is the committer string
+ :param committed_date: int_seconds_since_epoch
+ is the committed DateTime - use time.gmtime() to convert it into a
+ different format
+ :param committer_tz_offset: int_seconds_west_of_utc
+ is the timezone that the committed_date is in
+ :param message: string
+ is the commit message
+ :param encoding: string
+ encoding of the message, defaults to UTF-8
+ :return: git.Commit
+
+ :note: Timezone information is in the same format and in the same sign
+ as what time.altzone returns. The sign is inverted compared to git's
+ UTC timezone.
+ """
+ super(Commit,self).__init__(repo, sha)
+ self._set_self_from_args_(locals())
+
+ @classmethod
+ def _get_intermediate_items(cls, commit):
+ return commit.parents
+
+ def _set_cache_(self, attr):
+ """ Called by LazyMixin superclass when the given uninitialized member needs
+ to be set.
+ We set all values at once. """
+ if attr in Commit.__slots__:
+ # read the data in a chunk, its faster - then provide a file wrapper
+ # Could use self.data, but lets try to get it with less calls
+ hexsha, typename, size, data = self.repo.git.get_object_data(self)
+ self._deserialize(StringIO(data))
+ else:
+ super(Commit, self)._set_cache_(attr)
+
+ @property
+ def summary(self):
+ """
+ Returns
+ First line of the commit message.
+ """
+ return self.message.split('\n', 1)[0]
+
+ def count(self, paths='', **kwargs):
+ """
+ Count the number of commits reachable from this commit
+
+ ``paths``
+ is an optional path or a list of paths restricting the return value
+ to commits actually containing the paths
+
+ ``kwargs``
+ Additional options to be passed to git-rev-list. They must not alter
+ the output style of the command, or parsing will yield incorrect results
+ Returns
+ int
+ """
+ # yes, it makes a difference whether empty paths are given or not in our case
+ # as the empty paths version will ignore merge commits for some reason.
+ if paths:
+ return len(self.repo.git.rev_list(self.sha, '--', paths, **kwargs).splitlines())
+ else:
+ return len(self.repo.git.rev_list(self.sha, **kwargs).splitlines())
+
+
+ @property
+ def name_rev(self):
+ """
+ Returns
+ String describing the commits hex sha based on the closest Reference.
+ Mostly useful for UI purposes
+ """
+ return self.repo.git.name_rev(self)
+
+ @classmethod
+ def iter_items(cls, repo, rev, paths='', **kwargs):
+ """
+ Find all commits matching the given criteria.
+
+ ``repo``
+ is the Repo
+
+ ``rev``
+ revision specifier, see git-rev-parse for viable options
+
+ ``paths``
+ is an optional path or list of paths, if set only Commits that include the path
+ or paths will be considered
+
+ ``kwargs``
+ optional keyword arguments to git rev-list where
+ ``max_count`` is the maximum number of commits to fetch
+ ``skip`` is the number of commits to skip
+ ``since`` all commits since i.e. '1970-01-01'
+
+ Returns
+ iterator yielding Commit items
+ """
+ if 'pretty' in kwargs:
+ raise ValueError("--pretty cannot be used as parsing expects single sha's only")
+ # END handle pretty
+ args = list()
+ if paths:
+ args.extend(('--', paths))
+ # END if paths
+
+ proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
+ return cls._iter_from_process_or_stream(repo, proc)
+
+ def iter_parents(self, paths='', **kwargs):
+ """
+ Iterate _all_ parents of this commit.
+
+ ``paths``
+ Optional path or list of paths limiting the Commits to those that
+ contain at least one of the paths
+
+ ``kwargs``
+ All arguments allowed by git-rev-list
+
+ Return:
+ Iterator yielding Commit objects which are parents of self
+ """
+ # skip ourselves
+ skip = kwargs.get("skip", 1)
+ if skip == 0: # skip ourselves
+ skip = 1
+ kwargs['skip'] = skip
+
+ return self.iter_items( self.repo, self, paths, **kwargs )
+
+ @property
+ def stats(self):
+ """
+ Create a git stat from changes between this commit and its first parent
+ or from all changes done if this is the very first commit.
+
+ Return
+ git.Stats
+ """
+ if not self.parents:
+ text = self.repo.git.diff_tree(self.sha, '--', numstat=True, root=True)
+ text2 = ""
+ for line in text.splitlines()[1:]:
+ (insertions, deletions, filename) = line.split("\t")
+ text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename)
+ text = text2
+ else:
+ text = self.repo.git.diff(self.parents[0].sha, self.sha, '--', numstat=True)
+ return stats.Stats._list_from_string(self.repo, text)
+
+ @classmethod
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+ We expect one line per commit, and parse the actual commit information directly
+ from our lightning fast object database
+
+ :param proc: git-rev-list process instance - one sha per line
+ :return: iterator returning Commit objects"""
+ stream = proc_or_stream
+ if not hasattr(stream,'readline'):
+ stream = proc_or_stream.stdout
+
+ readline = stream.readline
+ while True:
+ line = readline()
+ if not line:
+ break
+ sha = line.strip()
+ if len(sha) > 40:
+ # split additional information, as returned by bisect for instance
+ sha, rest = line.split(None, 1)
+ # END handle extra info
+
+ assert len(sha) == 40, "Invalid line: %s" % sha
+ yield Commit(repo, sha)
+ # END for each line in stream
+
+
+ @classmethod
+ def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False):
+     """Commit the given tree, creating a commit object.
+
+     :param repo: Repo object the commit should be part of
+     :param tree: Sha of a tree or a tree object to become the tree of the new commit
+     :param message: Commit message. It may be an empty string if no message is provided.
+         It is handed to the commit constructor as given.
+     :param parent_commits:
+         Optional Commit objects to use as parents for the new commit.
+         If empty list, the commit will have no parents at all and become
+         a root commit.
+         If None, the current head commit will be the parent of the
+         new commit object. If the head commit cannot be resolved (ValueError),
+         the commit is created without parents.
+     :param head:
+         If True, the HEAD will be advanced to the new commit automatically.
+         Else the HEAD will remain pointing on the previous commit. This could
+         lead to undesired results when diffing files.
+
+     :return: Commit object representing the new commit
+
+     :note:
+         Name and email of author and committer are looked up in the environment
+         first, then in the 'user' section of the repository configuration, and
+         are finally derived from utils.get_user_id(). See git-commit-tree for
+         more information.
+     """
+     if parent_commits is None:
+         try:
+             parent_commits = [ repo.head.commit ]
+         except ValueError:
+             # empty repositories have no head commit
+             parent_commits = list()
+         # END handle parent commits
+     # END if parent commits are unset
+
+     # retrieve all additional information, create a commit object, and
+     # serialize it
+     # Generally:
+     # * Environment variables override configuration values
+     # * Sensible defaults are set according to the git documentation
+
+     # COMMITTER AND AUTHOR INFO
+     cr = repo.config_reader()
+     env = os.environ
+     default_email = utils.get_user_id()
+     default_name = default_email.split('@')[0]
+
+     conf_name = cr.get_value('user', cls.conf_name, default_name)
+     conf_email = cr.get_value('user', cls.conf_email, default_email)
+
+     # author and committer share the same fallback chain: the configured
+     # identity is used whenever the respective environment variable is unset
+     author_name = env.get(cls.env_author_name, conf_name)
+     author_email = env.get(cls.env_author_email, conf_email)
+
+     committer_name = env.get(cls.env_committer_name, conf_name)
+     committer_email = env.get(cls.env_committer_email, conf_email)
+
+     # PARSE THE DATES
+     # without a date in the environment, the current time and time.altzone are used
+     unix_time = int(time.time())
+     offset = time.altzone
+
+     author_date_str = env.get(cls.env_author_date, '')
+     if author_date_str:
+         author_time, author_offset = utils.parse_date(author_date_str)
+     else:
+         author_time, author_offset = unix_time, offset
+     # END set author time
+
+     committer_date_str = env.get(cls.env_committer_date, '')
+     if committer_date_str:
+         committer_time, committer_offset = utils.parse_date(committer_date_str)
+     else:
+         committer_time, committer_offset = unix_time, offset
+     # END set committer time
+
+     # the encoding comes from the configuration, falling back to the class default
+     enc_section, enc_option = cls.conf_encoding.split('.')
+     conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
+
+     author = Actor(author_name, author_email)
+     committer = Actor(committer_name, committer_email)
+
+
+     # CREATE NEW COMMIT
+     # the sha is unknown until the serialized commit was stored in the object database
+     new_commit = cls(repo, cls.NULL_HEX_SHA, tree,
+                     author, author_time, author_offset,
+                     committer, committer_time, committer_offset,
+                     message, parent_commits, conf_encoding)
+
+     stream = StringIO()
+     new_commit._serialize(stream)
+     streamlen = stream.tell()
+     stream.seek(0)
+
+     istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+     new_commit.sha = istream.sha
+
+     if head:
+         try:
+             repo.head.commit = new_commit
+         except ValueError:
+             # head is not yet set to the ref our HEAD points to
+             # Happens on first commit
+             import git.refs
+             master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit)
+             repo.head.reference = master
+         # END handle empty repositories
+     # END advance head handling
+
+     return new_commit
+
+
+ def __str__(self):
+     """:return: the value of ``self.sha``, which is this commit's string form"""
+     sha = self.sha
+     return sha
+
+ def __repr__(self):
+     """:return: ``self.sha`` embedded into a ``<git.Commit "...">`` marker"""
+     sha = self.sha
+     return '<git.Commit "%s">' % sha
+
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+     """Write this commit to ``stream``: the tree line, one line per parent, the
+     author and committer lines, an optional encoding line, an empty line and
+     finally the message.
+
+     :param stream: object providing a ``write`` method
+     :return: self"""
+     emit = stream.write
+     emit("tree %s\n" % self.tree)
+     for parent in self.parents:
+         emit("parent %s\n" % parent)
+     # END for each parent
+
+     author = self.author
+     committer = self.committer
+     actor_fmt = "%s %s <%s> %s %s\n"
+
+     author_line = actor_fmt % ("author", author.name, author.email,
+                                self.authored_date,
+                                utils.altz_to_utctz_str(self.author_tz_offset))
+     emit(author_line)
+
+     committer_line = actor_fmt % ("committer", committer.name, committer.email,
+                                   self.committed_date,
+                                   utils.altz_to_utctz_str(self.committer_tz_offset))
+     emit(committer_line)
+
+     # the encoding line is only written if it differs from the default
+     if self.encoding != self.default_encoding:
+         emit("encoding %s\n" % self.encoding)
+     # END handle encoding
+
+     # an empty line separates the header from the message
+     emit("\n")
+     emit(self.message)
+     return self
+
+ def _deserialize(self, stream):
+     """Populate this commit from ``stream``, reading the tree line, the parent
+     lines, the author and committer lines, the optional encoding line and the
+     message, in that order.
+
+     :param stream: object providing ``readline`` and ``read``
+     :return: self"""
+     next_line = stream.readline
+
+     # first line: the second word is the sha of the tree
+     self.tree = Tree(self.repo, next_line().split()[1], 0, '')
+
+     # any number of parent lines follow, the first other line is the author line
+     parents = list()
+     line = next_line()
+     while line.startswith('parent'):
+         parents.append(type(self)(self.repo, line.split()[-1]))
+         line = next_line()
+     # END for each parent line
+     self.parents = tuple(parents)
+
+     self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(line)
+     self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(next_line())
+
+     # next is either the encoding line, followed by the empty separator line,
+     # or the empty separator line itself
+     self.encoding = self.default_encoding
+     header = next_line().strip()
+     if header:
+         # everything behind the first space is the encoding's name
+         self.encoding = header[header.find(' ')+1:]
+         # consume the separator line
+         next_line()
+     # END handle encoding
+
+     # the remainder of the stream is the message, taken as it is
+     self.message = stream.read()
+     return self
+
+ #} END serializable implementation
diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py
index a9e60981..285d3b5b 100644
--- a/lib/git/objects/tree.py
+++ b/lib/git/objects/tree.py
@@ -209,7 +209,7 @@ class Tree(base.IndexObject, diff.Diffable, utils.Traversable):
visit_once = False, ignore_self=1 ):
"""For documentation, see utils.Traversable.traverse
- Trees are set to visist_once = False to gain more performance in the traversal"""
+ Trees are set to visit_once = False to gain more performance in the traversal"""
return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
# List protocol
diff --git a/lib/git/objects/utils.py b/lib/git/objects/utils.py
index 4f17b652..c93f2091 100644
--- a/lib/git/objects/utils.py
+++ b/lib/git/objects/utils.py
@@ -9,159 +9,302 @@ Module for general utility functions
import re
from collections import deque as Deque
from git.actor import Actor
+import platform
+
+from string import digits
+import time
+import os
+
+__all__ = ('get_object_type_by_name', 'get_user_id', 'parse_date', 'parse_actor_and_date',
+ 'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz',
+ 'verify_utctz')
def get_object_type_by_name(object_type_name):
- """
- Returns
- type suitable to handle the given object type name.
- Use the type to create new instances.
-
- ``object_type_name``
- Member of TYPES
-
- Raises
- ValueError: In case object_type_name is unknown
- """
- if object_type_name == "commit":
- import commit
- return commit.Commit
- elif object_type_name == "tag":
- import tag
- return tag.TagObject
- elif object_type_name == "blob":
- import blob
- return blob.Blob
- elif object_type_name == "tree":
- import tree
- return tree.Tree
- else:
- raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-
-
+ """
+ Map the name of a git object type to the class implementing it.
+ The module defining the class is imported when the function is called.
+
+ Returns
+ type suitable to handle the given object type name.
+ Use the type to create new instances.
+
+ ``object_type_name``
+ One of "commit", "tag", "blob" or "tree"
+
+ Raises
+ ValueError: In case object_type_name is unknown
+ """
+ # NOTE(review): the per-branch imports presumably avoid import cycles between
+ # the object modules - confirm before moving them to module level
+ if object_type_name == "commit":
+ import commit
+ return commit.Commit
+ elif object_type_name == "tag":
+ import tag
+ return tag.TagObject
+ elif object_type_name == "blob":
+ import blob
+ return blob.Blob
+ elif object_type_name == "tree":
+ import tree
+ return tree.Tree
+ else:
+ raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
+
+
+def get_user_id():
+    """:return: string identifying the currently active system user as name@node
+    :note: the 'USER' environment variable takes precedence. If it is unset,
+        os.getlogin() is asked where available. If that fails as well, the
+        user name is 'UNKNOWN'"""
+    ukn = 'UNKNOWN'
+    username = os.environ.get('USER', ukn)
+    if username == ukn and hasattr(os, 'getlogin'):
+        try:
+            username = os.getlogin()
+        except OSError:
+            # getlogin needs a controlling terminal, which processes started
+            # by cron, daemons or CI runners do not have
+            username = ukn
+        # END handle missing terminal
+    # END get username from login
+    return "%s@%s" % (username, platform.node())
+
+
+def utctz_to_altz(utctz):
+    """Convert a git utc timezone string into an offset in seconds, using the
+    sign convention of time.altzone, which is the opposite of git's.
+
+    The string is read as HHMM, not as a decimal number: '+0530' are five hours
+    and thirty minutes and yield -19800.
+
+    :param utctz: git utc timezone string, i.e. +0200
+    :return: int, offset in seconds"""
+    utci = int(float(utctz))
+    hours, minutes = divmod(abs(utci), 100)
+    seconds = hours * 3600 + minutes * 60
+    # git's sign is the opposite of the one time.altzone uses
+    if utci < 0:
+        return seconds
+    return -seconds
+
+def altz_to_utctz_str(altz):
+    """Convert an offset in seconds, using the sign convention of time.altzone,
+    into a timezone string that can be used in commit objects.
+
+    Hours and minutes are formatted separately, which keeps offsets that are no
+    multiple of a full hour intact: -19800 yields '+0530'.
+
+    :param altz: offset in seconds
+    :return: string of the form +HHMM or -HHMM"""
+    total_minutes = abs(int(altz)) // 60
+    hours, minutes = divmod(total_minutes, 60)
+    # offsets smaller than a minute are written as +0000, never as -0000
+    if altz >= 60:
+        prefix = '-'
+    else:
+        prefix = '+'
+    # END handle sign
+    return "%s%02d%02d" % (prefix, hours, minutes)
+
+
+def verify_utctz(offset):
+    """Check that offset consists of a sign, '+' or '-', followed by four digits
+
+    :raise ValueError: if offset is incorrect
+    :return: offset"""
+    fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
+    if len(offset) != 5 or offset[0] not in "+-":
+        raise fmt_exc
+    # END check length and sign
+    for char in offset[1:]:
+        if char not in digits:
+            raise fmt_exc
+    # END for each char
+    return offset
+
+def parse_date(string_date):
+ """
+ Parse the given date as one of the following
+ * Git internal format: timestamp offset
+ * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200.
+ * ISO 8601 2005-04-07T22:13:13
+ The T can be a space as well
+
+ :param string_date: string in one of the formats above. A trailing +HHMM or
+ -HHMM is taken as timezone offset, "+0000" is used if there is none
+ :return: Tuple(int(timestamp), int(offset)), the offset being the value
+ utctz_to_altz computes for the timezone offset
+ :raise ValueError: If the format could not be understood. Every exception
+ occurring while parsing, including the failed assertion on a missing
+ separator between date and time, is replaced by it
+ :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY
+ """
+ # git time
+ try:
+ # exactly one space and no colon: 'timestamp offset'
+ if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
+ timestamp, offset = string_date.split()
+ timestamp = int(timestamp)
+ return timestamp, utctz_to_altz(verify_utctz(offset))
+ else:
+ offset = "+0000" # used if the string carries no timezone offset
+ if string_date[-5] in '-+':
+ offset = verify_utctz(string_date[-5:])
+ # from here on string_date lacks the offset, also in the error message below
+ string_date = string_date[:-6] # skip space as well
+ # END split timezone info
+
+ # now figure out the date and time portion - split time
+ date_formats = list()
+ splitter = -1
+ if ',' in string_date:
+ date_formats.append("%a, %d %b %Y")
+ splitter = string_date.rfind(' ')
+ else:
+ # iso plus additional
+ date_formats.append("%Y-%m-%d")
+ date_formats.append("%Y.%m.%d")
+ date_formats.append("%m/%d/%Y")
+ date_formats.append("%d.%m.%Y")
+
+ splitter = string_date.rfind('T')
+ if splitter == -1:
+ splitter = string_date.rfind(' ')
+ # END handle 'T' and ' '
+ # END handle rfc or iso
+
+ # without a separator there is no time portion, the date is not supported
+ assert splitter > -1
+
+ # split date and time
+ time_part = string_date[splitter+1:] # skip space
+ date_part = string_date[:splitter]
+
+ # parse time
+ tstruct = time.strptime(time_part, "%H:%M:%S")
+
+ # the first date format that parses wins
+ for fmt in date_formats:
+ try:
+ dtstruct = time.strptime(date_part, fmt)
+ fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday,
+ tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
+ dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
+ # NOTE(review): time.mktime reads the struct as local time of this machine,
+ # the parsed offset does not take part in the timestamp - confirm this is intended
+ return int(time.mktime(fstruct)), utctz_to_altz(offset)
+ except ValueError:
+ continue
+ # END exception handling
+ # END for each fmt
+
+ # still here ? fail
+ raise ValueError("no format matched")
+ # END handle format
+ except Exception:
+ raise ValueError("Unsupported date format: %s" % string_date)
+ # END handle exceptions
+
+
# precompiled regex
_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
def parse_actor_and_date(line):
- """
- Parse out the actor (author or committer) info from a line like::
-
- author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-
- Returns
- [Actor, int_seconds_since_epoch, int_timezone_offset]
- """
- m = _re_actor_epoch.search(line)
- actor, epoch, offset = m.groups()
- return (Actor._from_string(actor), int(epoch), -int(float(offset)/100*3600))
-
-
-
+ """
+ Parse out the actor (author or committer) info from a line like::
+
+ author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
+
+ The first word of the line is skipped, the remainder up to the timestamp
+ is handed to Actor._from_string.
+
+ Returns
+ tuple(Actor, int_seconds_since_epoch, int_timezone_offset), the offset
+ being the value utctz_to_altz computes for the timezone of the line
+
+ Note
+ a line which does not match the expected format is not handled here:
+ the search yields None, and accessing its groups fails
+ """
+ m = _re_actor_epoch.search(line)
+ actor, epoch, offset = m.groups()
+ return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
+
+
+
class ProcessStreamAdapter(object):
- """
- Class wireing all calls to the contained Process instance.
-
- Use this type to hide the underlying process to provide access only to a specified
- stream. The process is usually wrapped into an AutoInterrupt class to kill
- it if the instance goes out of scope.
- """
- __slots__ = ("_proc", "_stream")
- def __init__(self, process, stream_name):
- self._proc = process
- self._stream = getattr(process, stream_name)
-
- def __getattr__(self, attr):
- return getattr(self._stream, attr)
-
-
+ """
+ Class wiring all attribute access to one stream of the contained Process instance.
+
+ Use this type to hide the underlying process to provide access only to a specified
+ stream. The process is usually wrapped into an AutoInterrupt class to kill
+ it if the instance goes out of scope.
+ """
+ __slots__ = ("_proc", "_stream")
+ def __init__(self, process, stream_name):
+ # process: object owning the stream, a reference to it is kept in _proc
+ # stream_name: name of the attribute of process which holds the stream
+ self._proc = process
+ self._stream = getattr(process, stream_name)
+
+ def __getattr__(self, attr):
+ # only called for attributes not found on the adapter itself,
+ # these are looked up on the stream
+ return getattr(self._stream, attr)
+
+
class Traversable(object):
- """Simple interface to perforam depth-first or breadth-first traversals
- into one direction.
- Subclasses only need to implement one function.
- Instances of the Subclass must be hashable"""
- __slots__ = tuple()
-
- @classmethod
- def _get_intermediate_items(cls, item):
- """
- Returns:
- List of items connected to the given item.
- Must be implemented in subclass
- """
- raise NotImplementedError("To be implemented in subclass")
-
-
- def traverse( self, predicate = lambda i,d: True,
- prune = lambda i,d: False, depth = -1, branch_first=True,
- visit_once = True, ignore_self=1, as_edge = False ):
- """
- ``Returns``
- iterator yieling of items found when traversing self
-
- ``predicate``
- f(i,d) returns False if item i at depth d should not be included in the result
-
- ``prune``
- f(i,d) return True if the search should stop at item i at depth d.
- Item i will not be returned.
-
- ``depth``
- define at which level the iteration should not go deeper
- if -1, there is no limit
- if 0, you would effectively only get self, the root of the iteration
- i.e. if 1, you would only get the first level of predessessors/successors
-
- ``branch_first``
- if True, items will be returned branch first, otherwise depth first
-
- ``visit_once``
- if True, items will only be returned once, although they might be encountered
- several times. Loops are prevented that way.
-
- ``ignore_self``
- if True, self will be ignored and automatically pruned from
- the result. Otherwise it will be the first item to be returned.
- If as_edge is True, the source of the first edge is None
-
- ``as_edge``
- if True, return a pair of items, first being the source, second the
- destinatination, i.e. tuple(src, dest) with the edge spanning from
- source to destination"""
- visited = set()
- stack = Deque()
- stack.append( ( 0 ,self, None ) ) # self is always depth level 0
-
- def addToStack( stack, item, branch_first, depth ):
- lst = self._get_intermediate_items( item )
- if not lst:
- return
- if branch_first:
- stack.extendleft( ( depth , i, item ) for i in lst )
- else:
- reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
- stack.extend( reviter )
- # END addToStack local method
-
- while stack:
- d, item, src = stack.pop() # depth of item, item, item_source
-
- if visit_once and item in visited:
- continue
-
- if visit_once:
- visited.add(item)
-
- rval = ( as_edge and (src, item) ) or item
- if prune( rval, d ):
- continue
-
- skipStartItem = ignore_self and ( item == self )
- if not skipStartItem and predicate( rval, d ):
- yield rval
-
- # only continue to next level if this is appropriate !
- nd = d + 1
- if depth > -1 and nd > depth:
- continue
-
- addToStack( stack, item, branch_first, nd )
- # END for each item on work stack
+ """Simple interface to perform depth-first or breadth-first traversals
+ into one direction.
+ Subclasses only need to implement one function.
+ Instances of the Subclass must be hashable"""
+ __slots__ = tuple()
+
+ @classmethod
+ def _get_intermediate_items(cls, item):
+ """
+ Returns:
+ List of items connected to the given item.
+ Must be implemented in subclass
+ """
+ raise NotImplementedError("To be implemented in subclass")
+
+
+ def traverse( self, predicate = lambda i,d: True,
+ prune = lambda i,d: False, depth = -1, branch_first=True,
+ visit_once = True, ignore_self=1, as_edge = False ):
+ """
+ ``Returns``
+ iterator yielding the items found when traversing self
+
+ ``predicate``
+ f(i,d) returns False if item i at depth d should not be included in the result
+
+ ``prune``
+ f(i,d) return True if the search should stop at item i at depth d.
+ Item i will not be returned.
+
+ ``depth``
+ define at which level the iteration should not go deeper
+ if -1, there is no limit
+ if 0, you would effectively only get self, the root of the iteration
+ i.e. if 1, you would only get the first level of predecessors/successors
+
+ ``branch_first``
+ if True, items will be returned branch first, otherwise depth first
+
+ ``visit_once``
+ if True, items will only be returned once, although they might be encountered
+ several times. Loops are prevented that way.
+
+ ``ignore_self``
+ if True, self will not be returned, the items connected to it are
+ traversed nonetheless. Otherwise it will be the first item to be returned.
+ If as_edge is True, the source of the first edge is None
+
+ ``as_edge``
+ if True, return a pair of items, first being the source, second the
+ destination, i.e. tuple(src, dest) with the edge spanning from
+ source to destination"""
+ visited = set()
+ stack = Deque()
+ stack.append( ( 0 ,self, None ) ) # self is always depth level 0
+
+ def addToStack( stack, item, branch_first, depth ):
+ lst = self._get_intermediate_items( item )
+ if not lst:
+ return
+ if branch_first:
+ # added on the left while items are taken from the right:
+ # everything added earlier is handled first
+ stack.extendleft( ( depth , i, item ) for i in lst )
+ else:
+ # added on the right in reverse order: the first item of lst is taken next
+ reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
+ stack.extend( reviter )
+ # END addToStack local method
+
+ while stack:
+ d, item, src = stack.pop() # depth of item, item, item_source
+
+ if visit_once and item in visited:
+ continue
+
+ if visit_once:
+ visited.add(item)
+
+ # prune and predicate receive the edge instead of the item if as_edge is set
+ rval = ( as_edge and (src, item) ) or item
+ if prune( rval, d ):
+ continue
+
+ skipStartItem = ignore_self and ( item == self )
+ if not skipStartItem and predicate( rval, d ):
+ yield rval
+
+ # only continue to next level if this is appropriate !
+ nd = d + 1
+ if depth > -1 and nd > depth:
+ continue
+
+ addToStack( stack, item, branch_first, nd )
+ # END for each item on work stack
+
+
+class Serializable(object):
+    """Interface of objects which can write themselves to, and restore themselves
+    from, a data stream. Both methods have to be implemented by subclasses"""
+
+    def _serialize(self, stream):
+        """Write the data of this object to the given data stream
+
+        :note: a serialized object would ``_deserialize`` into the same object
+        :param stream: a file-like object
+        :return: self"""
+        raise NotImplementedError("To be implemented in subclass")
+
+    def _deserialize(self, stream):
+        """Read all information regarding this object from the given data stream
+
+        :param stream: a file-like object
+        :return: self"""
+        raise NotImplementedError("To be implemented in subclass")