summaryrefslogtreecommitdiff
path: root/lib/git/objects/commit.py
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2009-10-15 10:04:17 +0200
committerSebastian Thiel <byronimo@gmail.com>2009-10-15 10:04:17 +0200
commit6226720b0e6a5f7cb9223fc50363def487831315 (patch)
tree10f70f8e41c91f5bf57f04b616f3e5afdb9f8407 /lib/git/objects/commit.py
parentb0e84a3401c84507dc017d6e4f57a9dfdb31de53 (diff)
parent4186a2dbbd48fd67ff88075c63bbd3e6c1d8a2df (diff)
downloadgitpython-6226720b0e6a5f7cb9223fc50363def487831315.tar.gz
Initial set of improvements merged into master, including a class hierarchy redesign and performance improvements
Merge commit 'origin/improvements' * commit 'origin/improvements': (38 commits) test_performance: module containing benchmarks to get an idea of the achieved throughput Removed plenty of mocked tree tests as they cannot work anymore with persistent commands that require stdin AND binary data - not even an adapter would help here. These tests will have to be replaced. tree: now reads trees directly by parsing the binary data, allowing it to save possibly hundreds of command calls Refs are now truly dynamic - this costs a little bit of (persistent command) work, but assures refs behave as expected persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend Implemented git command facility to keep persistent commands for fast object information retrieval test: Added time-consuming test which could also be a benchmark in fact - currently it causes hundreds of command invocations which is slow cmd: added option to return the process directly, allowing to read the output directly from the output stream added Iterable interface to Ref type renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes Added base for all iterable objects unified name of utils module, recently it was named util and utils in different packages tree: renamed content_from_string to _from_string to make it private. 
Removed tests that were testing that method tree: now behaves like a list with string indexing functionality - using a dict as cache is a problem as the tree is ordered, added blobs, trees and traverse method test_base: Improved basic object creation as well as set hash tests repo.active_branch now returns a Head object, not a string IndexObjects are now checking their slots to raise a proper error message in case someone tries to access an unset path or mode - this information cannot be retrieved afterwards as IndexObject information is kept in the object that pointed at them. To find this information, one would have to search all objects which is not feasible refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects renamed from_string and list_from_string to _from_string and _list_from_string to indicate their new status as private method, adjusted all callers respectively ...
Diffstat (limited to 'lib/git/objects/commit.py')
-rw-r--r--lib/git/objects/commit.py299
1 files changed, 299 insertions, 0 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
new file mode 100644
index 00000000..101014ab
--- /dev/null
+++ b/lib/git/objects/commit.py
@@ -0,0 +1,299 @@
+# commit.py
+# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors
+#
+# This module is part of GitPython and is released under
+# the BSD License: http://www.opensource.org/licenses/bsd-license.php
+
+import re
+import time
+from git.utils import Iterable
+from git.actor import Actor
+import git.diff as diff
+import git.stats as stats
+from tree import Tree
+import base
+
class Commit(base.Object, Iterable):
    """
    Wraps a git Commit object.

    This class will act lazily on some of its attributes and will query the
    value on demand only if it involves calling the git binary.
    """
    # precompiled regex applied to author/committer header lines, e.g.
    # "author Some Name <mail@host> 1255593857 +0200":
    # group 1 is everything between the first token and the epoch,
    # group 2 is the epoch (digits only); the trailing token is ignored
    re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$')

    # object configuration
    type = "commit"
    __slots__ = ("tree", "author", "authored_date", "committer", "committed_date",
                 "message", "parents")

    def __init__(self, repo, id, tree=None, author=None, authored_date=None,
                 committer=None, committed_date=None, message=None, parents=None):
        """
        Instantiate a new Commit. All keyword arguments taking None as default will
        be implicitly set if id names a valid sha.

        The parameter documentation indicates the type of the argument after a colon ':'.

        ``id``
            is the sha id of the commit or a ref

        ``parents`` : tuple( Commit, ... )
            is a tuple of commit ids or actual Commits

        ``tree`` : Tree
            is the corresponding tree id or an actual Tree

        ``author`` : Actor
            is the author string ( will be implicitly converted into an Actor object )

        ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst )
            is the authored DateTime

        ``committer`` : Actor
            is the committer string

        ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst)
            is the committed DateTime

        ``message`` : string
            is the commit message

        Returns
            git.Commit
        """
        super(Commit, self).__init__(repo, id)
        # NOTE: locals() must be taken before any further local name is bound,
        # as it is handed to the base class to populate our attributes
        self._set_self_from_args_(locals())

        if parents is not None:
            # every given parent is wrapped into an instance of our own class
            self.parents = tuple(self.__class__(repo, p) for p in parents)
        # END for each parent to convert

        if self.id and tree is not None:
            self.tree = Tree(repo, id=tree, path='')
        # END id to tree conversion

    def _set_cache_(self, attr):
        """
        Called by LazyMixin superclass when the given uninitialized member needs
        to be set.
        We set all values at once.

        ``attr``
            name of the attribute that was requested
        """
        if attr in Commit.__slots__:
            # prepare our data lines to match rev-list: prepend the commit line.
            # NOTE(review): self.data is presumably the raw commit object
            # ( as printed by git cat-file ) whose message is NOT indented -
            # hence we tell the parser not to expect rev-list message indentation,
            # otherwise the message would always come out empty
            data_lines = self.data.splitlines()
            data_lines.insert(0, "commit %s" % self.id)
            temp = self._iter_from_process_or_stream(self.repo, iter(data_lines),
                                                     from_rev_list=False).next()
            self.parents = temp.parents
            self.tree = temp.tree
            self.author = temp.author
            self.authored_date = temp.authored_date
            self.committer = temp.committer
            self.committed_date = temp.committed_date
            self.message = temp.message
        else:
            super(Commit, self)._set_cache_(attr)

    @property
    def summary(self):
        """
        Returns
            First line of the commit message.
        """
        return self.message.split('\n', 1)[0]

    @classmethod
    def count(cls, repo, ref, path=''):
        """
        Count the number of commits reachable from this ref

        ``repo``
            is the Repo

        ``ref``
            is the ref from which to begin (SHA1 or name)

        ``path``
            is an optional path

        Returns
            int
        """
        # rev-list prints one sha per line, hence counting lines counts commits
        return len(repo.git.rev_list(ref, '--', path).strip().splitlines())

    @classmethod
    def iter_items(cls, repo, ref, path='', **kwargs):
        """
        Find all commits matching the given criteria.

        ``repo``
            is the Repo

        ``ref``
            is the ref from which to begin (SHA1, Head or name)

        ``path``
            is an optional path, if set only Commits that include the path
            will be considered

        ``kwargs``
            optional keyword arguments to git where
            ``max_count`` is the maximum number of commits to fetch
            ``skip`` is the number of commits to skip

        Returns
            iterator yielding Commit items
        """
        # defaults may be overridden by the caller's keyword arguments
        options = {'pretty': 'raw', 'as_process': True}
        options.update(kwargs)

        # the result is handed on as is: the parser accepts a process
        # ( read through its stdout ) as well as a plain line iterator
        proc = repo.git.rev_list(ref, '--', path, **options)
        return cls._iter_from_process_or_stream(repo, proc)

    @classmethod
    def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list=True):
        """
        Parse out commit information into a list of Commit objects

        ``repo``
            is the Repo

        ``proc_or_stream``
            git-rev-list process instance (raw format), or an iterator
            yielding the lines of such output

        ``from_rev_list``
            if True ( default ), the message of each commit is expected to be
            indented as in rev-list output and to end at the first line that
            is not indented, which allows several commits per stream.
            If False, the stream is expected to hold exactly one commit whose
            message is not indented: all lines following the headers are taken
            as message.

        Returns
            iterator returning Commit objects
        """
        stream = proc_or_stream
        if not hasattr(stream, 'next'):
            # not an iterator itself: read the lines from the process' stdout
            stream = proc_or_stream.stdout

        for line in stream:
            tokens = line.split()
            # verify the keyword before trusting the remainder of the line
            assert tokens[0] == "commit"
            commit_id = tokens[1]
            tree = stream.next().split()[1]

            parents = []
            next_line = None
            for parent_line in stream:
                if not parent_line.startswith('parent'):
                    # first line after the parents: keep it, it is the author line
                    next_line = parent_line
                    break
                # END abort reading parents
                parents.append(parent_line.split()[-1])
            # END for each parent line

            author, authored_date = cls._actor(next_line)
            committer, committed_date = cls._actor(stream.next())

            # empty line separating the headers from the message
            stream.next()

            if from_rev_list:
                message_lines = []
                for msg_line in stream:
                    if not msg_line.startswith(' '):
                        # unindented line marks the end of this commit's message
                        break
                    # END abort message reading
                    message_lines.append(msg_line.strip())
                # END while there are message lines
            else:
                # plain commit data: everything left is the message. Lines are
                # stripped the same way as above so both paths agree on the result
                message_lines = [msg_line.strip() for msg_line in stream]
            # END message parsing
            message = '\n'.join(message_lines)

            yield Commit(repo, id=commit_id, parents=parents, tree=tree,
                         author=author, authored_date=authored_date,
                         committer=committer, committed_date=committed_date,
                         message=message)
        # END for each line in stream

    @classmethod
    def diff(cls, repo, a, b=None, paths=None):
        """
        Creates diffs between a tree and the index or between two trees:

        ``repo``
            is the Repo

        ``a``
            is a named commit

        ``b``
            is an optional named commit. Passing a list assumes you
            wish to omit the second named commit and limit the diff to the
            given paths.

        ``paths``
            is a list of paths to limit the diff to. It is not modified.

        Returns
            git.Diff[]::

             between tree and the index if only a is given
             between two trees if a and b are given and are commits
        """
        if isinstance(b, list):
            paths = b
            b = None
        # END handle paths given in place of b

        # assemble the arguments in a fresh list - the list handed in by the
        # caller must not be altered
        args = [a]
        if b:
            args.append(b)
        if paths:
            args.append("--")
            args.extend(paths)
        # END handle paths

        text = repo.git.diff('-M', full_index=True, *args)
        return diff.Diff._list_from_string(repo, text)

    @property
    def diffs(self):
        """
        Returns
            git.Diff[]
            Diffs between this commit and its first parent or all changes if this
            commit is the first commit and has no parent.
        """
        if not self.parents:
            d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw')
            return diff.Diff._list_from_string(self.repo, d)
        else:
            return self.diff(self.repo, self.parents[0].id, self.id)

    @property
    def stats(self):
        """
        Create a git stat from changes between this commit and its first parent
        or from all changes done if this is the very first commit.

        Return
            git.Stats
        """
        if not self.parents:
            text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True)
            # the first line of the output is skipped, every other line has to
            # consist of exactly three tab separated fields
            stat_lines = []
            for line in text.splitlines()[1:]:
                (insertions, deletions, filename) = line.split("\t")
                stat_lines.append("%s\t%s\t%s\n" % (insertions, deletions, filename))
            # END for each numstat line
            text = ''.join(stat_lines)
        else:
            text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True)
        return stats.Stats._list_from_string(self.repo, text)

    def __str__(self):
        """ Convert commit to string which is SHA1 """
        return self.id

    def __repr__(self):
        return '<git.Commit "%s">' % self.id

    @classmethod
    def _actor(cls, line):
        """
        Parse out the actor (author or committer) info

        ``line``
            author or committer header line; it has to match re_actor_epoch,
            otherwise an AttributeError is raised as there is no match object

        Returns
            [Actor, gmtime(acted at time)]
        """
        m = cls.re_actor_epoch.search(line)
        actor, epoch = m.groups()
        return (Actor._from_string(actor), time.gmtime(int(epoch)))