summaryrefslogtreecommitdiff
path: root/lib/git/objects/commit.py
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2009-10-15 00:06:30 +0200
committerSebastian Thiel <byronimo@gmail.com>2009-10-15 00:06:30 +0200
commit4186a2dbbd48fd67ff88075c63bbd3e6c1d8a2df (patch)
tree10f70f8e41c91f5bf57f04b616f3e5afdb9f8407 /lib/git/objects/commit.py
parent637eadce54ca8bbe536bcf7c570c025e28e47129 (diff)
parent1a4bfd979e5d4ea0d0457e552202eb2effc36cac (diff)
downloadgitpython-4186a2dbbd48fd67ff88075c63bbd3e6c1d8a2df.tar.gz
Merge branch 'iteration_and_retrieval' into improvements
* iteration_and_retrieval: test_performance: module containing benchmarks to get an idea of the achieved throughput Removed plenty of mocked tree tests as they cannot work anymore with persistent commands that require stdin AND binary data - not even an adapter would help here. These tests will have to be replaced. tree: now reads trees directly by parsing the binary data, allowing it to save possibly hundreds of command calls Refs are now truly dynamic - this costs a little bit of (persistent command) work, but assures refs behave as expected persistent command signature changed to also return the hexsha from a possible input ref - the objects pointed to by refs are now baked on demand - perhaps it should change to always be re-retrieved using a property as it is relatively fast - this way refs can always be cached test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend Implemented git command facility to keep persistent commands for fast object information retrieval test: Added time-consuming test which could also be a benchmark in fact - currently it causes hundreds of command invocations which is slow cmd: added option to return the process directly, allowing to read the output directly from the output stream added Iterable interface to Ref type renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes Added base for all iterable objects unified name of utils module, recently it was named util and utils in different packages tree: renamed content_from_string to _from_string to make it private. 
Removed tests that were testing that method tree: now behaves like a list with string indexing functionality - using a dict as cache is a problem as the tree is ordered, added blobs, trees and traverse method test_base: Improved basic object creation as well as set hash tests repo.active_branch now returns a Head object, not a string IndexObjects are now checking their slots to raise a proper error message in case someone tries to access an unset path or mode - this information cannot be retrieved afterwards as IndexObject information is kept in the object that pointed at them. To find this information, one would have to search all objects which is not feasible refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects
Diffstat (limited to 'lib/git/objects/commit.py')
-rw-r--r--lib/git/objects/commit.py91
1 files changed, 53 insertions, 38 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index c3e97bf9..101014ab 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -6,14 +6,14 @@
import re
import time
-
+from git.utils import Iterable
from git.actor import Actor
-from tree import Tree
import git.diff as diff
import git.stats as stats
+from tree import Tree
import base
-class Commit(base.Object):
+class Commit(base.Object, Iterable):
"""
Wraps a git Commit object.
@@ -37,7 +37,7 @@ class Commit(base.Object):
The parameter documentation indicates the type of the argument after a colon ':'.
``id``
- is the sha id of the commit
+ is the sha id of the commit or a ref
``parents`` : tuple( Commit, ... )
is a tuple of commit ids or actual Commits
@@ -71,7 +71,7 @@ class Commit(base.Object):
# END for each parent to convert
if self.id and tree is not None:
- self.tree = Tree(repo, id=tree)
+ self.tree = Tree(repo, id=tree, path='')
# END id to tree conversion
def _set_cache_(self, attr):
@@ -80,8 +80,11 @@ class Commit(base.Object):
to be set.
We set all values at once.
"""
- if attr in self.__slots__:
- temp = Commit.find_all(self.repo, self.id, max_count=1)[0]
+ if attr in Commit.__slots__:
+ # prepare our data lines to match rev-list
+ data_lines = self.data.splitlines()
+ data_lines.insert(0, "commit %s" % self.id)
+ temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next()
self.parents = temp.parents
self.tree = temp.tree
self.author = temp.author
@@ -120,7 +123,7 @@ class Commit(base.Object):
return len(repo.git.rev_list(ref, '--', path).strip().splitlines())
@classmethod
- def find_all(cls, repo, ref, path='', **kwargs):
+ def iter_items(cls, repo, ref, path='', **kwargs):
"""
Find all commits matching the given criteria.
@@ -128,7 +131,7 @@ class Commit(base.Object):
is the Repo
``ref``
- is the ref from which to begin (SHA1 or name)
+ is the ref from which to begin (SHA1, Head or name)
``path``
is an optinal path, if set only Commits that include the path
@@ -140,55 +143,67 @@ class Commit(base.Object):
``skip`` is the number of commits to skip
Returns
- git.Commit[]
+ iterator yielding Commit items
"""
- options = {'pretty': 'raw'}
+ options = {'pretty': 'raw', 'as_process' : True }
options.update(kwargs)
- output = repo.git.rev_list(ref, '--', path, **options)
- return cls._list_from_string(repo, output)
+ # the test system might confront us with string values -
+ proc = repo.git.rev_list(ref, '--', path, **options)
+ return cls._iter_from_process_or_stream(repo, proc)
@classmethod
- def _list_from_string(cls, repo, text):
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
"""
Parse out commit information into a list of Commit objects
``repo``
is the Repo
- ``text``
- is the text output from the git-rev-list command (raw format)
+ ``proc``
+ git-rev-list process instance (raw format)
Returns
- git.Commit[]
+ iterator returning Commit objects
"""
- lines =text.splitlines(False)
- commits = []
-
- while lines:
- id = lines.pop(0).split()[1]
- tree = lines.pop(0).split()[1]
+ stream = proc_or_stream
+ if not hasattr(stream,'next'):
+ stream = proc_or_stream.stdout
+
+ for line in stream:
+ id = line.split()[1]
+ assert line.split()[0] == "commit"
+ tree = stream.next().split()[1]
parents = []
- while lines and lines[0].startswith('parent'):
- parents.append(lines.pop(0).split()[-1])
- # END while there are parent lines
- author, authored_date = cls._actor(lines.pop(0))
- committer, committed_date = cls._actor(lines.pop(0))
+ next_line = None
+ for parent_line in stream:
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ parents.append(parent_line.split()[-1])
+ # END for each parent line
+
+ author, authored_date = cls._actor(next_line)
+ committer, committed_date = cls._actor(stream.next())
- # free line
- lines.pop(0)
+ # empty line
+ stream.next()
message_lines = []
- while lines and not lines[0].startswith('commit'):
- message_lines.append(lines.pop(0).strip())
+ next_line = None
+ for msg_line in stream:
+ if not msg_line.startswith(' '):
+ break
+ # END abort message reading
+ message_lines.append(msg_line.strip())
# END while there are message lines
- message = '\n'.join(message_lines[:-1]) # last line is empty
-
- commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
- committer=committer, committed_date=committed_date, message=message))
- # END while lines
- return commits
+ message = '\n'.join(message_lines)
+
+ yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date,
+ committer=committer, committed_date=committed_date, message=message)
+ # END for each line in stream
@classmethod
def diff(cls, repo, a, b=None, paths=None):