summary | refs | log | tree | commit | diff
path: root/lib/git/objects/commit.py
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 16:41:28 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 16:41:28 +0200
commit: ae5a69f67822d81bbbd8f4af93be68703e730b37 (patch)
tree: 52201bea881aee06ac3f1cf64fd4d9d30eea7463 /lib/git/objects/commit.py
parent: 4e1c89ec97ec90037583e85d0e9e71e9c845a19b (diff)
download: gitpython-ae5a69f67822d81bbbd8f4af93be68703e730b37.tar.gz
commit: redesigned revlist and commit parsing; commits are now always retrieved directly from their object information. This is faster and resolves issues with the rev-list format and empty commit messages.
Adjusted many tests to match the changes, as they were still mocked. Where necessary, the mock was removed and replaced by code that actually executes.
Diffstat (limited to 'lib/git/objects/commit.py')
-rw-r--r-- lib/git/objects/commit.py 98
1 files changed, 40 insertions, 58 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 948e9a54..98aca360 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -106,13 +106,12 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return commit.parents
def _set_cache_(self, attr):
- """
- Called by LazyMixin superclass when the given uninitialized member needs
+ """ Called by LazyMixin superclass when the given uninitialized member needs
to be set.
- We set all values at once.
- """
+ We set all values at once. """
if attr in Commit.__slots__:
# read the data in a chunk, its faster - then provide a file wrapper
+ # Could use self.data, but lets try to get it with less calls
hexsha, typename, size, data = self.repo.git.get_object_data(self)
self._deserialize(StringIO(data))
else:
@@ -181,16 +180,16 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
Returns
iterator yielding Commit items
"""
- options = {'pretty': 'raw', 'as_process' : True }
- options.update(kwargs)
-
+ if 'pretty' in kwargs:
+ raise ValueError("--pretty cannot be used as parsing expects single sha's only")
+ # END handle pretty
args = list()
if paths:
args.extend(('--', paths))
# END if paths
- proc = repo.git.rev_list(rev, args, **options)
- return cls._iter_from_process_or_stream(repo, proc, True)
+ proc = repo.git.rev_list(rev, args, as_process=True, **kwargs)
+ return cls._iter_from_process_or_stream(repo, proc)
def iter_parents(self, paths='', **kwargs):
"""
@@ -235,35 +234,30 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
return stats.Stats._list_from_string(self.repo, text)
@classmethod
- def _iter_from_process_or_stream(cls, repo, proc_or_stream, from_rev_list):
- """
- Parse out commit information into a list of Commit objects
-
- ``repo``
- is the Repo
-
- ``proc``
- git-rev-list process instance (raw format)
+ def _iter_from_process_or_stream(cls, repo, proc_or_stream):
+ """Parse out commit information into a list of Commit objects
+ We expect one-line per commit, and parse the actual commit information directly
+ from our lighting fast object database
- ``from_rev_list``
- If True, the stream was created by rev-list in which case we parse
- the message differently
- Returns
- iterator returning Commit objects
- """
+ :param proc: git-rev-list process instance - one sha per line
+ :return: iterator returning Commit objects"""
stream = proc_or_stream
if not hasattr(stream,'readline'):
stream = proc_or_stream.stdout
+ readline = stream.readline
while True:
- line = stream.readline()
+ line = readline()
if not line:
break
- commit_tokens = line.split()
- id = commit_tokens[1]
- assert commit_tokens[0] == "commit"
+ sha = line.strip()
+ if len(sha) > 40:
+ # split additional information, as returned by bisect for instance
+ sha, rest = line.split(None, 1)
+ # END handle extra info
- yield Commit(repo, id)._deserialize(stream, from_rev_list)
+ assert len(sha) == 40, "Invalid line: %s" % sha
+ yield Commit(repo, sha)
# END for each line in stream
@@ -386,15 +380,16 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
# for now, this is very inefficient and in fact shouldn't be used like this
return super(Commit, self)._serialize(stream)
- def _deserialize(self, stream, from_rev_list=False):
+ def _deserialize(self, stream):
""":param from_rev_list: if true, the stream format is coming from the rev-list command
Otherwise it is assumed to be a plain data stream from our object"""
- self.tree = Tree(self.repo, stream.readline().split()[1], 0, '')
+ readline = stream.readline
+ self.tree = Tree(self.repo, readline().split()[1], 0, '')
self.parents = list()
next_line = None
while True:
- parent_line = stream.readline()
+ parent_line = readline()
if not parent_line.startswith('parent'):
next_line = parent_line
break
@@ -404,37 +399,24 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
self.parents = tuple(self.parents)
self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
- self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline())
+ self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(readline())
- # empty line
+ # now we can have the encoding line, or an empty line followed by the optional
+ # message.
self.encoding = self.default_encoding
- enc = stream.readline()
- enc.strip()
+ # read encoding or empty line to separate message
+ enc = readline()
+ enc = enc.strip()
if enc:
self.encoding = enc[enc.find(' ')+1:]
- # END parse encoding
-
- message_lines = list()
- if from_rev_list:
- while True:
- msg_line = stream.readline()
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- # cut the last newline to get rid of the artificial newline added
- # by rev-list command. Lets hope its just linux style \n
- message_lines[-1] = message_lines[-1][:-1]
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- self.message = ''.join(message_lines)
- else:
- # a stream from our data simply gives us the plain message
- # The end of our message stream is marked with a newline that we strip
- self.message = stream.read()[:-1]
- # END message parsing
+ # now comes the message separator
+ readline()
+ # END handle encoding
+
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()[:-1]
return self
#} END serializable implementation