diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 12:30:33 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 12:51:05 +0200 |
commit | 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 (patch) | |
tree | 87481ab28367db496886a3801bda37227c10f7ed /lib/git/objects/commit.py | |
parent | df0892351a394d768489b5647d47b73c24d3ef5f (diff) | |
download | gitpython-8c1a87d11df666d308d14e4ae7ee0e9d614296b6.tar.gz |
commit: refactored existing code to decode commits from streams - performance is slightly better
git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used
test.helpers: removed unnecessary code
Diffstat (limited to 'lib/git/objects/commit.py')
-rw-r--r-- | lib/git/objects/commit.py | 139 |
1 files changed, 74 insertions, 65 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 87eed49b..948e9a54 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -9,12 +9,14 @@ import git.diff as diff import git.stats as stats from git.actor import Actor from tree import Tree +from cStringIO import StringIO import base import utils import time import os -class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): + +class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable): """ Wraps a git Commit object. @@ -91,7 +93,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): self._set_self_from_args_(locals()) if parents is not None: - self.parents = tuple( self.__class__(repo, p) for p in parents ) + cls = type(self) + self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls)) # END for each parent to convert if self.sha and tree is not None: @@ -109,20 +112,9 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): We set all values at once. """ if attr in Commit.__slots__: - # prepare our data lines to match rev-list - data_lines = self.data.splitlines() - data_lines.insert(0, "commit %s" % self.sha) - temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next() - self.parents = temp.parents - self.tree = temp.tree - self.author = temp.author - self.authored_date = temp.authored_date - self.author_tz_offset = temp.author_tz_offset - self.committer = temp.committer - self.committed_date = temp.committed_date - self.committer_tz_offset = temp.committer_tz_offset - self.message = temp.message - self.encoding = temp.encoding + # read the data in a chunk, its faster - then provide a file wrapper + hexsha, typename, size, data = self.repo.git.get_object_data(self) + self._deserialize(StringIO(data)) else: super(Commit, self)._set_cache_(attr) @@ -260,59 +252,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): iterator returning Commit objects """ stream = proc_or_stream - if not hasattr(stream,'next'): + if not hasattr(stream,'readline'): stream = proc_or_stream.stdout - for line in stream: - commit_tokens = line.split() + while True: + line = stream.readline() + if not line: + break + commit_tokens = line.split() id = commit_tokens[1] assert commit_tokens[0] == "commit" - tree = stream.next().split()[1] - - parents = [] - next_line = None - for parent_line in stream: - if not parent_line.startswith('parent'): - next_line = parent_line - break - # END abort reading parents - parents.append(parent_line.split()[-1]) - # END for each parent line - - author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line) - committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next()) - - # empty line - encoding = stream.next() - encoding.strip() - if encoding: - encoding = encoding[encoding.find(' ')+1:] - # END parse encoding - - message_lines = list() - if from_rev_list: - for msg_line in stream: - if not msg_line.startswith(' '): - # and forget about this empty marker - break - # END abort message reading - # strip leading 4 spaces - message_lines.append(msg_line[4:]) - # END while there are message lines - else: - # a stream from our data simply gives us the plain message - for msg_line in stream: - message_lines.append(msg_line) - # END message parsing - message = '\n'.join(message_lines) - - - yield Commit(repo, id, tree, - author, authored_date, author_tz_offset, - committer, committed_date, committer_tz_offset, - message, tuple(parents), - encoding or cls.default_encoding) + yield Commit(repo, id)._deserialize(stream, from_rev_list) # END for each line in stream @@ -393,7 +344,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') - conf_encoding = cr.get_value(enc_section, enc_option, default_encoding) + conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) author = Actor(author_name, author_email) committer = Actor(committer_name, committer_email) @@ -429,3 +380,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable): def __repr__(self): return '<git.Commit "%s">' % self.sha + #{ Serializable Implementation + + def _serialize(self, stream): + # for now, this is very inefficient and in fact shouldn't be used like this + return super(Commit, self)._serialize(stream) + + def _deserialize(self, stream, from_rev_list=False): + """:param from_rev_list: if true, the stream format is coming from the rev-list command + Otherwise it is assumed to be a plain data stream from our object""" + self.tree = Tree(self.repo, stream.readline().split()[1], 0, '') + + self.parents = list() + next_line = None + while True: + parent_line = stream.readline() + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + self.parents.append(type(self)(self.repo, parent_line.split()[-1])) + # END for each parent line + self.parents = tuple(self.parents) + + self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line) + self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline()) + + + # empty line + self.encoding = self.default_encoding + enc = stream.readline() + enc.strip() + if enc: + self.encoding = enc[enc.find(' ')+1:] + # END parse encoding + + message_lines = list() + if from_rev_list: + while True: + msg_line = stream.readline() + if not msg_line.startswith(' '): + # and forget about this empty marker + # cut the last newline to get rid of the artificial newline added + # by rev-list command. Lets hope its just linux style \n + message_lines[-1] = message_lines[-1][:-1] + break + # END abort message reading + # strip leading 4 spaces + message_lines.append(msg_line[4:]) + # END while there are message lines + self.message = ''.join(message_lines) + else: + # a stream from our data simply gives us the plain message + # The end of our message stream is marked with a newline that we strip + self.message = stream.read()[:-1] + # END message parsing + return self + + #} END serializable implementation |