summary | refs | log | tree | commit | diff
path: root/lib/git/objects/commit.py
diff options
context:
space:
mode:
author: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 12:30:33 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-02 12:51:05 +0200
commit: 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 (patch)
tree: 87481ab28367db496886a3801bda37227c10f7ed /lib/git/objects/commit.py
parent: df0892351a394d768489b5647d47b73c24d3ef5f (diff)
download: gitpython-8c1a87d11df666d308d14e4ae7ee0e9d614296b6.tar.gz
commit: refactored existing code to decode commits from streams - performance is slightly better
git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used
test.helpers: removed unnecessary code
Diffstat (limited to 'lib/git/objects/commit.py')
-rw-r--r-- lib/git/objects/commit.py 139
1 files changed, 74 insertions, 65 deletions
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index 87eed49b..948e9a54 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -9,12 +9,14 @@ import git.diff as diff
import git.stats as stats
from git.actor import Actor
from tree import Tree
+from cStringIO import StringIO
import base
import utils
import time
import os
-class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
+
+class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Serializable):
"""
Wraps a git Commit object.
@@ -91,7 +93,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
self._set_self_from_args_(locals())
if parents is not None:
- self.parents = tuple( self.__class__(repo, p) for p in parents )
+ cls = type(self)
+ self.parents = tuple(cls(repo, p) for p in parents if not isinstance(p, cls))
# END for each parent to convert
if self.sha and tree is not None:
@@ -109,20 +112,9 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
We set all values at once.
"""
if attr in Commit.__slots__:
- # prepare our data lines to match rev-list
- data_lines = self.data.splitlines()
- data_lines.insert(0, "commit %s" % self.sha)
- temp = self._iter_from_process_or_stream(self.repo, iter(data_lines), False).next()
- self.parents = temp.parents
- self.tree = temp.tree
- self.author = temp.author
- self.authored_date = temp.authored_date
- self.author_tz_offset = temp.author_tz_offset
- self.committer = temp.committer
- self.committed_date = temp.committed_date
- self.committer_tz_offset = temp.committer_tz_offset
- self.message = temp.message
- self.encoding = temp.encoding
+ # read the data in a chunk, its faster - then provide a file wrapper
+ hexsha, typename, size, data = self.repo.git.get_object_data(self)
+ self._deserialize(StringIO(data))
else:
super(Commit, self)._set_cache_(attr)
@@ -260,59 +252,18 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
iterator returning Commit objects
"""
stream = proc_or_stream
- if not hasattr(stream,'next'):
+ if not hasattr(stream,'readline'):
stream = proc_or_stream.stdout
- for line in stream:
- commit_tokens = line.split()
+ while True:
+ line = stream.readline()
+ if not line:
+ break
+ commit_tokens = line.split()
id = commit_tokens[1]
assert commit_tokens[0] == "commit"
- tree = stream.next().split()[1]
-
- parents = []
- next_line = None
- for parent_line in stream:
- if not parent_line.startswith('parent'):
- next_line = parent_line
- break
- # END abort reading parents
- parents.append(parent_line.split()[-1])
- # END for each parent line
-
- author, authored_date, author_tz_offset = utils.parse_actor_and_date(next_line)
- committer, committed_date, committer_tz_offset = utils.parse_actor_and_date(stream.next())
-
- # empty line
- encoding = stream.next()
- encoding.strip()
- if encoding:
- encoding = encoding[encoding.find(' ')+1:]
- # END parse encoding
-
- message_lines = list()
- if from_rev_list:
- for msg_line in stream:
- if not msg_line.startswith(' '):
- # and forget about this empty marker
- break
- # END abort message reading
- # strip leading 4 spaces
- message_lines.append(msg_line[4:])
- # END while there are message lines
- else:
- # a stream from our data simply gives us the plain message
- for msg_line in stream:
- message_lines.append(msg_line)
- # END message parsing
- message = '\n'.join(message_lines)
-
-
- yield Commit(repo, id, tree,
- author, authored_date, author_tz_offset,
- committer, committed_date, committer_tz_offset,
- message, tuple(parents),
- encoding or cls.default_encoding)
+ yield Commit(repo, id)._deserialize(stream, from_rev_list)
# END for each line in stream
@@ -393,7 +344,7 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
# assume utf8 encoding
enc_section, enc_option = cls.conf_encoding.split('.')
- conf_encoding = cr.get_value(enc_section, enc_option, default_encoding)
+ conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding)
author = Actor(author_name, author_email)
committer = Actor(committer_name, committer_email)
@@ -429,3 +380,61 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable):
def __repr__(self):
return '<git.Commit "%s">' % self.sha
+ #{ Serializable Implementation
+
+ def _serialize(self, stream):
+ # for now, this is very inefficient and in fact shouldn't be used like this
+ return super(Commit, self)._serialize(stream)
+
+ def _deserialize(self, stream, from_rev_list=False):
+ """:param from_rev_list: if true, the stream format is coming from the rev-list command
+ Otherwise it is assumed to be a plain data stream from our object"""
+ self.tree = Tree(self.repo, stream.readline().split()[1], 0, '')
+
+ self.parents = list()
+ next_line = None
+ while True:
+ parent_line = stream.readline()
+ if not parent_line.startswith('parent'):
+ next_line = parent_line
+ break
+ # END abort reading parents
+ self.parents.append(type(self)(self.repo, parent_line.split()[-1]))
+ # END for each parent line
+ self.parents = tuple(self.parents)
+
+ self.author, self.authored_date, self.author_tz_offset = utils.parse_actor_and_date(next_line)
+ self.committer, self.committed_date, self.committer_tz_offset = utils.parse_actor_and_date(stream.readline())
+
+
+ # empty line
+ self.encoding = self.default_encoding
+ enc = stream.readline()
+ enc.strip()
+ if enc:
+ self.encoding = enc[enc.find(' ')+1:]
+ # END parse encoding
+
+ message_lines = list()
+ if from_rev_list:
+ while True:
+ msg_line = stream.readline()
+ if not msg_line.startswith(' '):
+ # and forget about this empty marker
+ # cut the last newline to get rid of the artificial newline added
+ # by rev-list command. Lets hope its just linux style \n
+ message_lines[-1] = message_lines[-1][:-1]
+ break
+ # END abort message reading
+ # strip leading 4 spaces
+ message_lines.append(msg_line[4:])
+ # END while there are message lines
+ self.message = ''.join(message_lines)
+ else:
+ # a stream from our data simply gives us the plain message
+ # The end of our message stream is marked with a newline that we strip
+ self.message = stream.read()[:-1]
+ # END message parsing
+ return self
+
+ #} END serializable implementation