From f2834177c0fdf6b1af659e460fd3348f468b8ab0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 11:50:14 +0200 Subject: Reorganized package structure and cleaned up imports --- lib/git/objects/commit.py | 284 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 lib/git/objects/commit.py (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py new file mode 100644 index 00000000..c70b03e4 --- /dev/null +++ b/lib/git/objects/commit.py @@ -0,0 +1,284 @@ +# commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import re +import time + +from git.actor import Actor +from tree import Tree +import git.diff as diff +import git.stats as stats +import base + +class Commit(base.Object): + """ + Wraps a git Commit object. + + This class will act lazily on some of its attributes and will query the + value on demand only if it involves calling the git binary. + """ + # precompiled regex + re_actor_epoch = re.compile(r'^.+? (.*) (\d+) .*$') + + # object configuration + type = "commit" + __slots__ = ("tree", "author", "authored_date", "committer", "committed_date", + "message", "parents") + + def __init__(self, repo, id, tree=None, author=None, authored_date=None, + committer=None, committed_date=None, message=None, parents=None): + """ + Instantiate a new Commit. All keyword arguments taking None as default will + be implicitly set if id names a valid sha. + + The parameter documentation indicates the type of the argument after a colon ':'. + + ``id`` + is the sha id of the commit + + ``parents`` : tuple( Commit, ... ) + is a tuple of commit ids or actual Commits + + ``tree`` : Tree + is the corresponding tree id or an actual Tree + + ``author`` : Actor + is the author string ( will be implicitly converted into an Actor object ) + + ``authored_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst ) + is the authored DateTime + + ``committer`` : Actor + is the committer string + + ``committed_date`` : (tm_year, tm_mon, tm_mday, tm_hour, tm_min, tm_sec, tm_wday, tm_yday, tm_isdst) + is the committed DateTime + + ``message`` : string + is the commit message + + Returns + git.Commit + """ + super(Commit,self).__init__(repo, id) + self._set_self_from_args_(locals()) + + if parents is not None: + self.parents = tuple( self.__class__(repo, p) for p in parents ) + # END for each parent to convert + + if self.id and tree is not None: + self.tree = Tree(repo, id=tree) + # END id to tree conversion + + def _set_cache_(self, attr): + """ + Called by LazyMixin superclass when the given uninitialized member needs + to be set. + We set all values at once. + """ + if attr in self.__slots__: + temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + self.parents = temp.parents + self.tree = temp.tree + self.author = temp.author + self.authored_date = temp.authored_date + self.committer = temp.committer + self.committed_date = temp.committed_date + self.message = temp.message + else: + super(Commit, self)._set_cache_(attr) + + @property + def summary(self): + """ + Returns + First line of the commit message. + """ + return self.message.split('\n', 1)[0] + + @classmethod + def count(cls, repo, ref, path=''): + """ + Count the number of commits reachable from this ref + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path + + Returns + int + """ + return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) + + @classmethod + def find_all(cls, repo, ref, path='', **kwargs): + """ + Find all commits matching the given criteria. + + ``repo`` + is the Repo + + ``ref`` + is the ref from which to begin (SHA1 or name) + + ``path`` + is an optinal path, if set only Commits that include the path + will be considered + + ``kwargs`` + optional keyword arguments to git where + ``max_count`` is the maximum number of commits to fetch + ``skip`` is the number of commits to skip + + Returns + git.Commit[] + """ + options = {'pretty': 'raw'} + options.update(kwargs) + + output = repo.git.rev_list(ref, '--', path, **options) + return cls.list_from_string(repo, output) + + @classmethod + def list_from_string(cls, repo, text): + """ + Parse out commit information into a list of Commit objects + + ``repo`` + is the Repo + + ``text`` + is the text output from the git-rev-list command (raw format) + + Returns + git.Commit[] + """ + lines =text.splitlines(False) + commits = [] + + while lines: + id = lines.pop(0).split()[1] + tree = lines.pop(0).split()[1] + + parents = [] + while lines and lines[0].startswith('parent'): + parents.append(lines.pop(0).split()[-1]) + # END while there are parent lines + author, authored_date = cls._actor(lines.pop(0)) + committer, committed_date = cls._actor(lines.pop(0)) + + # free line + lines.pop(0) + + message_lines = [] + while lines and not lines[0].startswith('commit'): + message_lines.append(lines.pop(0).strip()) + # END while there are message lines + message = '\n'.join(message_lines[:-1]) # last line is empty + + commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message)) + # END while lines + return commits + + @classmethod + def diff(cls, repo, a, b=None, paths=None): + """ + Creates diffs between a tree and the index or between two trees: + + ``repo`` + is the Repo + + ``a`` + is a named commit + + ``b`` + is an optional named commit. Passing a list assumes you + wish to omit the second named commit and limit the diff to the + given paths. + + ``paths`` + is a list of paths to limit the diff to. + + Returns + git.Diff[]:: + + between tree and the index if only a is given + between two trees if a and b are given and are commits + """ + paths = paths or [] + + if isinstance(b, list): + paths = b + b = None + + if paths: + paths.insert(0, "--") + + if b: + paths.insert(0, b) + paths.insert(0, a) + text = repo.git.diff('-M', full_index=True, *paths) + return diff.Diff.list_from_string(repo, text) + + @property + def diffs(self): + """ + Returns + git.Diff[] + Diffs between this commit and its first parent or all changes if this + commit is the first commit and has no parent. + """ + if not self.parents: + d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') + return diff.Diff.list_from_string(self.repo, d) + else: + return self.diff(self.repo, self.parents[0].id, self.id) + + @property + def stats(self): + """ + Create a git stat from changes between this commit and its first parent + or from all changes done if this is the very first commit. + + Return + git.Stats + """ + if not self.parents: + text = self.repo.git.diff_tree(self.id, '--', numstat=True, root=True) + text2 = "" + for line in text.splitlines()[1:]: + (insertions, deletions, filename) = line.split("\t") + text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) + text = text2 + else: + text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) + return stats.Stats.list_from_string(self.repo, text) + + def __str__(self): + """ Convert commit to string which is SHA1 """ + return self.id + + def __repr__(self): + return '' % self.id + + @classmethod + def _actor(cls, line): + """ + Parse out the actor (author or committer) info + + Returns + [Actor, gmtime(acted at time)] + """ + m = cls.re_actor_epoch.search(line) + actor, epoch = m.groups() + return (Actor.from_string(actor), time.gmtime(int(epoch))) -- cgit v1.2.1 From 637eadce54ca8bbe536bcf7c570c025e28e47129 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 14:56:47 +0200 Subject: renamed from_string and list_from_string to _from_string and _list_from_string to indicate their new status as private method, adjusted all callers respectively --- lib/git/objects/commit.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c70b03e4..c3e97bf9 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -146,10 +146,10 @@ class Commit(base.Object): options.update(kwargs) output = repo.git.rev_list(ref, '--', path, **options) - return cls.list_from_string(repo, output) + return cls._list_from_string(repo, output) @classmethod - def list_from_string(cls, repo, text): + def _list_from_string(cls, repo, text): """ Parse out commit information into a list of Commit objects @@ -228,7 +228,7 @@ class Commit(base.Object): paths.insert(0, b) paths.insert(0, a) text = repo.git.diff('-M', full_index=True, *paths) - return diff.Diff.list_from_string(repo, text) + return diff.Diff._list_from_string(repo, text) @property def diffs(self): @@ -240,7 +240,7 @@ class Commit(base.Object): """ if not self.parents: d = self.repo.git.show(self.id, '-M', full_index=True, pretty='raw') - return diff.Diff.list_from_string(self.repo, d) + return diff.Diff._list_from_string(self.repo, d) else: return self.diff(self.repo, self.parents[0].id, self.id) @@ -262,7 +262,7 @@ class Commit(base.Object): text = text2 else: text = self.repo.git.diff(self.parents[0].id, self.id, '--', numstat=True) - return stats.Stats.list_from_string(self.repo, text) + return stats.Stats._list_from_string(self.repo, text) def __str__(self): """ Convert commit to string which is SHA1 """ @@ -281,4 +281,4 @@ class Commit(base.Object): """ m = cls.re_actor_epoch.search(line) actor, epoch = m.groups() - return (Actor.from_string(actor), time.gmtime(int(epoch))) + return (Actor._from_string(actor), time.gmtime(int(epoch))) -- cgit v1.2.1 From 4c73e9cd66c77934f8a262b0c1bab9c2f15449ba Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 12 Oct 2009 17:03:01 +0200 Subject: refs now take repo as first argument and derive from LazyMixin to allow them to dynamically retrieve their objects Improved way commits are returned by refs as they now use the path to be sure they always point to the ref even if it changes - previously it would use the sha intead so it would not update after being cached on the ref object --- lib/git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c3e97bf9..f1f878d7 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -37,7 +37,7 @@ class Commit(base.Object): The parameter documentation indicates the type of the argument after a colon ':'. ``id`` - is the sha id of the commit + is the sha id of the commit or a ref ``parents`` : tuple( Commit, ... ) is a tuple of commit ids or actual Commits -- cgit v1.2.1 From 6acec357c7609fdd2cb0f5fdb1d2756726c7fe98 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:26:19 +0200 Subject: renamed find_all to list_all, changed commit to use iterable interface in preparation for command changes --- lib/git/objects/commit.py | 71 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f1f878d7..c289b825 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -6,14 +6,14 @@ import re import time - +from git.utils import Iterable from git.actor import Actor -from tree import Tree import git.diff as diff import git.stats as stats +from tree import Tree import base -class Commit(base.Object): +class Commit(base.Object, Iterable): """ Wraps a git Commit object. @@ -81,7 +81,7 @@ class Commit(base.Object): We set all values at once. """ if attr in self.__slots__: - temp = Commit.find_all(self.repo, self.id, max_count=1)[0] + temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -120,7 +120,7 @@ class Commit(base.Object): return len(repo.git.rev_list(ref, '--', path).strip().splitlines()) @classmethod - def find_all(cls, repo, ref, path='', **kwargs): + def iter_items(cls, repo, ref, path='', **kwargs): """ Find all commits matching the given criteria. @@ -128,7 +128,7 @@ class Commit(base.Object): is the Repo ``ref`` - is the ref from which to begin (SHA1 or name) + is the ref from which to begin (SHA1, Head or name) ``path`` is an optinal path, if set only Commits that include the path @@ -146,49 +146,56 @@ class Commit(base.Object): options.update(kwargs) output = repo.git.rev_list(ref, '--', path, **options) - return cls._list_from_string(repo, output) + return cls._iter_from_stream(repo, iter(output.splitlines(False))) @classmethod - def _list_from_string(cls, repo, text): + def _iter_from_stream(cls, repo, stream): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``text`` - is the text output from the git-rev-list command (raw format) + ``stream`` + output stream from the git-rev-list command (raw format) Returns - git.Commit[] + iterator returning Commit objects """ - lines =text.splitlines(False) - commits = [] - - while lines: - id = lines.pop(0).split()[1] - tree = lines.pop(0).split()[1] + for line in stream: + id = line.split()[1] + assert line.split()[0] == "commit" + tree = stream.next().split()[1] parents = [] - while lines and lines[0].startswith('parent'): - parents.append(lines.pop(0).split()[-1]) - # END while there are parent lines - author, authored_date = cls._actor(lines.pop(0)) - committer, committed_date = cls._actor(lines.pop(0)) + next_line = None + for parent_line in stream: + if not parent_line.startswith('parent'): + next_line = parent_line + break + # END abort reading parents + parents.append(parent_line.split()[-1]) + # END for each parent line + + author, authored_date = cls._actor(next_line) + committer, committed_date = cls._actor(stream.next()) - # free line - lines.pop(0) + # empty line + stream.next() message_lines = [] - while lines and not lines[0].startswith('commit'): - message_lines.append(lines.pop(0).strip()) + next_line = None + for msg_line in stream: + if not msg_line.startswith(' '): + break + # END abort message reading + message_lines.append(msg_line.strip()) # END while there are message lines - message = '\n'.join(message_lines[:-1]) # last line is empty - - commits.append(Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, - committer=committer, committed_date=committed_date, message=message)) - # END while lines - return commits + message = '\n'.join(message_lines) + + yield Commit(repo, id=id, parents=parents, tree=tree, author=author, authored_date=authored_date, + committer=committer, committed_date=committed_date, message=message) + # END for each line in stream @classmethod def diff(cls, repo, a, b=None, paths=None): -- cgit v1.2.1 From ac1cec7066eaa12a8d1a61562bfc6ee77ff5f54d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 13 Oct 2009 21:49:33 +0200 Subject: added Iterable interface to Ref type --- lib/git/objects/commit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index c289b825..f9245217 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -140,7 +140,7 @@ class Commit(base.Object, Iterable): ``skip`` is the number of commits to skip Returns - git.Commit[] + iterator yielding Commit items """ options = {'pretty': 'raw'} options.update(kwargs) -- cgit v1.2.1 From ead94f267065bb55303f79a0a6df477810b3c68d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 14:33:51 +0200 Subject: cmd: added option to return the process directly, allowing to read the output directly from the output stream commit: now reads commit information directly from the output stream of the process by implementing its iterator method repo: removed log method as it was redundant ( equal to the commits method ) --- lib/git/objects/commit.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index f9245217..340686ea 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -142,26 +142,28 @@ class Commit(base.Object, Iterable): Returns iterator yielding Commit items """ - options = {'pretty': 'raw'} + options = {'pretty': 'raw', 'as_process' : True } options.update(kwargs) - output = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_stream(repo, iter(output.splitlines(False))) + # the test system might confront us with string values - + proc = repo.git.rev_list(ref, '--', path, **options) + return cls._iter_from_process(repo, proc) @classmethod - def _iter_from_stream(cls, repo, stream): + def _iter_from_process(cls, repo, proc): """ Parse out commit information into a list of Commit objects ``repo`` is the Repo - ``stream`` - output stream from the git-rev-list command (raw format) + ``proc`` + git-rev-list process instance (raw format) Returns iterator returning Commit objects """ + stream = proc.stdout for line in stream: id = line.split()[1] assert line.split()[0] == "commit" -- cgit v1.2.1 From 6eeae8b24135b4de05f6d725b009c287577f053d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 17:24:15 +0200 Subject: test: Added time-consuming test which could also be a benchmark in fact - currently it cause hundreds of command invocations which is slow Fixed issue with trees not properly initialized with their default mode _set_cache_: some objects checked whether the attribute was within their __slots__ although it should have been accessed through its class --- lib/git/objects/commit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 340686ea..69fb3710 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -71,7 +71,7 @@ class Commit(base.Object, Iterable): # END for each parent to convert if self.id and tree is not None: - self.tree = Tree(repo, id=tree) + self.tree = Tree(repo, id=tree, path='') # END id to tree conversion def _set_cache_(self, attr): @@ -80,7 +80,7 @@ class Commit(base.Object, Iterable): to be set. We set all values at once. """ - if attr in self.__slots__: + if attr in Commit.__slots__: temp = Commit.list_items(self.repo, self.id, max_count=1)[0] self.parents = temp.parents self.tree = temp.tree -- cgit v1.2.1 From 6745f4542cfb74bbf3b933dba7a59ef2f54a4380 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 14 Oct 2009 19:34:45 +0200 Subject: test_blob: removed many redundant tests that would fail now as the mock cannot handle the complexity of the command backend All objects but Tree now use the persistent command to read their object information - Trees get binary data and would need their own pretty-printing or they need to parse the data themselves which is my favorite --- lib/git/objects/commit.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'lib/git/objects/commit.py') diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py index 69fb3710..101014ab 100644 --- a/lib/git/objects/commit.py +++ b/lib/git/objects/commit.py @@ -81,7 +81,10 @@ class Commit(base.Object, Iterable): We set all values at once. """ if attr in Commit.__slots__: - temp = Commit.list_items(self.repo, self.id, max_count=1)[0] + # prepare our data lines to match rev-list + data_lines = self.data.splitlines() + data_lines.insert(0, "commit %s" % self.id) + temp = self._iter_from_process_or_stream(self.repo, iter(data_lines)).next() self.parents = temp.parents self.tree = temp.tree self.author = temp.author @@ -147,10 +150,10 @@ class Commit(base.Object, Iterable): # the test system might confront us with string values - proc = repo.git.rev_list(ref, '--', path, **options) - return cls._iter_from_process(repo, proc) + return cls._iter_from_process_or_stream(repo, proc) @classmethod - def _iter_from_process(cls, repo, proc): + def _iter_from_process_or_stream(cls, repo, proc_or_stream): """ Parse out commit information into a list of Commit objects @@ -163,7 +166,10 @@ class Commit(base.Object, Iterable): Returns iterator returning Commit objects """ - stream = proc.stdout + stream = proc_or_stream + if not hasattr(stream,'next'): + stream = proc_or_stream.stdout + for line in stream: id = line.split()[1] assert line.split()[0] == "commit" -- cgit v1.2.1