diff options
-rw-r--r-- | git/compat.py | 15 | ||||
-rw-r--r-- | git/repo/base.py | 66 | ||||
-rw-r--r-- | git/test/fixtures/blame_incremental | 30 | ||||
-rw-r--r-- | git/test/test_repo.py | 24 |
4 files changed, 133 insertions, 2 deletions
diff --git a/git/compat.py b/git/compat.py index 1ea2119e..146bfd4b 100644 --- a/git/compat.py +++ b/git/compat.py @@ -8,6 +8,7 @@ # flake8: noqa import sys +import six from gitdb.utils.compat import ( PY3, @@ -46,6 +47,20 @@ else: def mviter(d): return d.itervalues() +PRE_PY27 = sys.version_info < (2, 7) + + +def safe_decode(s): + """Safely decodes a binary string to unicode""" + if isinstance(s, six.text_type): + return s + elif isinstance(s, six.binary_type): + if PRE_PY27: + return s.decode(defenc) # we're screwed + else: + return s.decode(defenc, errors='replace') + raise TypeError('Expected bytes or text, but got %r' % (s,)) + def with_metaclass(meta, *bases): """copied from https://github.com/Byron/bcore/blob/master/src/python/butility/future.py#L15""" diff --git a/git/repo/base.py b/git/repo/base.py index a23e767a..64374f80 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -52,12 +52,14 @@ from .fun import ( from git.compat import ( text_type, defenc, - PY3 + PY3, + safe_decode, ) import os import sys import re +from six.moves import range DefaultDBType = GitCmdObjectDB if sys.version_info[:2] < (2, 5): # python 2.4 compatiblity @@ -655,7 +657,64 @@ class Repo(object): :return: Head to the active branch""" return self.head.reference - def blame(self, rev, file): + def blame_incremental(self, rev, file, **kwargs): + """Iterator for blame information for the given file at the given revision. + + Unlike .blame(), this does not return the actual file's contents, only + a stream of (commit, range) tuples. + + :parm rev: revision specifier, see git-rev-parse for viable options. + :return: lazy iterator of (git.Commit, range) tuples, where the commit + indicates the commit to blame for the line, and range + indicates a span of line numbers in the resulting file. + + If you combine all line number ranges outputted by this command, you + should get a continuous range spanning all line numbers in the file. 
+ """ + data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs) + commits = dict() + + stream = iter(data.splitlines()) + while True: + line = next(stream) # when exhausted, causes a StopIteration, terminating this function + + hexsha, _, lineno, num_lines = line.split() + lineno = int(lineno) + num_lines = int(num_lines) + if hexsha not in commits: + # Now read the next few lines and build up a dict of properties + # for this commit + props = dict() + while True: + line = next(stream) + if line == b'boundary': + # "boundary" indicates a root commit and occurs + # instead of the "previous" tag + continue + + tag, value = line.split(b' ', 1) + props[tag] = value + if tag == b'filename': + # "filename" formally terminates the entry for --incremental + break + + c = Commit(self, hex_to_bin(hexsha), + author=Actor(safe_decode(props[b'author']), + safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))), + authored_date=int(props[b'author-time']), + committer=Actor(safe_decode(props[b'committer']), + safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))), + committed_date=int(props[b'committer-time']), + message=safe_decode(props[b'summary'])) + commits[hexsha] = c + else: + # Discard the next line (it's a filename end tag) + line = next(stream) + assert line.startswith(b'filename'), 'Unexpected git blame output' + + yield commits[hexsha], range(lineno, lineno + num_lines) + + def blame(self, rev, file, incremental=False): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. @@ -664,6 +723,9 @@ class Repo(object): A list of tuples associating a Commit object with a list of lines that changed within the given commit. 
The Commit objects will be given in order of appearance.""" + if incremental: + return self.blame_incremental(rev, file) + data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False) commits = dict() blames = list() diff --git a/git/test/fixtures/blame_incremental b/git/test/fixtures/blame_incremental new file mode 100644 index 00000000..9a0d9e35 --- /dev/null +++ b/git/test/fixtures/blame_incremental @@ -0,0 +1,30 @@ +82b8902e033430000481eb355733cd7065342037 2 2 1 +author Sebastian Thiel +author-mail <byronimo@gmail.com> +author-time 1270634931 +author-tz +0200 +committer Sebastian Thiel +committer-mail <byronimo@gmail.com> +committer-time 1270634931 +committer-tz +0200 +summary Used this release for a first beta of the 0.2 branch of development +previous 501bf602abea7d21c3dbb409b435976e92033145 AUTHORS +filename AUTHORS +82b8902e033430000481eb355733cd7065342037 14 14 1 +filename AUTHORS +c76852d0bff115720af3f27acdb084c59361e5f6 1 1 1 +author Michael Trier +author-mail <mtrier@gmail.com> +author-time 1232829627 +author-tz -0500 +committer Michael Trier +committer-mail <mtrier@gmail.com> +committer-time 1232829627 +committer-tz -0500 +summary Lots of spring cleaning and added in Sphinx documentation. 
+previous bcd57e349c08bd7f076f8d6d2f39b702015358c1 AUTHORS +filename AUTHORS +c76852d0bff115720af3f27acdb084c59361e5f6 2 3 11 +filename AUTHORS +c76852d0bff115720af3f27acdb084c59361e5f6 13 15 2 +filename AUTHORS diff --git a/git/test/test_repo.py b/git/test/test_repo.py index 177aa176..ab6c502f 100644 --- a/git/test/test_repo.py +++ b/git/test/test_repo.py @@ -50,6 +50,16 @@ from io import BytesIO from nose import SkipTest +def iter_flatten(lol): + for items in lol: + for item in items: + yield item + + +def flatten(lol): + return list(iter_flatten(lol)) + + class TestRepo(TestBase): @raises(InvalidGitRepositoryError) @@ -324,6 +334,20 @@ class TestRepo(TestBase): assert nml, "There should at least be one blame commit that contains multiple lines" @patch.object(Git, '_call_process') + def test_blame_incremental(self, git): + git.return_value = fixture('blame_incremental') + blame_output = self.rorepo.blame_incremental('9debf6b0aafb6f7781ea9d1383c86939a1aacde3', 'AUTHORS') + blame_output = list(blame_output) + assert len(blame_output) == 5 + + # Check all outputted line numbers + ranges = flatten([line_numbers for _, line_numbers in blame_output]) + assert ranges == flatten([range(2, 3), range(14, 15), range(1, 2), range(3, 14), range(15, 17)]), str(ranges) + + commits = [c.hexsha[:7] for c, _ in blame_output] + assert commits == ['82b8902', '82b8902', 'c76852d', 'c76852d', 'c76852d'], str(commits) + + @patch.object(Git, '_call_process') def test_blame_complex_revision(self, git): git.return_value = fixture('blame_complex_revision') res = self.rorepo.blame("HEAD~10..HEAD", "README.md") |