From e5b8220a1a967abdf2bae2124e3e22a9eea3729f Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Wed, 13 Apr 2016 14:52:18 +0200 Subject: Add incremental blame support This adds a sibling method to Repo's blame method: Repo.blame_incremental(rev, path, **kwargs) This can alternatively be called using: Repo.blame(rev, path, incremental=True) The main difference is that blame incremental is a bit more efficient and does not return the full file's contents, just the commits and the line number ranges. The parser is a bit more straight-forward and faster since the incremental output format is defined a little stricter. --- git/repo/base.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) (limited to 'git/repo/base.py') diff --git a/git/repo/base.py b/git/repo/base.py index a23e767a..64374f80 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -52,12 +52,14 @@ from .fun import ( from git.compat import ( text_type, defenc, - PY3 + PY3, + safe_decode, ) import os import sys import re +from six.moves import range DefaultDBType = GitCmdObjectDB if sys.version_info[:2] < (2, 5): # python 2.4 compatiblity @@ -655,7 +657,64 @@ class Repo(object): :return: Head to the active branch""" return self.head.reference - def blame(self, rev, file): + def blame_incremental(self, rev, file, **kwargs): + """Iterator for blame information for the given file at the given revision. + + Unlike .blame(), this does not return the actual file's contents, only + a stream of (commit, range) tuples. + + :param rev: revision specifier, see git-rev-parse for viable options. + :return: lazy iterator of (git.Commit, range) tuples, where the commit + indicates the commit to blame for the line, and range + indicates a span of line numbers in the resulting file. + + If you combine all line number ranges outputted by this command, you + should get a continuous range spanning all line numbers in the file. 
+ """ + data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs) + commits = dict() + + stream = iter(data.splitlines()) + while True: + line = next(stream) # when exhausted, causes a StopIteration, terminating this function + + hexsha, _, lineno, num_lines = line.split() + lineno = int(lineno) + num_lines = int(num_lines) + if hexsha not in commits: + # Now read the next few lines and build up a dict of properties + # for this commit + props = dict() + while True: + line = next(stream) + if line == b'boundary': + # "boundary" indicates a root commit and occurs + # instead of the "previous" tag + continue + + tag, value = line.split(b' ', 1) + props[tag] = value + if tag == b'filename': + # "filename" formally terminates the entry for --incremental + break + + c = Commit(self, hex_to_bin(hexsha), + author=Actor(safe_decode(props[b'author']), + safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))), + authored_date=int(props[b'author-time']), + committer=Actor(safe_decode(props[b'committer']), + safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))), + committed_date=int(props[b'committer-time']), + message=safe_decode(props[b'summary'])) + commits[hexsha] = c + else: + # Discard the next line (it's a filename end tag) + line = next(stream) + assert line.startswith(b'filename'), 'Unexpected git blame output' + + yield commits[hexsha], range(lineno, lineno + num_lines) + + def blame(self, rev, file, incremental=False): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. @@ -664,6 +723,9 @@ class Repo(object): A list of tuples associating a Commit object with a list of lines that changed within the given commit. 
The Commit objects will be given in order of appearance.""" + if incremental: + return self.blame_incremental(rev, file) + data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False) commits = dict() blames = list() -- cgit v1.2.1