summaryrefslogtreecommitdiff
path: root/git/repo/base.py
diff options
context:
space:
mode:
authorVincent Driessen <me@nvie.com>2016-04-13 14:52:18 +0200
committerVincent Driessen <me@nvie.com>2016-04-13 16:44:17 +0200
commite5b8220a1a967abdf2bae2124e3e22a9eea3729f (patch)
tree4d10bd3b123dd8321d6024665e323c00fa86b011 /git/repo/base.py
parent9debf6b0aafb6f7781ea9d1383c86939a1aacde3 (diff)
downloadgitpython-e5b8220a1a967abdf2bae2124e3e22a9eea3729f.tar.gz
Add incremental blame support
This adds a sibling method to Repo's blame method: Repo.blame_incremental(rev, path, **kwargs). This can alternatively be called using Repo.blame(rev, path, incremental=True). The main difference is that incremental blame is a bit more efficient and does not return the full file's contents, just the commits and the line number ranges. The parser is a bit more straightforward and faster, since the incremental output format is defined a little more strictly.
Diffstat (limited to 'git/repo/base.py')
-rw-r--r--git/repo/base.py66
1 files changed, 64 insertions, 2 deletions
diff --git a/git/repo/base.py b/git/repo/base.py
index a23e767a..64374f80 100644
--- a/git/repo/base.py
+++ b/git/repo/base.py
@@ -52,12 +52,14 @@ from .fun import (
from git.compat import (
text_type,
defenc,
- PY3
+ PY3,
+ safe_decode,
)
import os
import sys
import re
+from six.moves import range
DefaultDBType = GitCmdObjectDB
if sys.version_info[:2] < (2, 5): # python 2.4 compatiblity
@@ -655,7 +657,64 @@ class Repo(object):
:return: Head to the active branch"""
return self.head.reference
- def blame(self, rev, file):
+    def blame_incremental(self, rev, file, **kwargs):
+        """Iterator for blame information for the given file at the given revision.
+
+        Unlike .blame(), this does not return the actual file's contents, only
+        a stream of (commit, range) tuples.
+
+        :param rev: revision specifier, see git-rev-parse for viable options.
+        :param file: path of the file to blame; it is handed to ``git blame``
+            after a ``--`` separator.
+        :param kwargs: additional keyword arguments forwarded to ``git blame``.
+        :return: lazy iterator of (git.Commit, range) tuples, where the commit
+                 indicates the commit to blame for the line, and range
+                 indicates a span of line numbers in the resulting file.
+
+        If you combine all line number ranges outputted by this command, you
+        should get a continuous range spanning all line numbers in the file.
+        """
+        data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs)
+        commits = {}
+
+        stream = iter(data.splitlines())
+        # Drive the parser with ``for`` instead of a bare ``next()``: a
+        # StopIteration escaping a generator body is turned into a RuntimeError
+        # on Python 3.7+ (PEP 479), so running out of output has to end the
+        # loop normally rather than by letting the exception propagate.
+        for line in stream:
+            # Entry header: four whitespace-separated fields; the second one is
+            # not needed here and is discarded.
+            hexsha, _, lineno, num_lines = line.split()
+            lineno = int(lineno)
+            num_lines = int(num_lines)
+            if hexsha not in commits:
+                # First time this commit shows up: read the following lines and
+                # build up a dict of "<tag> <value>" properties for it. The
+                # same iterator is shared with the outer loop on purpose, so
+                # the lines consumed here are not seen again there.
+                props = {}
+                for line in stream:
+                    if line == b'boundary':
+                        # "boundary" indicates a root commit and occurs
+                        # instead of the "previous" tag
+                        continue
+
+                    tag, value = line.split(b' ', 1)
+                    props[tag] = value
+                    if tag == b'filename':
+                        # "filename" formally terminates the entry for --incremental
+                        break
+
+                # If the output ended before "filename" was seen, the missing
+                # properties surface as a KeyError in the lookups below.
+                c = Commit(self, hex_to_bin(hexsha),
+                           author=Actor(safe_decode(props[b'author']),
+                                        safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
+                           authored_date=int(props[b'author-time']),
+                           committer=Actor(safe_decode(props[b'committer']),
+                                           safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
+                           committed_date=int(props[b'committer-time']),
+                           message=safe_decode(props[b'summary']))
+                commits[hexsha] = c
+            else:
+                # Commit already known: discard the next line (it's a filename
+                # end tag). The default keeps an exhausted stream from raising
+                # StopIteration inside the generator; the assertion then
+                # reports the truncated output.
+                line = next(stream, b'')
+                assert line.startswith(b'filename'), 'Unexpected git blame output'
+
+            yield commits[hexsha], range(lineno, lineno + num_lines)
+
+
+ def blame(self, rev, file, incremental=False):
"""The blame information for the given file at the given revision.
:parm rev: revision specifier, see git-rev-parse for viable options.
@@ -664,6 +723,9 @@ class Repo(object):
A list of tuples associating a Commit object with a list of lines that
changed within the given commit. The Commit objects will be given in order
of appearance."""
+ if incremental:
+ return self.blame_incremental(rev, file)
+
data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
commits = dict()
blames = list()