summaryrefslogtreecommitdiff
path: root/git/repo/base.py
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2015-01-09 12:49:03 +0100
committerSebastian Thiel <byronimo@gmail.com>2015-01-09 12:49:03 +0100
commit17f5d13a7a741dcbb2a30e147bdafe929cff4697 (patch)
tree7794ddd8ace09c62627bb1639656f410267718b7 /git/repo/base.py
parent1531d789df97dbf1ed3f5b0340bbf39918d9fe48 (diff)
downloadgitpython-17f5d13a7a741dcbb2a30e147bdafe929cff4697.tar.gz
Added test to assure blame can deal with binary patches.
Fixes #74
Diffstat (limited to 'git/repo/base.py')
-rw-r--r--git/repo/base.py36
1 files changed, 29 insertions, 7 deletions
diff --git a/git/repo/base.py b/git/repo/base.py
index a84f617d..dbca4697 100644
--- a/git/repo/base.py
+++ b/git/repo/base.py
@@ -587,14 +587,28 @@ class Repo(object):
A list of tuples associating a Commit object with a list of lines that
changed within the given commit. The Commit objects will be given in order
of appearance."""
- data = self.git.blame(rev, '--', file, p=True)
+ data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
commits = dict()
blames = list()
info = None
- for line in data.splitlines(False):
- parts = self.re_whitespace.split(line, 1)
- firstpart = parts[0]
+ keepends = True
+ for line in data.splitlines(keepends):
+ try:
+ line = line.rstrip().decode(defenc)
+ except UnicodeDecodeError:
+ firstpart = ''
+ is_binary = True
+ else:
+ # As we don't have an idea when the binary data ends, as it could contain multiple newlines
+ # in the process. So we rely on being able to decode to tell us what is is.
+ # This can absolutely fail even on text files, but even if it does, we should be fine treating it
+ # as binary instead
+ parts = self.re_whitespace.split(line, 1)
+ firstpart = parts[0]
+ is_binary = False
+ # end handle decode of line
+
if self.re_hexsha_only.search(firstpart):
# handles
# 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
@@ -651,10 +665,18 @@ class Repo(object):
message=info['summary'])
commits[sha] = c
# END if commit objects needs initial creation
- m = self.re_tab_full_line.search(line)
- text, = m.groups()
+ if not is_binary:
+ if line and line[0] == '\t':
+ line = line[1:]
+ else:
+ # NOTE: We are actually parsing lines out of binary data, which can lead to the
+ # binary being split up along the newline separator. We will append this to the blame
+ # we are currently looking at, even though it should be concatenated with the last line
+ # we have seen.
+ pass
+ # end handle line contents
blames[-1][0] = c
- blames[-1][1].append(text)
+ blames[-1][1].append(line)
info = {'id': sha}
# END if we collected commit info
# END distinguish filename,summary,rest