diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2015-01-09 12:49:03 +0100 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2015-01-09 12:49:03 +0100 |
commit | 17f5d13a7a741dcbb2a30e147bdafe929cff4697 (patch) | |
tree | 7794ddd8ace09c62627bb1639656f410267718b7 /git/repo/base.py | |
parent | 1531d789df97dbf1ed3f5b0340bbf39918d9fe48 (diff) | |
download | gitpython-17f5d13a7a741dcbb2a30e147bdafe929cff4697.tar.gz |
Added test to ensure blame can deal with binary patches.
Fixes #74
Diffstat (limited to 'git/repo/base.py')
-rw-r--r-- | git/repo/base.py | 36 |
1 file changed, 29 insertions, 7 deletions
diff --git a/git/repo/base.py b/git/repo/base.py
index a84f617d..dbca4697 100644
--- a/git/repo/base.py
+++ b/git/repo/base.py
@@ -587,14 +587,28 @@ class Repo(object):
             A list of tuples associating a Commit object with a list of lines that
             changed within the given commit. The Commit objects will be given in order
             of appearance."""
-        data = self.git.blame(rev, '--', file, p=True)
+        data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
         commits = dict()
         blames = list()
         info = None
 
-        for line in data.splitlines(False):
-            parts = self.re_whitespace.split(line, 1)
-            firstpart = parts[0]
+        keepends = True
+        for line in data.splitlines(keepends):
+            try:
+                line = line.rstrip().decode(defenc)
+            except UnicodeDecodeError:
+                firstpart = ''
+                is_binary = True
+            else:
+                # As we don't have an idea when the binary data ends, as it could contain multiple newlines
+                # in the process. So we rely on being able to decode to tell us what is is.
+                # This can absolutely fail even on text files, but even if it does, we should be fine treating it
+                # as binary instead
+                parts = self.re_whitespace.split(line, 1)
+                firstpart = parts[0]
+                is_binary = False
+            # end handle decode of line
+
             if self.re_hexsha_only.search(firstpart):
                 # handles
                 # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start
@@ -651,10 +665,18 @@ class Repo(object):
                                            message=info['summary'])
                                 commits[sha] = c
                             # END if commit objects needs initial creation
-                            m = self.re_tab_full_line.search(line)
-                            text, = m.groups()
+                            if not is_binary:
+                                if line and line[0] == '\t':
+                                    line = line[1:]
+                            else:
+                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
+                                # binary being split up along the newline separator. We will append this to the blame
+                                # we are currently looking at, even though it should be concatenated with the last line
+                                # we have seen.
+                                pass
+                            # end handle line contents
                             blames[-1][0] = c
-                            blames[-1][1].append(text)
+                            blames[-1][1].append(line)
                             info = {'id': sha}
                         # END if we collected commit info
                     # END distinguish filename,summary,rest