diff options
author | Vincent Driessen <me@nvie.com> | 2016-05-30 15:26:23 +0200 |
---|---|---|
committer | Vincent Driessen <me@nvie.com> | 2016-05-30 15:44:46 +0200 |
commit | 1faf84f8eb760b003ad2be81432443bf443b82e6 (patch) | |
tree | e3bde34d58698fb4ae3c2bf1e45830dccd8e724e /git/diff.py | |
parent | e836e5cdcc7e3148c388fe8c4a1bab7eeb00cc3f (diff) | |
download | gitpython-fix-octal-escaped-path-parser-bug.tar.gz |
Fix bug in diff parser output [fix-octal-escaped-path-parser-bug]
The diff --patch parser missed an edge case: Git encodes non-ASCII
characters in path names as octal escape sequences, but these were not
decoded properly. For example, such a path appears in the diff output as:
\360\237\222\251.txt
Decoded as UTF-8, those bytes yield:
💩.txt
Diffstat (limited to 'git/diff.py')
-rw-r--r-- | git/diff.py | 17 |
1 files changed, 15 insertions, 2 deletions
```diff
diff --git a/git/diff.py b/git/diff.py
index 44a65017..9073767e 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -15,12 +15,23 @@ from git.compat import (
     PY3
 )
 
-
 __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
 
 # Special object to compare against the empty tree in diffs
 NULL_TREE = object()
 
+_octal_byte_re = re.compile(b'\\\\([0-9]{3})')
+
+
+def _octal_repl(matchobj):
+    value = matchobj.group(1)
+    value = int(value, 8)
+    if PY3:
+        value = bytes(bytearray((value,)))
+    else:
+        value = chr(value)
+    return value
+
 
 def decode_path(path, has_ab_prefix=True):
     if path == b'/dev/null':
@@ -32,6 +43,8 @@ def decode_path(path, has_ab_prefix=True):
                           .replace(b'\\"', b'"')
                           .replace(b'\\\\', b'\\'))
 
+    path = _octal_byte_re.sub(_octal_repl, path)
+
     if has_ab_prefix:
         assert path.startswith(b'a/') or path.startswith(b'b/')
         path = path[2:]
@@ -337,7 +350,7 @@ class Diff(object):
         :note: This property is deprecated, please use ``renamed_file`` instead.
         """
         return self.renamed_file
-    
+
     @property
     def renamed_file(self):
         """:returns: True if the blob of our diff has been renamed
```