diff options
author | Vincent Driessen <me@nvie.com> | 2016-04-19 23:41:01 +0200 |
---|---|---|
committer | Vincent Driessen <me@nvie.com> | 2016-04-19 23:46:54 +0200 |
commit | 7fbc182e6d4636f67f44e5893dee3dcedfa90e04 (patch) | |
tree | 2e761da1ed007fbbeb1668eaf97c4d09be1d6920 /git/diff.py | |
parent | 504870e633eeb5fc1bd7c33b8dde0eb62a5b2d12 (diff) | |
download | gitpython-7fbc182e6d4636f67f44e5893dee3dcedfa90e04.tar.gz |
Fix diff patch parser for paths with unsafe chars
This specifically covers the cases where unsafe chars occur in path
names, and git-diff -p will escape those.
From the git-diff-tree manpage:
> 3. TAB, LF, double quote and backslash characters in pathnames are
> represented as \t, \n, \" and \\, respectively. If there is need
> for such substitution then the whole pathname is put in double
> quotes.
This patch checks whether or not this has happened and will unescape
those paths accordingly.
One thing to note here is that, depending on the position in the patch
format, those paths may be prefixed with an a/ or b/. I've specifically
made sure to never interpret a path that actually starts with a/ or b/
incorrectly.
Example of that subtlety below. Here, the actual file path is
"b/normal". On the diff file that gets encoded as "b/b/normal".
diff --git a/b/normal b/b/normal
new file mode 100644
index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54
--- /dev/null
+++ b/b/normal
@@ -0,0 +1 @@
+dummy content
Here, we prefer the "---" and "+++" lines' values. Note that these
paths start with a/ or b/. The only exception is the value "/dev/null",
which is handled as a special case.
Suppose now the file gets moved "b/moved", the output of that diff would
then be this:
diff --git a/b/normal b/b/moved
similarity index 100%
rename from b/normal
rename to b/moved
We prefer the "rename" lines' values in this case (the "diff" line is
always a last resort). Take note that those lines are not prefixed with
a/ or b/, but the ones in the "diff" line are (just like the ones in
"---" or "+++" lines).
Diffstat (limited to 'git/diff.py')
-rw-r--r-- | git/diff.py | 47 |
1 files changed, 34 insertions, 13 deletions
diff --git a/git/diff.py b/git/diff.py index d4affd30..a7e7411d 100644 --- a/git/diff.py +++ b/git/diff.py @@ -22,6 +22,20 @@ __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE') NULL_TREE = object() +def decode_path(path, has_ab_prefix=True): + if path == b'/dev/null': + return None + + if path.startswith(b'"') and path.endswith(b'"'): + path = path[1:-1].decode('string_escape') + + if has_ab_prefix: + assert path.startswith(b'a/') or path.startswith(b'b/') + path = path[2:] + + return path + + class Diffable(object): """Common interface for all object that can be diffed against another object of compatible type. @@ -196,9 +210,9 @@ class Diff(object): be different to the version in the index or tree, and hence has been modified.""" # precompiled regex - re_header = re.compile(r""" + re_header = re.compile(br""" ^diff[ ]--git - [ ](?:a/)?(?P<a_path_fallback>.+?)[ ](?:b/)?(?P<b_path_fallback>.+?)\n + [ ](?P<a_path_fallback>"?a/.+?"?)[ ](?P<b_path_fallback>"?b/.+?"?)\n (?:^old[ ]mode[ ](?P<old_mode>\d+)\n ^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))? (?:^similarity[ ]index[ ]\d+%\n @@ -208,9 +222,9 @@ class Diff(object): (?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))? (?:^index[ ](?P<a_blob_id>[0-9A-Fa-f]+) \.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))? - (?:^---[ ](?:a/)?(?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? - (?:^\+\+\+[ ](?:b/)?(?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? - """.encode('ascii'), re.VERBOSE | re.MULTILINE) + (?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? + (?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))? + """, re.VERBOSE | re.MULTILINE) # can be used for comparisons NULL_HEX_SHA = "0" * 40 NULL_BIN_SHA = b"\0" * 20 @@ -320,6 +334,19 @@ class Diff(object): return self.rename_from != self.rename_to @classmethod + def _pick_best_path(cls, path_match, rename_match, path_fallback_match): + if path_match: + return decode_path(path_match) + + if rename_match: + return decode_path(rename_match, has_ab_prefix=False) + + if path_fallback_match: + return decode_path(path_fallback_match) + + return None + + @classmethod def _index_from_patch_format(cls, repo, stream): """Create a new DiffIndex from the given text which must be in patch format :param repo: is the repository we are operating on - it is required @@ -338,14 +365,8 @@ class Diff(object): a_path, b_path = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - a_path = a_path or rename_from or a_path_fallback - b_path = b_path or rename_to or b_path_fallback - - if a_path == b'/dev/null': - a_path = None - - if b_path == b'/dev/null': - b_path = None + a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) + b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback) # Our only means to find the actual text is to see what has not been matched by our regex, # and then retro-actively assin it to our index |