From 7fbc182e6d4636f67f44e5893dee3dcedfa90e04 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Tue, 19 Apr 2016 23:41:01 +0200 Subject: Fix diff patch parser for paths with unsafe chars This specifically covers the cases where unsafe chars occur in path names, and git-diff -p will escape those. From the git-diff-tree manpage: > 3. TAB, LF, double quote and backslash characters in pathnames are > represented as \t, \n, \" and \\, respectively. If there is need > for such substitution then the whole pathname is put in double > quotes. This patch checks whether or not this has happened and will unescape those paths accordingly. One thing to note here is that, depending on the position in the patch format, those paths may be prefixed with an a/ or b/. I've specifically made sure to never interpret a path that actually starts with a/ or b/ incorrectly. Example of that subtlety below. Here, the actual file path is "b/normal". In the diff output, that gets encoded as "b/b/normal". diff --git a/b/normal b/b/normal new file mode 100644 index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 --- /dev/null +++ b/b/normal @@ -0,0 +1 @@ +dummy content Here, we prefer the "---" and "+++" lines' values. Note that these paths start with a/ or b/. The only exception is the value "/dev/null", which is handled as a special case. Suppose now the file gets moved to "b/moved"; the output of that diff would then be this: diff --git a/b/normal b/b/moved similarity index 100% rename from b/normal rename to b/moved We prefer the "rename" lines' values in this case (the "diff" line is always a last resort). Take note that those lines are not prefixed with a/ or b/, but the ones in the "diff" line are (just like the ones in "---" or "+++" lines). 
--- git/test/test_diff.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'git/test/test_diff.py') diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 0c670f0b..7bca0c2a 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -1,4 +1,4 @@ -#-*-coding:utf-8-*- +# coding: utf-8 # test_diff.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # @@ -145,12 +145,37 @@ class TestDiff(TestBase): assert diff_index[0].new_file assert diff_index[0].diff == fixture('diff_initial') + def test_diff_unsafe_paths(self): + output = StringProcessAdapter(fixture('diff_patch_unsafe_paths')) + res = Diff._index_from_patch_format(None, output.stdout) + + # The "Additions" + self.assertEqual(res[0].b_path, u'path/ starting with a space') + self.assertEqual(res[1].b_path, u'path/"with-quotes"') + self.assertEqual(res[2].b_path, u"path/'with-single-quotes'") + self.assertEqual(res[3].b_path, u'path/ending in a space ') + self.assertEqual(res[4].b_path, u'path/with\ttab') + self.assertEqual(res[5].b_path, u'path/with\nnewline') + self.assertEqual(res[6].b_path, u'path/with spaces') + self.assertEqual(res[7].b_path, u'path/with-question-mark?') + self.assertEqual(res[8].b_path, ur'path/¯\_(ツ)_|¯') + + # The "Moves" + # NOTE: The path prefixes a/ and b/ here are legit! We're actually + # verifying that it's not "a/a/" that shows up, see the fixture data. + self.assertEqual(res[9].a_path, u'a/with spaces') # NOTE: path a/ here legit! + self.assertEqual(res[9].b_path, u'b/with some spaces') # NOTE: path b/ here legit! 
+ self.assertEqual(res[10].a_path, u'a/ending in a space ') + self.assertEqual(res[10].b_path, u'b/ending with space ') + self.assertEqual(res[11].a_path, u'a/"with-quotes"') + self.assertEqual(res[11].b_path, u'b/"with even more quotes"') + def test_diff_patch_format(self): # test all of the 'old' format diffs for completness - it should at least # be able to deal with it fixtures = ("diff_2", "diff_2f", "diff_f", "diff_i", "diff_mode_only", "diff_new_mode", "diff_numstat", "diff_p", "diff_rename", - "diff_tree_numstat_root") + "diff_tree_numstat_root", "diff_patch_unsafe_paths") for fixture_name in fixtures: diff_proc = StringProcessAdapter(fixture(fixture_name)) -- cgit v1.2.1 From 19099f9ce7e8d6cb1f5cafae318859be8c082ca2 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Wed, 20 Apr 2016 00:07:22 +0200 Subject: Python 3 compat fixes Specifically "string_escape" does not exist as an encoding anymore. --- git/test/test_diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git/test/test_diff.py') diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 7bca0c2a..858b3994 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -158,7 +158,7 @@ class TestDiff(TestBase): self.assertEqual(res[5].b_path, u'path/with\nnewline') self.assertEqual(res[6].b_path, u'path/with spaces') self.assertEqual(res[7].b_path, u'path/with-question-mark?') - self.assertEqual(res[8].b_path, ur'path/¯\_(ツ)_|¯') + self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') # The "Moves" # NOTE: The path prefixes a/ and b/ here are legit! We're actually -- cgit v1.2.1