diff options
| author | Vincent Driessen <me@nvie.com> | 2016-05-30 15:26:23 +0200 | 
|---|---|---|
| committer | Vincent Driessen <me@nvie.com> | 2016-05-30 15:44:46 +0200 | 
| commit | 1faf84f8eb760b003ad2be81432443bf443b82e6 (patch) | |
| tree | e3bde34d58698fb4ae3c2bf1e45830dccd8e724e /git | |
| parent | e836e5cdcc7e3148c388fe8c4a1bab7eeb00cc3f (diff) | |
| download | gitpython-1faf84f8eb760b003ad2be81432443bf443b82e6.tar.gz | |
Fix bug in diff parser outputfix-octal-escaped-path-parser-bug
The diff --patch parser was missing some edge case where Git would
encode non-ASCII chars in path names as octals, but these weren't
decoded properly.
    \360\237\222\251.txt
Decoded via utf-8, that will return:
    💩.txt
Diffstat (limited to 'git')
| -rw-r--r-- | git/diff.py | 17 | ||||
| -rw-r--r-- | git/test/fixtures/diff_patch_unsafe_paths | 7 | ||||
| -rw-r--r-- | git/test/test_diff.py | 13 | 
3 files changed, 29 insertions, 8 deletions
diff --git a/git/diff.py b/git/diff.py index 44a65017..9073767e 100644 --- a/git/diff.py +++ b/git/diff.py @@ -15,12 +15,23 @@ from git.compat import (      PY3  ) -  __all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')  # Special object to compare against the empty tree in diffs  NULL_TREE = object() +_octal_byte_re = re.compile(b'\\\\([0-9]{3})') + + +def _octal_repl(matchobj): +    value = matchobj.group(1) +    value = int(value, 8) +    if PY3: +        value = bytes(bytearray((value,))) +    else: +        value = chr(value) +    return value +  def decode_path(path, has_ab_prefix=True):      if path == b'/dev/null': @@ -32,6 +43,8 @@ def decode_path(path, has_ab_prefix=True):                            .replace(b'\\"', b'"')                            .replace(b'\\\\', b'\\')) +    path = _octal_byte_re.sub(_octal_repl, path) +      if has_ab_prefix:          assert path.startswith(b'a/') or path.startswith(b'b/')          path = path[2:] @@ -337,7 +350,7 @@ class Diff(object):          :note: This property is deprecated, please use ``renamed_file`` instead.          """          return self.renamed_file -         +      @property      def renamed_file(self):          """:returns: True if the blob of our diff has been renamed diff --git a/git/test/fixtures/diff_patch_unsafe_paths b/git/test/fixtures/diff_patch_unsafe_paths index 14375f79..9ee6b834 100644 --- a/git/test/fixtures/diff_patch_unsafe_paths +++ b/git/test/fixtures/diff_patch_unsafe_paths @@ -61,6 +61,13 @@ index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94  +++ "b/path/¯\\_(ツ)_|¯"  @@ -0,0 +1 @@  +dummy content +diff --git "a/path/\360\237\222\251.txt" "b/path/\360\237\222\251.txt" +new file mode 100644 +index 0000000000000000000000000000000000000000..eaf5f7510320b6a327fb308379de2f94d8859a54 +--- /dev/null ++++ "b/path/\360\237\222\251.txt" +@@ -0,0 +1 @@ ++dummy content  diff --git a/a/with spaces b/b/with some spaces  similarity index 100%  rename from a/with spaces diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 1d7a4fda..8966351a 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -161,16 +161,17 @@ class TestDiff(TestBase):          self.assertEqual(res[6].b_path, u'path/with spaces')          self.assertEqual(res[7].b_path, u'path/with-question-mark?')          self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯') +        self.assertEqual(res[9].b_path, u'path/💩.txt')          # The "Moves"          # NOTE: The path prefixes a/ and b/ here are legit!  We're actually          # verifying that it's not "a/a/" that shows up, see the fixture data. -        self.assertEqual(res[9].a_path, u'a/with spaces')       # NOTE: path a/ here legit! -        self.assertEqual(res[9].b_path, u'b/with some spaces')  # NOTE: path b/ here legit! -        self.assertEqual(res[10].a_path, u'a/ending in a space ') -        self.assertEqual(res[10].b_path, u'b/ending with space ') -        self.assertEqual(res[11].a_path, u'a/"with-quotes"') -        self.assertEqual(res[11].b_path, u'b/"with even more quotes"') +        self.assertEqual(res[10].a_path, u'a/with spaces')       # NOTE: path a/ here legit! +        self.assertEqual(res[10].b_path, u'b/with some spaces')  # NOTE: path b/ here legit! +        self.assertEqual(res[11].a_path, u'a/ending in a space ') +        self.assertEqual(res[11].b_path, u'b/ending with space ') +        self.assertEqual(res[12].a_path, u'a/"with-quotes"') +        self.assertEqual(res[12].b_path, u'b/"with even more quotes"')      def test_diff_patch_format(self):          # test all of the 'old' format diffs for completness - it should at least  | 
