From e77128e5344ce7d84302facc08d17c3151037ec3 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Thu, 14 Apr 2016 21:27:39 +0200 Subject: Make diff patch parsing more reliable The a_path and b_path cannot reliably be read from the first diff line as it's ambiguous. From the git-diff manpage: > The a/ and b/ filenames are the same unless rename/copy is involved. > Especially, **even for a creation or a deletion**, /dev/null is not > used in place of the a/ or b/ filenames. This patch changes the a_path and b_path detection to read it from the more reliable locations further down the diff headers. Two use cases are fixed by this: - As the man page snippet above states, for new/deleted files the a or b path will now be properly None. - File names with spaces in it are now properly parsed. Working on this patch, I realized the --- and +++ lines really belong to the diff header, not the diff contents. This means that when parsing the patch format, the --- and +++ will now be swallowed, and not end up anymore as part of the diff contents. The diff contents now always start with an @@ line. This may be a breaking change for some users that rely on this behaviour. However, those users could now access that information more reliably via the normal Diff properties a_path and b_path now. --- git/test/test_diff.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'git/test/test_diff.py') diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 56e395fd..05f8f3ae 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -76,7 +76,7 @@ class TestDiff(TestBase): self._assert_diff_format(diffs) assert_equal(1, len(diffs)) - assert_equal(10, len(diffs[0].diff.splitlines())) + assert_equal(8, len(diffs[0].diff.splitlines())) def test_diff_with_rename(self): output = StringProcessAdapter(fixture('diff_rename')) @@ -140,7 +140,8 @@ class TestDiff(TestBase): # ...and with creating a patch diff_index = initial_commit.diff(NULL_TREE, create_patch=True) - assert diff_index[0].b_path == 'CHANGES' + assert diff_index[0].a_path is None, repr(diff_index[0].a_path) + assert diff_index[0].b_path == 'CHANGES', repr(diff_index[0].b_path) assert diff_index[0].new_file assert diff_index[0].diff == fixture('diff_initial') @@ -156,6 +157,12 @@ class TestDiff(TestBase): Diff._index_from_patch_format(self.rorepo, diff_proc.stdout) # END for each fixture + def test_diff_with_spaces(self): + data = StringProcessAdapter(fixture('diff_file_with_spaces')) + diff_index = Diff._index_from_patch_format(self.rorepo, data.stdout) + assert diff_index[0].a_path is None, repr(diff_index[0].a_path) + assert diff_index[0].b_path == u'file with spaces', repr(diff_index[0].b_path) + def test_diff_interface(self): # test a few variations of the main diff routine assertion_map = dict() -- cgit v1.2.1 From 2090b5487e69688be61cfbb97c346c452ab45ba2 Mon Sep 17 00:00:00 2001 From: Vincent Driessen Date: Fri, 15 Apr 2016 02:18:12 +0200 Subject: Make test stricter --- git/test/test_diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git/test/test_diff.py') diff --git a/git/test/test_diff.py b/git/test/test_diff.py index 05f8f3ae..0c670f0b 100644 --- a/git/test/test_diff.py +++ b/git/test/test_diff.py @@ -116,7 +116,7 @@ class TestDiff(TestBase): res = Diff._index_from_patch_format(None, output.stdout) assert len(res) == 6 for dr in res: - assert dr.diff + assert dr.diff.startswith(b'@@') assert str(dr), "Diff to string conversion should be possible" # end for each diff -- cgit v1.2.1