From c9b44d29d656e92bb08fa41bcc2c31b2a2a2607b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 11 Jan 2023 19:13:49 +0100 Subject: fix files list on file rename GitPython parses the output of `git diff --numstat` to get the files changed in a commit. This breaks when a commit contains a file rename, because the output of `git diff` is different than expected. This is the output of a normal commit: $ git diff --numstat 8f41a390bf9a^ 8f41a390bf9a 30 5 test/test_repo.py And this a commit containing a rename: $ git diff --numstat 185d847ec764^ 185d847ec764 3 1 .github/workflows/{test_pytest.yml => Future.yml} This can be triggered by this code: for commit in repo.iter_commits(): print(commit.hexsha) for file in commit.stats.files: print(file) Which will print for the normal commit: 8f41a390bf9a54db6f85032bc56b453307b95451 'test/test_repo.py' And when there is a rename: 185d847ec7647fd2642a82d9205fb3d07ea71715 '.github/workflows/{test_pytest.yml => Future.yml}' Additionally, when a path member is removed, the file list become a list of strings, breaking even more the caller. This is in the Linux kernel tree: $ git diff --numstat db401875f438^ db401875f438 1 1 tools/testing/selftests/drivers/net/mlxsw/{spectrum-2 => }/devlink_trap_tunnel_ipip6.sh and GitPython parses it as: db401875f438168c5804b295b93a28c7730bb57a ('tools/testing/selftests/drivers/net/mlxsw/{spectrum-2 => ' '}/devlink_trap_tunnel_ipip6.sh') Fix this by pasing the --no-renames option to `git diff` which ignores renames and print the same output as if the file was deleted from the old path and created in the new one: $ git diff --numstat --no-renames 185d847ec764^ 185d847ec764 57 0 .github/workflows/Future.yml 0 55 .github/workflows/test_pytest.yml --- git/objects/commit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'git/objects/commit.py') diff --git a/git/objects/commit.py b/git/objects/commit.py index 82d2387b..547e8fe8 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -324,14 +324,14 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): :return: git.Stats""" if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, root=True) + text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, no_renames=True, root=True) text2 = "" for line in text.splitlines()[1:]: (insertions, deletions, filename) = line.split("\t") text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) text = text2 else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True) + text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) return Stats._list_from_string(self.repo, text) @property -- cgit v1.2.1