summaryrefslogtreecommitdiff
path: root/git/diff.py
diff options
context:
space:
mode:
authorSebastian Thiel <sebastian.thiel@icloud.com>2022-05-18 07:43:53 +0800
committerSebastian Thiel <sebastian.thiel@icloud.com>2022-05-18 07:43:53 +0800
commit21ec529987d10e0010badd37f8da3274167d436f (patch)
treea3394cfe902ce7edd07c89420c21c13274a2d295 /git/diff.py
parentb30720ee4d9762a03eae4fa7cfa4b0190d81784d (diff)
downloadgitpython-21ec529987d10e0010badd37f8da3274167d436f.tar.gz
Run everything through 'black'
That way people who use it won't be deterred, while it unifies style everywhere.
Diffstat (limited to 'git/diff.py')
-rw-r--r--git/diff.py377
1 files changed, 248 insertions, 129 deletions
diff --git a/git/diff.py b/git/diff.py
index cea66d7e..6526ed68 100644
--- a/git/diff.py
+++ b/git/diff.py
@@ -15,7 +15,19 @@ from .objects.util import mode_str_to_int
# typing ------------------------------------------------------------------
-from typing import Any, Iterator, List, Match, Optional, Tuple, Type, TypeVar, Union, TYPE_CHECKING, cast
+from typing import (
+ Any,
+ Iterator,
+ List,
+ Match,
+ Optional,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+ TYPE_CHECKING,
+ cast,
+)
from git.types import PathLike, Literal
if TYPE_CHECKING:
@@ -26,7 +38,7 @@ if TYPE_CHECKING:
from subprocess import Popen
from git import Git
-Lit_change_type = Literal['A', 'D', 'C', 'M', 'R', 'T', 'U']
+Lit_change_type = Literal["A", "D", "C", "M", "R", "T", "U"]
# def is_change_type(inp: str) -> TypeGuard[Lit_change_type]:
@@ -36,12 +48,12 @@ Lit_change_type = Literal['A', 'D', 'C', 'M', 'R', 'T', 'U']
# ------------------------------------------------------------------------
-__all__ = ('Diffable', 'DiffIndex', 'Diff', 'NULL_TREE')
+__all__ = ("Diffable", "DiffIndex", "Diff", "NULL_TREE")
# Special object to compare against the empty tree in diffs
NULL_TREE = object()
-_octal_byte_re = re.compile(b'\\\\([0-9]{3})')
+_octal_byte_re = re.compile(b"\\\\([0-9]{3})")
def _octal_repl(matchobj: Match) -> bytes:
@@ -52,19 +64,22 @@ def _octal_repl(matchobj: Match) -> bytes:
def decode_path(path: bytes, has_ab_prefix: bool = True) -> Optional[bytes]:
- if path == b'/dev/null':
+ if path == b"/dev/null":
return None
if path.startswith(b'"') and path.endswith(b'"'):
- path = (path[1:-1].replace(b'\\n', b'\n')
- .replace(b'\\t', b'\t')
- .replace(b'\\"', b'"')
- .replace(b'\\\\', b'\\'))
+ path = (
+ path[1:-1]
+ .replace(b"\\n", b"\n")
+ .replace(b"\\t", b"\t")
+ .replace(b'\\"', b'"')
+ .replace(b"\\\\", b"\\")
+ )
path = _octal_byte_re.sub(_octal_repl, path)
if has_ab_prefix:
- assert path.startswith(b'a/') or path.startswith(b'b/')
+ assert path.startswith(b"a/") or path.startswith(b"b/")
path = path[2:]
return path
@@ -77,14 +92,16 @@ class Diffable(object):
:note:
Subclasses require a repo member as it is the case for Object instances, for practical
reasons we do not derive from Object."""
+
__slots__ = ()
# standin indicating you want to diff against the index
class Index(object):
pass
- def _process_diff_args(self, args: List[Union[str, 'Diffable', Type['Diffable.Index'], object]]
- ) -> List[Union[str, 'Diffable', Type['Diffable.Index'], object]]:
+ def _process_diff_args(
+ self, args: List[Union[str, "Diffable", Type["Diffable.Index"], object]]
+ ) -> List[Union[str, "Diffable", Type["Diffable.Index"], object]]:
"""
:return:
possibly altered version of the given args list.
@@ -92,9 +109,13 @@ class Diffable(object):
Subclasses can use it to alter the behaviour of the superclass"""
return args
- def diff(self, other: Union[Type['Index'], 'Tree', 'Commit', None, str, object] = Index,
- paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None,
- create_patch: bool = False, **kwargs: Any) -> 'DiffIndex':
+ def diff(
+ self,
+ other: Union[Type["Index"], "Tree", "Commit", None, str, object] = Index,
+ paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None,
+ create_patch: bool = False,
+ **kwargs: Any
+ ) -> "DiffIndex":
"""Creates diffs between two items being trees, trees and index or an
index and the working tree. It will detect renames automatically.
@@ -125,11 +146,11 @@ class Diffable(object):
:note:
On a bare repository, 'other' needs to be provided as Index or as
as Tree/Commit, or a git command error will occur"""
- args: List[Union[PathLike, Diffable, Type['Diffable.Index'], object]] = []
- args.append("--abbrev=40") # we need full shas
- args.append("--full-index") # get full index paths, not only filenames
+ args: List[Union[PathLike, Diffable, Type["Diffable.Index"], object]] = []
+ args.append("--abbrev=40") # we need full shas
+ args.append("--full-index") # get full index paths, not only filenames
- args.append("-M") # check for renames, in both formats
+ args.append("-M") # check for renames, in both formats
if create_patch:
args.append("-p")
else:
@@ -138,23 +159,23 @@ class Diffable(object):
# in any way, assure we don't see colored output,
# fixes https://github.com/gitpython-developers/GitPython/issues/172
- args.append('--no-color')
+ args.append("--no-color")
if paths is not None and not isinstance(paths, (tuple, list)):
paths = [paths]
- if hasattr(self, 'Has_Repo'):
- self.repo: 'Repo' = self.repo
+ if hasattr(self, "Has_Repo"):
+ self.repo: "Repo" = self.repo
diff_cmd = self.repo.git.diff
if other is self.Index:
- args.insert(0, '--cached')
+ args.insert(0, "--cached")
elif other is NULL_TREE:
- args.insert(0, '-r') # recursive diff-tree
- args.insert(0, '--root')
+ args.insert(0, "-r") # recursive diff-tree
+ args.insert(0, "--root")
diff_cmd = self.repo.git.diff_tree
elif other is not None:
- args.insert(0, '-r') # recursive diff-tree
+ args.insert(0, "-r") # recursive diff-tree
args.insert(0, other)
diff_cmd = self.repo.git.diff_tree
@@ -166,19 +187,21 @@ class Diffable(object):
args.extend(paths)
# END paths handling
- kwargs['as_process'] = True
+ kwargs["as_process"] = True
proc = diff_cmd(*self._process_diff_args(args), **kwargs)
- diff_method = (Diff._index_from_patch_format
- if create_patch
- else Diff._index_from_raw_format)
+ diff_method = (
+ Diff._index_from_patch_format
+ if create_patch
+ else Diff._index_from_raw_format
+ )
index = diff_method(self.repo, proc)
proc.wait()
return index
-T_Diff = TypeVar('T_Diff', bound='Diff')
+T_Diff = TypeVar("T_Diff", bound="Diff")
class DiffIndex(List[T_Diff]):
@@ -187,6 +210,7 @@ class DiffIndex(List[T_Diff]):
the diff properties.
The class improves the diff handling convenience"""
+
# change type invariant identifying possible ways a blob can have changed
# A = Added
# D = Deleted
@@ -208,7 +232,7 @@ class DiffIndex(List[T_Diff]):
* 'R' for renamed paths
* 'M' for paths with modified data
* 'T' for changed in the type paths
- """
+ """
if change_type not in self.change_type:
raise ValueError("Invalid change type: %s" % change_type)
@@ -223,7 +247,12 @@ class DiffIndex(List[T_Diff]):
yield diffidx
elif change_type == "R" and diffidx.renamed:
yield diffidx
- elif change_type == "M" and diffidx.a_blob and diffidx.b_blob and diffidx.a_blob != diffidx.b_blob:
+ elif (
+ change_type == "M"
+ and diffidx.a_blob
+ and diffidx.b_blob
+ and diffidx.a_blob != diffidx.b_blob
+ ):
yield diffidx
# END for each diff
@@ -261,7 +290,8 @@ class Diff(object):
be different to the version in the index or tree, and hence has been modified."""
# precompiled regex
- re_header = re.compile(br"""
+ re_header = re.compile(
+ rb"""
^diff[ ]--git
[ ](?P<a_path_fallback>"?[ab]/.+?"?)[ ](?P<b_path_fallback>"?[ab]/.+?"?)\n
(?:^old[ ]mode[ ](?P<old_mode>\d+)\n
@@ -278,22 +308,48 @@ class Diff(object):
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
(?:^---[ ](?P<a_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
(?:^\+\+\+[ ](?P<b_path>[^\t\n\r\f\v]*)[\t\r\f\v]*(?:\n|$))?
- """, re.VERBOSE | re.MULTILINE)
+ """,
+ re.VERBOSE | re.MULTILINE,
+ )
# can be used for comparisons
NULL_HEX_SHA = "0" * 40
NULL_BIN_SHA = b"\0" * 20
- __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
- "new_file", "deleted_file", "copied_file", "raw_rename_from",
- "raw_rename_to", "diff", "change_type", "score")
-
- def __init__(self, repo: 'Repo',
- a_rawpath: Optional[bytes], b_rawpath: Optional[bytes],
- a_blob_id: Union[str, bytes, None], b_blob_id: Union[str, bytes, None],
- a_mode: Union[bytes, str, None], b_mode: Union[bytes, str, None],
- new_file: bool, deleted_file: bool, copied_file: bool,
- raw_rename_from: Optional[bytes], raw_rename_to: Optional[bytes],
- diff: Union[str, bytes, None], change_type: Optional[Lit_change_type], score: Optional[int]) -> None:
+ __slots__ = (
+ "a_blob",
+ "b_blob",
+ "a_mode",
+ "b_mode",
+ "a_rawpath",
+ "b_rawpath",
+ "new_file",
+ "deleted_file",
+ "copied_file",
+ "raw_rename_from",
+ "raw_rename_to",
+ "diff",
+ "change_type",
+ "score",
+ )
+
+ def __init__(
+ self,
+ repo: "Repo",
+ a_rawpath: Optional[bytes],
+ b_rawpath: Optional[bytes],
+ a_blob_id: Union[str, bytes, None],
+ b_blob_id: Union[str, bytes, None],
+ a_mode: Union[bytes, str, None],
+ b_mode: Union[bytes, str, None],
+ new_file: bool,
+ deleted_file: bool,
+ copied_file: bool,
+ raw_rename_from: Optional[bytes],
+ raw_rename_to: Optional[bytes],
+ diff: Union[str, bytes, None],
+ change_type: Optional[Lit_change_type],
+ score: Optional[int],
+ ) -> None:
assert a_rawpath is None or isinstance(a_rawpath, bytes)
assert b_rawpath is None or isinstance(b_rawpath, bytes)
@@ -307,22 +363,26 @@ class Diff(object):
# we need to overwrite "repo" to the corresponding submodule's repo instead
if repo and a_rawpath:
for submodule in repo.submodules:
- if submodule.path == a_rawpath.decode(defenc, 'replace'):
+ if submodule.path == a_rawpath.decode(defenc, "replace"):
if submodule.module_exists():
repo = submodule.module()
break
- self.a_blob: Union['IndexObject', None]
+ self.a_blob: Union["IndexObject", None]
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
self.a_blob = None
else:
- self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
+ self.a_blob = Blob(
+ repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path
+ )
- self.b_blob: Union['IndexObject', None]
+ self.b_blob: Union["IndexObject", None]
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
self.b_blob = None
else:
- self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
+ self.b_blob = Blob(
+ repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path
+ )
self.new_file: bool = new_file
self.deleted_file: bool = deleted_file
@@ -358,10 +418,10 @@ class Diff(object):
elif self.b_blob:
h %= self.b_blob.path
- msg: str = ''
- line = None # temp line
- line_length = 0 # line length
- for b, n in zip((self.a_blob, self.b_blob), ('lhs', 'rhs')):
+ msg: str = ""
+ line = None # temp line
+ line_length = 0 # line length
+ for b, n in zip((self.a_blob, self.b_blob), ("lhs", "rhs")):
if b:
line = "\n%s: %o | %s" % (n, b.mode, b.hexsha)
else:
@@ -372,26 +432,30 @@ class Diff(object):
# END for each blob
# add headline
- h += '\n' + '=' * line_length
+ h += "\n" + "=" * line_length
if self.deleted_file:
- msg += '\nfile deleted in rhs'
+ msg += "\nfile deleted in rhs"
if self.new_file:
- msg += '\nfile added in rhs'
+ msg += "\nfile added in rhs"
if self.copied_file:
- msg += '\nfile %r copied from %r' % (self.b_path, self.a_path)
+ msg += "\nfile %r copied from %r" % (self.b_path, self.a_path)
if self.rename_from:
- msg += '\nfile renamed from %r' % self.rename_from
+ msg += "\nfile renamed from %r" % self.rename_from
if self.rename_to:
- msg += '\nfile renamed to %r' % self.rename_to
+ msg += "\nfile renamed to %r" % self.rename_to
if self.diff:
- msg += '\n---'
+ msg += "\n---"
try:
- msg += self.diff.decode(defenc) if isinstance(self.diff, bytes) else self.diff
+ msg += (
+ self.diff.decode(defenc)
+ if isinstance(self.diff, bytes)
+ else self.diff
+ )
except UnicodeDecodeError:
- msg += 'OMITTED BINARY DATA'
+ msg += "OMITTED BINARY DATA"
# end handle encoding
- msg += '\n---'
+ msg += "\n---"
# END diff info
# Python2 silliness: have to assure we convert our likely to be unicode object to a string with the
@@ -400,37 +464,44 @@ class Diff(object):
# end
return res
- @ property
+ @property
def a_path(self) -> Optional[str]:
- return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None
+ return self.a_rawpath.decode(defenc, "replace") if self.a_rawpath else None
- @ property
+ @property
def b_path(self) -> Optional[str]:
- return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None
+ return self.b_rawpath.decode(defenc, "replace") if self.b_rawpath else None
- @ property
+ @property
def rename_from(self) -> Optional[str]:
- return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None
+ return (
+ self.raw_rename_from.decode(defenc, "replace")
+ if self.raw_rename_from
+ else None
+ )
- @ property
+ @property
def rename_to(self) -> Optional[str]:
- return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None
+ return (
+ self.raw_rename_to.decode(defenc, "replace") if self.raw_rename_to else None
+ )
- @ property
+ @property
def renamed(self) -> bool:
""":returns: True if the blob of our diff has been renamed
:note: This property is deprecated, please use ``renamed_file`` instead.
"""
return self.renamed_file
- @ property
+ @property
def renamed_file(self) -> bool:
- """:returns: True if the blob of our diff has been renamed
- """
+ """:returns: True if the blob of our diff has been renamed"""
return self.rename_from != self.rename_to
- @ classmethod
- def _pick_best_path(cls, path_match: bytes, rename_match: bytes, path_fallback_match: bytes) -> Optional[bytes]:
+ @classmethod
+ def _pick_best_path(
+ cls, path_match: bytes, rename_match: bytes, path_fallback_match: bytes
+ ) -> Optional[bytes]:
if path_match:
return decode_path(path_match)
@@ -442,34 +513,51 @@ class Diff(object):
return None
- @ classmethod
- def _index_from_patch_format(cls, repo: 'Repo', proc: Union['Popen', 'Git.AutoInterrupt']) -> DiffIndex:
+ @classmethod
+ def _index_from_patch_format(
+ cls, repo: "Repo", proc: Union["Popen", "Git.AutoInterrupt"]
+ ) -> DiffIndex:
"""Create a new DiffIndex from the given text which must be in patch format
:param repo: is the repository we are operating on - it is required
:param stream: result of 'git diff' as a stream (supporting file protocol)
- :return: git.DiffIndex """
+ :return: git.DiffIndex"""
## FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise.
text_list: List[bytes] = []
- handle_process_output(proc, text_list.append, None, finalize_process, decode_streams=False)
+ handle_process_output(
+ proc, text_list.append, None, finalize_process, decode_streams=False
+ )
# for now, we have to bake the stream
- text = b''.join(text_list)
- index: 'DiffIndex' = DiffIndex()
+ text = b"".join(text_list)
+ index: "DiffIndex" = DiffIndex()
previous_header: Union[Match[bytes], None] = None
header: Union[Match[bytes], None] = None
a_path, b_path = None, None # for mypy
a_mode, b_mode = None, None # for mypy
for _header in cls.re_header.finditer(text):
- a_path_fallback, b_path_fallback, \
- old_mode, new_mode, \
- rename_from, rename_to, \
- new_file_mode, deleted_file_mode, copied_file_name, \
- a_blob_id, b_blob_id, b_mode, \
- a_path, b_path = _header.groups()
-
- new_file, deleted_file, copied_file = \
- bool(new_file_mode), bool(deleted_file_mode), bool(copied_file_name)
+ (
+ a_path_fallback,
+ b_path_fallback,
+ old_mode,
+ new_mode,
+ rename_from,
+ rename_to,
+ new_file_mode,
+ deleted_file_mode,
+ copied_file_name,
+ a_blob_id,
+ b_blob_id,
+ b_mode,
+ a_path,
+ b_path,
+ ) = _header.groups()
+
+ new_file, deleted_file, copied_file = (
+ bool(new_file_mode),
+ bool(deleted_file_mode),
+ bool(copied_file_name),
+ )
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback)
@@ -477,41 +565,53 @@ class Diff(object):
# Our only means to find the actual text is to see what has not been matched by our regex,
# and then retro-actively assign it to our index
if previous_header is not None:
- index[-1].diff = text[previous_header.end():_header.start()]
+ index[-1].diff = text[previous_header.end() : _header.start()]
# end assign actual diff
# Make sure the mode is set if the path is set. Otherwise the resulting blob is invalid
# We just use the one mode we should have parsed
- a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
+ a_mode = (
+ old_mode
+ or deleted_file_mode
+ or (a_path and (b_mode or new_mode or new_file_mode))
+ )
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
- index.append(Diff(repo,
- a_path,
- b_path,
- a_blob_id and a_blob_id.decode(defenc),
- b_blob_id and b_blob_id.decode(defenc),
- a_mode and a_mode.decode(defenc),
- b_mode and b_mode.decode(defenc),
- new_file, deleted_file, copied_file,
- rename_from,
- rename_to,
- None, None, None))
+ index.append(
+ Diff(
+ repo,
+ a_path,
+ b_path,
+ a_blob_id and a_blob_id.decode(defenc),
+ b_blob_id and b_blob_id.decode(defenc),
+ a_mode and a_mode.decode(defenc),
+ b_mode and b_mode.decode(defenc),
+ new_file,
+ deleted_file,
+ copied_file,
+ rename_from,
+ rename_to,
+ None,
+ None,
+ None,
+ )
+ )
previous_header = _header
header = _header
# end for each header we parse
if index and header:
- index[-1].diff = text[header.end():]
+ index[-1].diff = text[header.end() :]
# end assign last diff
return index
- @ staticmethod
- def _handle_diff_line(lines_bytes: bytes, repo: 'Repo', index: DiffIndex) -> None:
+ @staticmethod
+ def _handle_diff_line(lines_bytes: bytes, repo: "Repo", index: DiffIndex) -> None:
lines = lines_bytes.decode(defenc)
- for line in lines.split(':')[1:]:
- meta, _, path = line.partition('\x00')
- path = path.rstrip('\x00')
+ for line in lines.split(":")[1:]:
+ meta, _, path = line.partition("\x00")
+ path = path.rstrip("\x00")
a_blob_id: Optional[str]
b_blob_id: Optional[str]
old_mode, new_mode, a_blob_id, b_blob_id, _change_type = meta.split(None, 4)
@@ -520,7 +620,7 @@ class Diff(object):
# 100: score (in case of copy and rename)
# assert is_change_type(_change_type[0]), f"Unexpected value for change_type received: {_change_type[0]}"
change_type: Lit_change_type = cast(Lit_change_type, _change_type[0])
- score_str = ''.join(_change_type[1:])
+ score_str = "".join(_change_type[1:])
score = int(score_str) if score_str.isdigit() else None
path = path.strip()
a_path = path.encode(defenc)
@@ -533,41 +633,60 @@ class Diff(object):
# NOTE: We cannot conclude from the existence of a blob to change type
# as diffs with the working do not have blobs yet
- if change_type == 'D':
+ if change_type == "D":
b_blob_id = None # Optional[str]
deleted_file = True
- elif change_type == 'A':
+ elif change_type == "A":
a_blob_id = None
new_file = True
- elif change_type == 'C':
+ elif change_type == "C":
copied_file = True
- a_path_str, b_path_str = path.split('\x00', 1)
+ a_path_str, b_path_str = path.split("\x00", 1)
a_path = a_path_str.encode(defenc)
b_path = b_path_str.encode(defenc)
- elif change_type == 'R':
- a_path_str, b_path_str = path.split('\x00', 1)
+ elif change_type == "R":
+ a_path_str, b_path_str = path.split("\x00", 1)
a_path = a_path_str.encode(defenc)
b_path = b_path_str.encode(defenc)
rename_from, rename_to = a_path, b_path
- elif change_type == 'T':
+ elif change_type == "T":
# Nothing to do
pass
# END add/remove handling
- diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode,
- new_file, deleted_file, copied_file, rename_from, rename_to,
- '', change_type, score)
+ diff = Diff(
+ repo,
+ a_path,
+ b_path,
+ a_blob_id,
+ b_blob_id,
+ old_mode,
+ new_mode,
+ new_file,
+ deleted_file,
+ copied_file,
+ rename_from,
+ rename_to,
+ "",
+ change_type,
+ score,
+ )
index.append(diff)
- @ classmethod
- def _index_from_raw_format(cls, repo: 'Repo', proc: 'Popen') -> 'DiffIndex':
+ @classmethod
+ def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex":
"""Create a new DiffIndex from the given stream which must be in raw format.
:return: git.DiffIndex"""
# handles
# :100644 100644 687099101... 37c5e30c8... M .gitignore
- index: 'DiffIndex' = DiffIndex()
- handle_process_output(proc, lambda byt: cls._handle_diff_line(byt, repo, index),
- None, finalize_process, decode_streams=False)
+ index: "DiffIndex" = DiffIndex()
+ handle_process_output(
+ proc,
+ lambda byt: cls._handle_diff_line(byt, repo, index),
+ None,
+ finalize_process,
+ decode_streams=False,
+ )
return index