From b372e26366348920eae32ee81a47b469b511a21f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Oct 2009 19:19:57 +0200 Subject: added Diffable interface to objects.base, its used by Commit and Tree objects. Diff class has been prepared to process raw input, but its not yet more than a frame --- lib/git/diff.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 0db83b4f..e16d5b07 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -9,7 +9,7 @@ import objects.blob as blob class Diff(object): """ - A Diff contains diff information between two commits. + A Diff contains diff information between two Trees. It contains two sides a and b of the diff, members are prefixed with "a" and "b" respectively to inidcate that. @@ -74,18 +74,20 @@ class Diff(object): self.diff = diff @classmethod - def _list_from_string(cls, repo, text): + def _index_from_patch_format(cls, repo, stream): """ - Create a new diff object from the given text + Create a new DiffIndex from the given text which must be in patch format ``repo`` is the repository we are operating on - it is required - ``text`` - result of 'git diff' between two commits or one commit and the index + ``stream`` + result of 'git diff' as a stream (supporting file protocol) Returns - git.Diff[] + git.DiffIndex """ + # for now, we have to bake the stream + text = stream.read() diffs = [] diff_header = cls.re_header.match @@ -102,4 +104,14 @@ class Diff(object): new_file, deleted_file, rename_from, rename_to, diff[header.end():])) return diffs + + @classmethod + def _index_from_raw_format(cls, repo, stream): + """ + Create a new DiffIndex from the given stream which must be in raw format. 
+ + Returns + git.DiffIndex + """ + raise NotImplementedError("") -- cgit v1.2.1 From aed099a73025422f0550f5dd5c3e4651049494b2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 12:54:16 +0200 Subject: resolved cyclic inclusion issue by moving the Diffable interface into the diff module, which probably is the right thing to do anyway --- lib/git/diff.py | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index e16d5b07..760897ec 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -7,6 +7,78 @@ import re import objects.blob as blob + +class Diffable(object): + """ + Common interface for all object that can be diffed against another object of compatible type. + + NOTE: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object. + """ + __slots__ = tuple() + + # subclasses provide additional arguments to the git-diff comamnd by supplynig + # them in this tuple + _diff_args = tuple() + + def diff(self, other=None, paths=None, create_patch=False, **kwargs): + """ + Creates diffs between two items being trees, trees and index or an + index and the working tree. + + ``other`` + Is the item to compare us with. + If None, we will be compared to the working tree. + + ``paths`` + is a list of paths or a single path to limit the diff to. + It will only include at least one of the givne path or paths. + + ``create_patch`` + If True, the returned Diff contains a detailed patch that if applied + makes the self to other. Patches are somwhat costly as blobs have to be read + and diffed. + + ``kwargs`` + Additional arguments passed to git-diff, such as + R=True to swap both sides of the diff. 
+ + Returns + git.DiffIndex + + Note + Rename detection will only work if create_patch is True + """ + args = list(self._diff_args[:]) + args.append( "--abbrev=40" ) # we need full shas + args.append( "--full-index" ) # get full index paths, not only filenames + + if create_patch: + args.append("-p") + args.append("-M") # check for renames + else: + args.append("--raw") + + paths = paths or [] + if paths: + paths.insert(0, "--") + + if other is not None: + args.insert(0, other) + + args.insert(0,self) + args.extend(paths) + + kwargs['as_process'] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = Diff._index_from_raw_format + if create_patch: + diff_method = Diff._index_from_patch_format + return diff_method(self.repo, proc.stdout) + + class Diff(object): """ A Diff contains diff information between two Trees. -- cgit v1.2.1 From e063d101face690b8cf4132fa419c5ce3857ef44 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 13:24:04 +0200 Subject: diff: implemented raw diff parsing which appears to be able to handle possible input types, DiffIndex still requires implementation though --- lib/git/diff.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 53 insertions(+), 9 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 760897ec..6a6a097c 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -60,15 +60,19 @@ class Diffable(object): else: args.append("--raw") - paths = paths or [] - if paths: - paths.insert(0, "--") + if paths is not None and not isinstance(paths, (tuple,list)): + paths = [ paths ] if other is not None: args.insert(0, other) args.insert(0,self) - args.extend(paths) + + # paths is list here or None + if paths: + args.append("--") + args.extend(paths) + # END paths handling kwargs['as_process'] = True proc = self.repo.git.diff(*args, **kwargs) @@ -79,6 +83,16 @@ class Diffable(object): return diff_method(self.repo, proc.stdout) +class 
DiffIndex(list): + """ + Implements an Index for diffs, allowing a list of Diffs to be queried by + the diff properties. + + The class improves the diff handling convenience + """ + + + class Diff(object): """ A Diff contains diff information between two Trees. @@ -99,7 +113,7 @@ class Diff(object): ``Deleted File``:: b_mode is None - b_blob is NOne + b_blob is None """ # precompiled regex @@ -160,7 +174,7 @@ class Diff(object): """ # for now, we have to bake the stream text = stream.read() - diffs = [] + index = DiffIndex() diff_header = cls.re_header.match for diff in ('\n' + text).split('\ndiff --git')[1:]: @@ -171,19 +185,49 @@ class Diff(object): a_blob_id, b_blob_id, b_mode = header.groups() new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode) - diffs.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, + index.append(Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode, new_file, deleted_file, rename_from, rename_to, diff[header.end():])) - return diffs + return index @classmethod def _index_from_raw_format(cls, repo, stream): """ Create a new DiffIndex from the given stream which must be in raw format. 
+ NOTE: + This format is inherently incapable of detecting renames, hence we only + modify, delete and add files + Returns git.DiffIndex """ - raise NotImplementedError("") + # handles + # :100644 100644 6870991011cc8d9853a7a8a6f02061512c6a8190 37c5e30c879213e9ae83b21e9d11e55fc20c54b7 M .gitignore + index = DiffIndex() + for line in stream: + if not line.startswith(":"): + continue + # END its not a valid diff line + old_mode, new_mode, a_blob_id, b_blob_id, modification_id, path = line[1:].split() + a_path = path + b_path = path + deleted_file = False + new_file = False + if modification_id == 'D': + b_path = None + deleted_file = True + elif modification_id == 'A': + a_path = None + new_file = True + # END add/remove handling + + + diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, + new_file, deleted_file, None, None, '') + index.append(diff) + # END for each line + + return index -- cgit v1.2.1 From 9acc7806d6bdb306a929c460437d3d03e5e48dcd Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 14:24:30 +0200 Subject: DiffIndex implemented including test --- lib/git/diff.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 56 insertions(+), 9 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 6a6a097c..1774597a 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -22,6 +22,10 @@ class Diffable(object): # them in this tuple _diff_args = tuple() + # Temporary standin for Index type until we have a real index type + class Index(object): + pass + def diff(self, other=None, paths=None, create_patch=False, **kwargs): """ Creates diffs between two items being trees, trees and index or an @@ -30,6 +34,7 @@ class Diffable(object): ``other`` Is the item to compare us with. If None, we will be compared to the working tree. + If Index ( type ), it will be compared against the index ``paths`` is a list of paths or a single path to limit the diff to. 
@@ -63,8 +68,10 @@ class Diffable(object): if paths is not None and not isinstance(paths, (tuple,list)): paths = [ paths ] - if other is not None: + if other is not None and other is not self.Index: args.insert(0, other) + if other is self.Index: + args.insert(0, "--cached") args.insert(0,self) @@ -90,7 +97,33 @@ class DiffIndex(list): The class improves the diff handling convenience """ + # change type invariant identifying possible ways a blob can have changed + # A = Added + # D = Deleted + # R = Renamed + # NOTE: 'Modified' mode is impllied as it wouldn't be listed as a diff otherwise + change_type = ("A", "D", "R") + + def iter_change_type(self, change_type): + """ + Return + iterator yieling Diff instances that match the given change_type + + ``change_type`` + Member of DiffIndex.change_type + """ + if change_type not in self.change_type: + raise ValueError( "Invalid change type: %s" % change_type ) + + for diff in self: + if change_type == "A" and diff.new_file: + yield diff + elif change_type == "D" and diff.deleted_file: + yield diff + elif change_type == "R" and diff.renamed: + yield diff + # END for each diff class Diff(object): @@ -132,7 +165,7 @@ class Diff(object): """, re.VERBOSE | re.MULTILINE) re_is_null_hexsha = re.compile( r'^0{40}$' ) __slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file", - "rename_from", "rename_to", "renamed", "diff") + "rename_from", "rename_to", "diff") def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode, b_mode, new_file, deleted_file, rename_from, @@ -148,17 +181,29 @@ class Diff(object): self.a_mode = a_mode self.b_mode = b_mode + if self.a_mode: self.a_mode = blob.Blob._mode_str_to_int( self.a_mode ) if self.b_mode: self.b_mode = blob.Blob._mode_str_to_int( self.b_mode ) + self.new_file = new_file self.deleted_file = deleted_file - self.rename_from = rename_from - self.rename_to = rename_to - self.renamed = rename_from != rename_to + + # be clear and use None instead of empty 
strings + self.rename_from = rename_from or None + self.rename_to = rename_to or None + self.diff = diff + @property + def renamed(self): + """ + Returns: + True if the blob of our diff has been renamed + """ + return self.rename_from != self.rename_to + @classmethod def _index_from_patch_format(cls, repo, stream): """ @@ -210,20 +255,22 @@ class Diff(object): if not line.startswith(":"): continue # END its not a valid diff line - old_mode, new_mode, a_blob_id, b_blob_id, modification_id, path = line[1:].split() + old_mode, new_mode, a_blob_id, b_blob_id, change_type, path = line[1:].split() a_path = path b_path = path deleted_file = False new_file = False - if modification_id == 'D': + + # NOTE: We cannot conclude from the existance of a blob to change type + # as diffs with the working do not have blobs yet + if change_type == 'D': b_path = None deleted_file = True - elif modification_id == 'A': + elif change_type == 'A': a_path = None new_file = True # END add/remove handling - diff = Diff(repo, a_path, b_path, a_blob_id, b_blob_id, old_mode, new_mode, new_file, deleted_file, None, None, '') index.append(diff) -- cgit v1.2.1 From 9840afda82fafcc3eaf52351c64e2cfdb8962397 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 17:09:13 +0200 Subject: diff method now checks for git-diff errors that can easily occur if the repository is bare and if there is no index or second tree specified --- lib/git/diff.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 1774597a..9b884502 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -6,7 +6,7 @@ import re import objects.blob as blob - +from errors import GitCommandError class Diffable(object): """ @@ -26,7 +26,7 @@ class Diffable(object): class Index(object): pass - def diff(self, other=None, paths=None, create_patch=False, **kwargs): + def diff(self, other=Index, paths=None, create_patch=False, 
**kwargs): """ Creates diffs between two items being trees, trees and index or an index and the working tree. @@ -34,7 +34,9 @@ class Diffable(object): ``other`` Is the item to compare us with. If None, we will be compared to the working tree. - If Index ( type ), it will be compared against the index + If Index ( type ), it will be compared against the index. + It defaults to Index to assure the method will not by-default fail + on bare repositories. ``paths`` is a list of paths or a single path to limit the diff to. @@ -53,7 +55,10 @@ class Diffable(object): git.DiffIndex Note - Rename detection will only work if create_patch is True + Rename detection will only work if create_patch is True. + + On a bare repository, 'other' needs to be provided as Index or as + as Tree/Commit, or a git command error will occour """ args = list(self._diff_args[:]) args.append( "--abbrev=40" ) # we need full shas @@ -87,7 +92,13 @@ class Diffable(object): diff_method = Diff._index_from_raw_format if create_patch: diff_method = Diff._index_from_patch_format - return diff_method(self.repo, proc.stdout) + index = diff_method(self.repo, proc.stdout) + + status = proc.wait() + if status != 0: + raise GitCommandError("git-diff", status, proc.stderr ) + + return index class DiffIndex(list): -- cgit v1.2.1 From ea33fe8b21d2b02f902b131aba0d14389f2f8715 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 22:14:02 +0200 Subject: Index: Is now diffable and appears to properly implement diffing against other items as well as the working tree Diff.Diffable: added callback allowing superclasses to preprocess diff arguments Diff.Diff: added eq, ne and hash methods, string methods would be nice --- lib/git/diff.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 9b884502..03e6709c 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -18,13 +18,18 @@ 
class Diffable(object): """ __slots__ = tuple() - # subclasses provide additional arguments to the git-diff comamnd by supplynig - # them in this tuple - _diff_args = tuple() - - # Temporary standin for Index type until we have a real index type + # standin indicating you want to diff against the index class Index(object): pass + + def _process_diff_args(self, args): + """ + Returns + possibly altered version of the given args list. + Method is called right before git command execution. + Subclasses can use it to alter the behaviour of the superclass + """ + return args def diff(self, other=Index, paths=None, create_patch=False, **kwargs): """ @@ -60,13 +65,13 @@ class Diffable(object): On a bare repository, 'other' needs to be provided as Index or as as Tree/Commit, or a git command error will occour """ - args = list(self._diff_args[:]) + args = list() args.append( "--abbrev=40" ) # we need full shas args.append( "--full-index" ) # get full index paths, not only filenames if create_patch: args.append("-p") - args.append("-M") # check for renames + args.append("-M") # check for renames else: args.append("--raw") @@ -87,7 +92,7 @@ class Diffable(object): # END paths handling kwargs['as_process'] = True - proc = self.repo.git.diff(*args, **kwargs) + proc = self.repo.git.diff(*self._process_diff_args(args), **kwargs) diff_method = Diff._index_from_raw_format if create_patch: @@ -96,7 +101,7 @@ class Diffable(object): status = proc.wait() if status != 0: - raise GitCommandError("git-diff", status, proc.stderr ) + raise GitCommandError(("git diff",)+tuple(args), status, proc.stderr.read()) return index @@ -207,6 +212,20 @@ class Diff(object): self.diff = diff + + def __eq__(self, other): + for name in self.__slots__: + if getattr(self, name) != getattr(other, name): + return False + # END for each name + return True + + def __ne__(self, other): + return not ( self == other ) + + def __hash__(self): + return hash(tuple(getattr(self,n) for n in self.__slots__)) + 
@property def renamed(self): """ -- cgit v1.2.1 From 78a46c432b31a0ea4c12c391c404cd128df4d709 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 26 Oct 2009 19:51:00 +0100 Subject: cmd.wait: AutoKill wrapped process will automatically raise on errors to unify error handling amongst clients using the process directly. A flag that allows this behaviour to be overridden easily may need to be added --- lib/git/diff.py | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 03e6709c..b0e0898a 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -100,9 +100,6 @@ class Diffable(object): index = diff_method(self.repo, proc.stdout) status = proc.wait() - if status != 0: - raise GitCommandError(("git diff",)+tuple(args), status, proc.stderr.read()) - return index -- cgit v1.2.1 From 3cb5ba18ab1a875ef6b62c65342de476be47871b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 3 Nov 2009 16:35:33 +0100 Subject: object: renamed id attribute to sha as it in fact is always being rewritten as sha, even if the passed in id was a ref. 
This is done to ensure objects are uniquely identified and will compare correctly --- lib/git/diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index b0e0898a..9a826630 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -186,11 +186,11 @@ class Diff(object): if not a_blob_id or self.re_is_null_hexsha.search(a_blob_id): self.a_blob = None else: - self.a_blob = blob.Blob(repo, id=a_blob_id, mode=a_mode, path=a_path) + self.a_blob = blob.Blob(repo, a_blob_id, mode=a_mode, path=a_path) if not b_blob_id or self.re_is_null_hexsha.search(b_blob_id): self.b_blob = None else: - self.b_blob = blob.Blob(repo, id=b_blob_id, mode=b_mode, path=b_path) + self.b_blob = blob.Blob(repo, b_blob_id, mode=b_mode, path=b_path) self.a_mode = a_mode self.b_mode = b_mode -- cgit v1.2.1 From 52bb0046c0bf0e50598c513e43b76d593f2cbbff Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Nov 2009 16:26:03 +0100 Subject: added query for 'M' modified diffs to DiffIndex including test. 
The latter one was made faster by reducing the number of permutations to the minimum --- lib/git/diff.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib/git/diff.py') diff --git a/lib/git/diff.py b/lib/git/diff.py index 9a826630..a43d3725 100644 --- a/lib/git/diff.py +++ b/lib/git/diff.py @@ -114,8 +114,8 @@ class DiffIndex(list): # A = Added # D = Deleted # R = Renamed - # NOTE: 'Modified' mode is impllied as it wouldn't be listed as a diff otherwise - change_type = ("A", "D", "R") + # M = modified + change_type = ("A", "D", "R", "M") def iter_change_type(self, change_type): @@ -124,7 +124,15 @@ class DiffIndex(list): iterator yieling Diff instances that match the given change_type ``change_type`` - Member of DiffIndex.change_type + Member of DiffIndex.change_type, namely + + 'A' for added paths + + 'D' for deleted paths + + 'R' for renamed paths + + 'M' for paths with modified data """ if change_type not in self.change_type: raise ValueError( "Invalid change type: %s" % change_type ) @@ -136,6 +144,8 @@ class DiffIndex(list): yield diff elif change_type == "R" and diff.renamed: yield diff + elif change_type == "M" and diff.a_blob and diff.b_blob and diff.a_blob != diff.b_blob: + yield diff # END for each diff -- cgit v1.2.1