From bb24f67e64b4ebe11c4d3ce7df021a6ad7ca98f2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Oct 2009 16:09:07 +0200 Subject: Fixed object bug that would cause object ids not to be resolved to sha's as this was assumed - now there is a test for it as well repo: removed diff and commit_diff methods, added 'head' property returning the current head as Reference object --- lib/git/objects/base.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 3b48e066..d780c7b3 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -15,22 +15,12 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: - inst = Object(repo,id) + inst = Object.new(repo,id) """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass - def __new__(cls, repo, id, *args, **kwargs): - if cls is Object: - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = super(Object,cls).__new__(obj_type, repo, hexsha, *args, **kwargs) - inst.size = size - return inst - else: - return super(Object,cls).__new__(cls, repo, id, *args, **kwargs) - def __init__(self, repo, id): """ Initialize an object by identifying it by its id. All keyword arguments @@ -45,7 +35,25 @@ class Object(LazyMixin): super(Object,self).__init__() self.repo = repo self.id = id - + + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. 
+ """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + def _set_self_from_args_(self, args_dict): """ Initialize attributes on self from the given dict that was retrieved -- cgit v1.2.1 From b372e26366348920eae32ee81a47b469b511a21f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 16 Oct 2009 19:19:57 +0200 Subject: added Diffable interface to objects.base, its used by Commit and Tree objects. Diff class has been prepared to process raw input, but its not yet more than a frame --- lib/git/objects/base.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index d780c7b3..1bb2e8f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -172,3 +172,74 @@ class IndexObject(Object): return mode +class Diffable(object): + """ + Common interface for all object that can be diffed against another object of compatible type. + + NOTE: + Subclasses require a repo member as it is the case for Object instances, for practical + reasons we do not derive from Object. + """ + __slots__ = tuple() + + # subclasses provide additional arguments to the git-diff comamnd by supplynig + # them in this tuple + _diff_args = tuple() + + def diff(self, other=None, paths=None, create_patch=False, **kwargs): + """ + Creates diffs between two items being trees, trees and index or an + index and the working tree. + + ``other`` + Is the item to compare us with. + If None, we will be compared to the working tree. + + ``paths`` + is a list of paths or a single path to limit the diff to. + It will only include at least one of the givne path or paths. + + ``create_patch`` + If True, the returned Diff contains a detailed patch that if applied + makes the self to other. 
Patches are somwhat costly as blobs have to be read + and diffed. + + ``kwargs`` + Additional arguments passed to git-diff, such as + R=True to swap both sides of the diff. + + Returns + git.DiffIndex + + Note + Rename detection will only work if create_patch is True + """ + args = list(self._diff_args[:]) + args.append( "--abbrev=40" ) # we need full shas + args.append( "--full-index" ) # get full index paths, not only filenames + + if create_patch: + args.append("-p") + args.append("-M") # check for renames + else: + args.append("--raw") + + paths = paths or [] + if paths: + paths.insert(0, "--") + + if other is not None: + args.insert(0, other) + + args.insert(0,self) + args.extend(paths) + + kwargs['as_process'] = True + proc = self.repo.git.diff(*args, **kwargs) + + diff_method = diff.Diff._index_from_raw_format + if create_patch: + diff_method = diff.Diff._index_from_patch_format(self.repo, proc.stdout) + return diff_method(self.repo, proc.stdout) + + -- cgit v1.2.1 From 9946e0ce07c8d93a43bd7b8900ddf5d913fe3b03 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 12:33:06 +0200 Subject: implemented diff tests, but will have to move the diff module as it needs to create objects, whose import would create a dependency cycle --- lib/git/objects/base.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 1bb2e8f1..b347b5f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -214,6 +214,9 @@ class Diffable(object): Note Rename detection will only work if create_patch is True """ + # import it in a retared fashion to avoid dependency cycle + from git.diff import Diff + args = list(self._diff_args[:]) args.append( "--abbrev=40" ) # we need full shas args.append( "--full-index" ) # get full index paths, not only filenames @@ -237,9 +240,9 @@ class Diffable(object): kwargs['as_process'] = True proc = 
self.repo.git.diff(*args, **kwargs) - diff_method = diff.Diff._index_from_raw_format + diff_method = Diff._index_from_raw_format if create_patch: - diff_method = diff.Diff._index_from_patch_format(self.repo, proc.stdout) + diff_method = Diff._index_from_patch_format return diff_method(self.repo, proc.stdout) -- cgit v1.2.1 From aed099a73025422f0550f5dd5c3e4651049494b2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 18 Oct 2009 12:54:16 +0200 Subject: resolved cyclic inclusion issue by moving the Diffable interface into the diff module, which probably is the right thing to do anyway --- lib/git/objects/base.py | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index b347b5f1..ab1da7b0 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -170,79 +170,4 @@ class IndexObject(Object): mode += int(char) << iteration*3 # END for each char return mode - -class Diffable(object): - """ - Common interface for all object that can be diffed against another object of compatible type. - - NOTE: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object. - """ - __slots__ = tuple() - - # subclasses provide additional arguments to the git-diff comamnd by supplynig - # them in this tuple - _diff_args = tuple() - - def diff(self, other=None, paths=None, create_patch=False, **kwargs): - """ - Creates diffs between two items being trees, trees and index or an - index and the working tree. - - ``other`` - Is the item to compare us with. - If None, we will be compared to the working tree. - - ``paths`` - is a list of paths or a single path to limit the diff to. - It will only include at least one of the givne path or paths. - - ``create_patch`` - If True, the returned Diff contains a detailed patch that if applied - makes the self to other. 
Patches are somwhat costly as blobs have to be read - and diffed. - - ``kwargs`` - Additional arguments passed to git-diff, such as - R=True to swap both sides of the diff. - - Returns - git.DiffIndex - - Note - Rename detection will only work if create_patch is True - """ - # import it in a retared fashion to avoid dependency cycle - from git.diff import Diff - - args = list(self._diff_args[:]) - args.append( "--abbrev=40" ) # we need full shas - args.append( "--full-index" ) # get full index paths, not only filenames - - if create_patch: - args.append("-p") - args.append("-M") # check for renames - else: - args.append("--raw") - - paths = paths or [] - if paths: - paths.insert(0, "--") - - if other is not None: - args.insert(0, other) - - args.insert(0,self) - args.extend(paths) - - kwargs['as_process'] = True - proc = self.repo.git.diff(*args, **kwargs) - - diff_method = Diff._index_from_raw_format - if create_patch: - diff_method = Diff._index_from_patch_format - return diff_method(self.repo, proc.stdout) - - -- cgit v1.2.1 From 33fa178eeb7bf519f5fff118ebc8e27e76098363 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 11:04:30 +0200 Subject: added Object.data_stream property allowing to stream object data directly. Considering the implementation of the git command which temporarily keeps it in a cache, it doesn't make a huge difference as the data is kept in memory while streaming. 
Only good thing is that it is in a different process so python will never see it if done properly --- lib/git/objects/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index ab1da7b0..dd67a3c7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -16,6 +16,9 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: inst = Object.new(repo,id) + inst.id # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object """ TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) @@ -115,6 +118,15 @@ class Object(LazyMixin): """ return '<git.%s "%s">' % (self.__class__.__name__, self.id) + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.id, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + class IndexObject(Object): """ -- cgit v1.2.1 From 4fe5cfa0e063a8d51a1eb6f014e2aaa994e5e7d4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 22 Oct 2009 12:28:04 +0200 Subject: Stream_data streams data to a given output stream most efficiently with a low memory footprint. Still, the git-cat-file command keeps all data in an internal buffer instead of streaming it directly. This is a git design issue though, and will be hard to address without some proper git-hacking. 
Conflicts: lib/git/cmd.py --- lib/git/objects/base.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index dd67a3c7..0dfd1a23 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -127,6 +127,18 @@ class Object(LazyMixin): proc = self.repo.git.cat_file(self.type, self.id, as_process=True) return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.id, output_stream=ostream) + return self class IndexObject(Object): """ -- cgit v1.2.1 From 0cd09bd306486028f5442c56ef2e947355a06282 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Oct 2009 21:49:13 +0200 Subject: index.remove implemented including thorough test --- lib/git/objects/base.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 0dfd1a23..0bece6f1 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -20,6 +20,7 @@ class Object(LazyMixin): inst.size # objects uncompressed data size inst.data # byte string containing the whole data of the object """ + NULL_HEX_SHA = '0'*40 TYPES = ("blob", "tree", "commit", "tag") __slots__ = ("repo", "id", "size", "data" ) type = None # to be set by subclass -- cgit v1.2.1 From 3cb5ba18ab1a875ef6b62c65342de476be47871b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 3 Nov 2009 16:35:33 +0100 Subject: object: renamed id attribute to sha as it in fact is always being rewritten as sha, even if the passed in id was a ref. 
This is done to assure objects are uniquely identified and will compare correctly --- lib/git/objects/base.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 0bece6f1..6dd03ba4 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -16,13 +16,13 @@ class Object(LazyMixin): This Object also serves as a constructor for instances of the correct type:: inst = Object.new(repo,id) - inst.id # objects sha in hex + inst.sha # objects sha in hex inst.size # objects uncompressed data size inst.data # byte string containing the whole data of the object """ NULL_HEX_SHA = '0'*40 TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "id", "size", "data" ) + __slots__ = ("repo", "sha", "size", "data" ) type = None # to be set by subclass def __init__(self, repo, id): @@ -38,7 +38,7 @@ class Object(LazyMixin): """ super(Object,self).__init__() self.repo = repo - self.id = id + self.sha = id @classmethod def new(cls, repo, id): @@ -76,11 +76,11 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.id) - assert typename == self.type, _assertion_msg_format % (self.id, typename, self.type) + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) else: super(Object,self)._set_cache_(attr) @@ -89,35 +89,35 @@ class Object(LazyMixin): Returns True if the objects 
have the same SHA1 """ - return self.id == other.id + return self.sha == other.sha def __ne__(self, other): """ Returns True if the objects do not have the same SHA1 """ - return self.id != other.id + return self.sha != other.sha def __hash__(self): """ Returns Hash of our id allowing objects to be used in dicts and sets """ - return hash(self.id) + return hash(self.sha) def __str__(self): """ Returns string of our SHA1 as understood by all git commands """ - return self.id + return self.sha def __repr__(self): """ Returns string with pythonic representation of our object """ - return '<git.%s "%s">' % (self.__class__.__name__, self.id) + return '<git.%s "%s">' % (self.__class__.__name__, self.sha) @property def data_stream(self): @@ -125,7 +125,7 @@ class Object(LazyMixin): Returns File Object compatible stream to the uncompressed raw data of the object """ - proc = self.repo.git.cat_file(self.type, self.id, as_process=True) + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) return utils.ProcessStreamAdapter(proc, "stdout") def stream_data(self, ostream): @@ -138,7 +138,7 @@ class Object(LazyMixin): Returns self """ - self.repo.git.cat_file(self.type, self.id, output_stream=ostream) + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) return self class IndexObject(Object): @@ -148,13 +148,13 @@ class IndexObject(Object): """ __slots__ = ("path", "mode") - def __init__(self, repo, id, mode=None, path=None): + def __init__(self, repo, sha, mode=None, path=None): """ Initialize a newly instanced IndexObject ``repo`` is the Repo we are located in - ``id`` : string + ``sha`` : string is the git object id as hex sha ``mode`` : int @@ -168,7 +168,7 @@ class IndexObject(Object): Path may not be set of the index object has been created directly as it cannot be retrieved without knowing the parent tree. 
""" - super(IndexObject, self).__init__(repo, id) + super(IndexObject, self).__init__(repo, sha) self._set_self_from_args_(locals()) if isinstance(mode, basestring): self.mode = self._mode_str_to_int(mode) -- cgit v1.2.1 From c4cde8df886112ee32b0a09fcac90c28c85ded7f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 4 Nov 2009 12:46:37 +0100 Subject: IndexObject: assured that .path fields are relative to the repository ( previously it would just be a name ) added abspath property and name property to provide easy access to most common paths of an index object --- lib/git/objects/base.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6dd03ba4..b0989a43 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -196,3 +196,20 @@ class IndexObject(Object): # END for each char return mode + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ) + """ + return os.path.join(self.repo.git.git_dir, self.path) + -- cgit v1.2.1