From df0892351a394d768489b5647d47b73c24d3ef5f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 00:48:16 +0200 Subject: commit: initial version of commit_from_tree which could create commit objects if it could serialize itself --- lib/git/objects/base.py | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 6a51eed3..bb15192d 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -141,6 +141,7 @@ class Object(LazyMixin): self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) return self + class IndexObject(Object): """ Base for all objects that can be part of the index file , namely Tree, Blob and -- cgit v1.2.1 From 8c1a87d11df666d308d14e4ae7ee0e9d614296b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 2 Jun 2010 12:30:33 +0200 Subject: commit: refactored existing code to decode commits from streams - performance is slightly better git.cmd: added method to provide access to the content stream directly. This is more efficient if large objects are handled, if it is actually used test.helpers: removed unnecessary code --- lib/git/objects/base.py | 418 ++++++++++++++++++++++++------------------------ 1 file changed, 209 insertions(+), 209 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index bb15192d..f7043199 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -6,223 +6,223 @@ import os from git.utils import LazyMixin, join_path_native import utils - + _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" class Object(LazyMixin): - """ - Implements an Object which may be Blobs, Trees, Commits and Tags - - This Object also serves as a constructor for instances of the correct type:: - - inst = Object.new(repo,id) - inst.sha # objects sha in hex - inst.size # objects uncompressed data size - inst.data # byte string containing the whole data of the object - """ - NULL_HEX_SHA = '0'*40 - TYPES = ("blob", "tree", "commit", "tag") - __slots__ = ("repo", "sha", "size", "data" ) - type = None # to be set by subclass - - def __init__(self, repo, id): - """ - Initialize an object by identifying it by its id. All keyword arguments - will be set on demand if None. - - ``repo`` - repository this object is located in - - ``id`` - SHA1 or ref suitable for git-rev-parse - """ - super(Object,self).__init__() - self.repo = repo - self.sha = id + """ + Implements an Object which may be Blobs, Trees, Commits and Tags + + This Object also serves as a constructor for instances of the correct type:: + + inst = Object.new(repo,id) + inst.sha # objects sha in hex + inst.size # objects uncompressed data size + inst.data # byte string containing the whole data of the object + """ + NULL_HEX_SHA = '0'*40 + TYPES = ("blob", "tree", "commit", "tag") + __slots__ = ("repo", "sha", "size", "data" ) + type = None # to be set by subclass + + def __init__(self, repo, id): + """ + Initialize an object by identifying it by its id. All keyword arguments + will be set on demand if None. + + ``repo`` + repository this object is located in + + ``id`` + SHA1 or ref suitable for git-rev-parse + """ + super(Object,self).__init__() + self.repo = repo + self.sha = id - @classmethod - def new(cls, repo, id): - """ - Return - New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a hexsha even though - the input id may have been a Reference or Rev-Spec - - Note - This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a hexsha. - """ - hexsha, typename, size = repo.git.get_object_header(id) - obj_type = utils.get_object_type_by_name(typename) - inst = obj_type(repo, hexsha) - inst.size = size - return inst - - def _set_self_from_args_(self, args_dict): - """ - Initialize attributes on self from the given dict that was retrieved - from locals() in the calling method. - - Will only set an attribute on self if the corresponding value in args_dict - is not None - """ - for attr, val in args_dict.items(): - if attr != "self" and val is not None: - setattr( self, attr, val ) - # END set all non-None attributes - - def _set_cache_(self, attr): - """ - Retrieve object information - """ - if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) - else: - super(Object,self)._set_cache_(attr) - - def __eq__(self, other): - """ - Returns - True if the objects have the same SHA1 - """ - return self.sha == other.sha - - def __ne__(self, other): - """ - Returns - True if the objects do not have the same SHA1 - """ - return self.sha != other.sha - - def __hash__(self): - """ - Returns - Hash of our id allowing objects to be used in dicts and sets - """ - return hash(self.sha) - - def __str__(self): - """ - Returns - string of our SHA1 as understood by all git commands - """ - return self.sha - - def __repr__(self): - """ - Returns - string with pythonic representation of our object - """ - return '' % (self.__class__.__name__, self.sha) + @classmethod + def new(cls, repo, id): + """ + Return + New Object instance of a type appropriate to the object type behind + id. The id of the newly created object will be a hexsha even though + the input id may have been a Reference or Rev-Spec + + Note + This cannot be a __new__ method as it would always call __init__ + with the input id which is not necessarily a hexsha. + """ + hexsha, typename, size = repo.git.get_object_header(id) + obj_type = utils.get_object_type_by_name(typename) + inst = obj_type(repo, hexsha) + inst.size = size + return inst + + def _set_self_from_args_(self, args_dict): + """ + Initialize attributes on self from the given dict that was retrieved + from locals() in the calling method. + + Will only set an attribute on self if the corresponding value in args_dict + is not None + """ + for attr, val in args_dict.items(): + if attr != "self" and val is not None: + setattr( self, attr, val ) + # END set all non-None attributes + + def _set_cache_(self, attr): + """ + Retrieve object information + """ + if attr == "size": + hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + elif attr == "data": + hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + else: + super(Object,self)._set_cache_(attr) + + def __eq__(self, other): + """ + Returns + True if the objects have the same SHA1 + """ + return self.sha == other.sha + + def __ne__(self, other): + """ + Returns + True if the objects do not have the same SHA1 + """ + return self.sha != other.sha + + def __hash__(self): + """ + Returns + Hash of our id allowing objects to be used in dicts and sets + """ + return hash(self.sha) + + def __str__(self): + """ + Returns + string of our SHA1 as understood by all git commands + """ + return self.sha + + def __repr__(self): + """ + Returns + string with pythonic representation of our object + """ + return '' % (self.__class__.__name__, self.sha) - @property - def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") - - def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) - return self + @property + def data_stream(self): + """ + Returns + File Object compatible stream to the uncompressed raw data of the object + """ + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") + def stream_data(self, ostream): + """ + Writes our data directly to the given output stream + + ``ostream`` + File object compatible stream object. + + Returns + self + """ + self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + return self + class IndexObject(Object): - """ - Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects - """ - __slots__ = ("path", "mode") - - def __init__(self, repo, sha, mode=None, path=None): - """ - Initialize a newly instanced IndexObject - ``repo`` - is the Repo we are located in + """ + Base for all objects that can be part of the index file , namely Tree, Blob and + SubModule objects + """ + __slots__ = ("path", "mode") + + def __init__(self, repo, sha, mode=None, path=None): + """ + Initialize a newly instanced IndexObject + ``repo`` + is the Repo we are located in - ``sha`` : string - is the git object id as hex sha + ``sha`` : string + is the git object id as hex sha - ``mode`` : int - is the file mode as int, use the stat module to evaluate the infomration + ``mode`` : int + is the file mode as int, use the stat module to evaluate the infomration - ``path`` : str - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext - - NOTE - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree. - """ - super(IndexObject, self).__init__(repo, sha) - self._set_self_from_args_(locals()) - if isinstance(mode, basestring): - self.mode = self._mode_str_to_int(mode) - - def __hash__(self): - """ - Returns - Hash of our path as index items are uniquely identifyable by path, not - by their data ! - """ - return hash(self.path) - - def _set_cache_(self, attr): - if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) - raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) - else: - super(IndexObject, self)._set_cache_(attr) - - @classmethod - def _mode_str_to_int(cls, modestr): - """ - ``modestr`` - string like 755 or 644 or 100644 - only the last 6 chars will be used - - Returns - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example. - """ - mode = 0 - for iteration,char in enumerate(reversed(modestr[-6:])): - mode += int(char) << iteration*3 - # END for each char - return mode - - @property - def name(self): - """ - Returns - Name portion of the path, effectively being the basename - """ - return os.path.basename(self.path) - - @property - def abspath(self): - """ - Returns - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). - - The returned path will be native to the system and contains '\' on windows. - """ - return join_path_native(self.repo.working_tree_dir, self.path) - + ``path`` : str + is the path to the file in the file system, relative to the git repository root, i.e. + file.ext or folder/other.ext + + NOTE + Path may not be set of the index object has been created directly as it cannot + be retrieved without knowing the parent tree. + """ + super(IndexObject, self).__init__(repo, sha) + self._set_self_from_args_(locals()) + if isinstance(mode, basestring): + self.mode = self._mode_str_to_int(mode) + + def __hash__(self): + """ + Returns + Hash of our path as index items are uniquely identifyable by path, not + by their data ! + """ + return hash(self.path) + + def _set_cache_(self, attr): + if attr in IndexObject.__slots__: + # they cannot be retrieved lateron ( not without searching for them ) + raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ ) + else: + super(IndexObject, self)._set_cache_(attr) + + @classmethod + def _mode_str_to_int(cls, modestr): + """ + ``modestr`` + string like 755 or 644 or 100644 - only the last 6 chars will be used + + Returns + String identifying a mode compatible to the mode methods ids of the + stat module regarding the rwx permissions for user, group and other, + special flags and file system flags, i.e. whether it is a symlink + for example. + """ + mode = 0 + for iteration,char in enumerate(reversed(modestr[-6:])): + mode += int(char) << iteration*3 + # END for each char + return mode + + @property + def name(self): + """ + Returns + Name portion of the path, effectively being the basename + """ + return os.path.basename(self.path) + + @property + def abspath(self): + """ + Returns + Absolute path to this index object in the file system ( as opposed to the + .path field which is a path relative to the git repository ). + + The returned path will be native to the system and contains '\' on windows. + """ + return join_path_native(self.repo.working_tree_dir, self.path) + -- cgit v1.2.1 From 38d59fc8ccccae8882fa48671377bf40a27915a7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 16:35:35 +0200 Subject: odb: implemented loose object streaming, which is impossible to do efficiently considering that it copies string buffers all the time --- lib/git/objects/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index f7043199..64a5678e 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -125,8 +125,8 @@ class Object(LazyMixin): Returns File Object compatible stream to the uncompressed raw data of the object """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + sha, type, size, stream = self.repo.git.stream_object_data(self.sha) + return stream def stream_data(self, ostream): """ -- cgit v1.2.1 From 1e2b46138ba58033738a24dadccc265748fce2ca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 3 Jun 2010 23:20:34 +0200 Subject: commit.create_from_tree now uses pure python implementation, fixed message parsing which truncated newlines although it was ilegitimate. Its up to the reader to truncate therse, nowhere in the git code I could find anyone adding newlines to commits where it is written Added performance tests for serialization, it does about 5k commits per second if writing to tmpfs --- lib/git/objects/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 64a5678e..f7043199 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -125,8 +125,8 @@ class Object(LazyMixin): Returns File Object compatible stream to the uncompressed raw data of the object """ - sha, type, size, stream = self.repo.git.stream_object_data(self.sha) - return stream + proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) + return utils.ProcessStreamAdapter(proc, "stdout") def stream_data(self, ostream): """ -- cgit v1.2.1 From b01ca6a3e4ae9d944d799743c8ff774e2a7a82b6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 00:09:00 +0200 Subject: db: implemented GitObjectDB using the git command to make sure we can lookup everything. Next is to implement pack-file reading, then alternates which should allow to resolve everything --- lib/git/objects/base.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index f7043199..446c4406 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -4,7 +4,7 @@ # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -from git.utils import LazyMixin, join_path_native +from git.utils import LazyMixin, join_path_native, stream_copy import utils _assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r" @@ -76,10 +76,11 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - hexsha, typename, self.size = self.repo.git.get_object_header(self.sha) + typename, self.size = self.repo.odb.object_info(self.sha) assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) elif attr == "data": - hexsha, typename, self.size, self.data = self.repo.git.get_object_data(self.sha) + typename, self.size, stream = self.repo.odb.object(self.sha) + self.data = stream.read() # once we have an own odb, we can delay reading assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) else: super(Object,self)._set_cache_(attr) @@ -121,24 +122,17 @@ class Object(LazyMixin): @property def data_stream(self): - """ - Returns - File Object compatible stream to the uncompressed raw data of the object - """ - proc = self.repo.git.cat_file(self.type, self.sha, as_process=True) - return utils.ProcessStreamAdapter(proc, "stdout") + """ :return: File Object compatible stream to the uncompressed raw data of the object + :note: returned streams must be read in order""" + type, size, stream = self.repo.odb.object(self.sha) + return stream def stream_data(self, ostream): - """ - Writes our data directly to the given output stream - - ``ostream`` - File object compatible stream object. - - Returns - self - """ - self.repo.git.cat_file(self.type, self.sha, output_stream=ostream) + """Writes our data directly to the given output stream + :param ostream: File object compatible stream object. + :return: self""" + type, size, istream = self.repo.odb.object(self.sha) + stream_copy(istream, ostream) return self -- cgit v1.2.1 From a1e80445ad5cb6da4c0070d7cb8af89da3b0803b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 14:41:15 +0200 Subject: initial version of new odb design to facilitate a channel based multi-threading implementation of all odb functions --- lib/git/objects/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 446c4406..76384888 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -76,10 +76,10 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - typename, self.size = self.repo.odb.object_info(self.sha) + typename, self.size = self.repo.odb.info(self.sha) assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) elif attr == "data": - typename, self.size, stream = self.repo.odb.object(self.sha) + typename, self.size, stream = self.repo.odb.stream(self.sha) self.data = stream.read() # once we have an own odb, we can delay reading assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) else: @@ -124,14 +124,14 @@ class Object(LazyMixin): def data_stream(self): """ :return: File Object compatible stream to the uncompressed raw data of the object :note: returned streams must be read in order""" - type, size, stream = self.repo.odb.object(self.sha) + type, size, stream = self.repo.odb.stream(self.sha) return stream def stream_data(self, ostream): """Writes our data directly to the given output stream :param ostream: File object compatible stream object. :return: self""" - type, size, istream = self.repo.odb.object(self.sha) + type, size, istream = self.repo.odb.stream(self.sha) stream_copy(istream, ostream) return self -- cgit v1.2.1 From e746f96bcc29238b79118123028ca170adc4ff0f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 4 Jun 2010 17:22:08 +0200 Subject: Fixed implementation after design change to deal with it - all tests run, but next there will have to be more through testing --- lib/git/objects/base.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'lib/git/objects/base.py') diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py index 76384888..5a3a15a7 100644 --- a/lib/git/objects/base.py +++ b/lib/git/objects/base.py @@ -76,12 +76,14 @@ class Object(LazyMixin): Retrieve object information """ if attr == "size": - typename, self.size = self.repo.odb.info(self.sha) - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + oinfo = self.repo.odb.info(self.sha) + self.size = oinfo.size + assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type) elif attr == "data": - typename, self.size, stream = self.repo.odb.stream(self.sha) - self.data = stream.read() # once we have an own odb, we can delay reading - assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type) + ostream = self.repo.odb.stream(self.sha) + self.size = ostream.size + self.data = ostream.read() + assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type) else: super(Object,self)._set_cache_(attr) @@ -124,14 +126,13 @@ class Object(LazyMixin): def data_stream(self): """ :return: File Object compatible stream to the uncompressed raw data of the object :note: returned streams must be read in order""" - type, size, stream = self.repo.odb.stream(self.sha) - return stream + return self.repo.odb.stream(self.sha) def stream_data(self, ostream): """Writes our data directly to the given output stream :param ostream: File object compatible stream object. :return: self""" - type, size, istream = self.repo.odb.stream(self.sha) + istream = self.repo.odb.stream(self.sha) stream_copy(istream, ostream) return self -- cgit v1.2.1