diff options
Diffstat (limited to 'lib')
m--------- | lib/git/ext/gitdb | 0 | ||||
-rw-r--r-- | lib/git/objects/submodule.py | 15 | ||||
-rw-r--r-- | lib/git/objects/tree.py | 501 |
3 files changed, 281 insertions, 235 deletions
diff --git a/lib/git/ext/gitdb b/lib/git/ext/gitdb -Subproject 10fef8f8e4ee83cf54feadbb5ffb522efec739f +Subproject 0ef86550179b9bb9e29ecccdccd586713b9d175 diff --git a/lib/git/objects/submodule.py b/lib/git/objects/submodule.py new file mode 100644 index 00000000..4742d448 --- /dev/null +++ b/lib/git/objects/submodule.py @@ -0,0 +1,15 @@ +import base + + +class Submodule(base.IndexObject): + """Implements access to a git submodule. They are special in that their sha + represents a commit in the submodule's repository which is to be checked out + at the path of this instance. + The submodule type does not have a string type associated with it, as it exists + solely as a marker in the tree and index""" + + # this is a bogus type for base class compatability + type = 'submodule' + + # TODO: Add functions to retrieve a repo for the submodule, to allow + # its initiailization and handling diff --git a/lib/git/objects/tree.py b/lib/git/objects/tree.py index 285d3b5b..3c860199 100644 --- a/lib/git/objects/tree.py +++ b/lib/git/objects/tree.py @@ -5,245 +5,276 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os -import blob +from blob import Blob +from submodule import Submodule import base import binascii import git.diff as diff import utils from git.utils import join_path + def sha_to_hex(sha): - """Takes a string and returns the hex of the sha within""" - hexsha = binascii.hexlify(sha) - assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha - return hexsha - - -class Tree(base.IndexObject, diff.Diffable, utils.Traversable): - """ - Tress represent a ordered list of Blobs and other Trees. Hence it can be - accessed like a list. - - Tree's will cache their contents after first retrieval to improve efficiency. - - ``Tree as a list``:: - - Access a specific blob using the - tree['filename'] notation. - - You may as well access by index - blob = tree[0] - - - """ - - type = "tree" - __slots__ = "_cache" - - # using ascii codes for comparison - commit_id = 016 - blob_id = 010 - symlink_id = 012 - tree_id = 004 - - - def __init__(self, repo, sha, mode=0, path=None): - super(Tree, self).__init__(repo, sha, mode, path) - - @classmethod - def _get_intermediate_items(cls, index_object): - if index_object.type == "tree": - return index_object._cache - return tuple() - - - def _set_cache_(self, attr): - if attr == "_cache": - # Set the data when we need it - self._cache = self._get_tree_cache() - else: - super(Tree, self)._set_cache_(attr) - - def _get_tree_cache(self): - """ - Return - list(object_instance, ...) - - ``treeish`` - sha or ref identifying a tree - """ - out = list() - for obj in self._iter_from_data(): - if obj is not None: - out.append(obj) - # END if object was handled - # END for each line from ls-tree - return out - - - def _iter_from_data(self): - """ - Reads the binary non-pretty printed representation of a tree and converts - it into Blob, Tree or Commit objects. - - Note: This method was inspired by the parse_tree method in dulwich. - - Returns - list(IndexObject, ...) - """ - ord_zero = ord('0') - data = self.data - len_data = len(data) - i = 0 - while i < len_data: - mode = 0 - - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later - while data[i] != ' ': - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character - mode = (mode << 3) + (ord(data[i]) - ord_zero) - i += 1 - # END while reading mode - type_id = mode >> 12 - - # byte is space now, skip it - i += 1 - - # parse name, it is NULL separated - - ns = i - while data[i] != '\0': - i += 1 - # END while not reached NULL - name = data[ns:i] - path = join_path(self.path, name) - - # byte is NULL, get next 20 - i += 1 - sha = data[i:i+20] - i = i + 20 - - hexsha = sha_to_hex(sha) - if type_id == self.blob_id or type_id == self.symlink_id: - yield blob.Blob(self.repo, hexsha, mode, path) - elif type_id == self.tree_id: - yield Tree(self.repo, hexsha, mode, path) - elif type_id == self.commit_id: - # submodules - yield None - else: - raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) - # END for each byte in data stream - - - def __div__(self, file): - """ - Find the named object in this tree's contents - - Examples:: - - >>> Repo('/path/to/python-git').tree/'lib' - <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> - >>> Repo('/path/to/python-git').tree/'README.txt' - <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> - - Returns - ``git.Blob`` or ``git.Tree`` - - Raise - KeyError if given file or tree does not exist in tree - """ - msg = "Blob or Tree named %r not found" - if '/' in file: - tree = self - item = self - tokens = file.split('/') - for i,token in enumerate(tokens): - item = tree[token] - if item.type == 'tree': - tree = item - else: - # safety assertion - blobs are at the end of the path - if i != len(tokens)-1: - raise KeyError(msg % file) - return item - # END handle item type - # END for each token of split path - if item == self: - raise KeyError(msg % file) - return item - else: - for obj in self._cache: - if obj.name == file: - return obj - # END for each obj - raise KeyError( msg % file ) - # END handle long paths - - - def __repr__(self): - return '<git.Tree "%s">' % self.sha - - @property - def trees(self): - """ - Returns - list(Tree, ...) list of trees directly below this tree - """ - return [ i for i in self if i.type == "tree" ] - - @property - def blobs(self): - """ - Returns - list(Blob, ...) list of blobs directly below this tree - """ - return [ i for i in self if i.type == "blob" ] - - - def traverse( self, predicate = lambda i,d: True, - prune = lambda i,d: False, depth = -1, branch_first=True, - visit_once = False, ignore_self=1 ): - """For documentation, see utils.Traversable.traverse - - Trees are set to visit_once = False to gain more performance in the traversal""" - return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) - - # List protocol - def __getslice__(self,i,j): - return self._cache[i:j] - - def __iter__(self): - return iter(self._cache) - - def __len__(self): - return len(self._cache) - - def __getitem__(self,item): - if isinstance(item, int): - return self._cache[item] - - if isinstance(item, basestring): - # compatability - return self.__div__(item) - # END index is basestring - - raise TypeError( "Invalid index type: %r" % item ) - - - def __contains__(self,item): - if isinstance(item, base.IndexObject): - return item in self._cache - - # compatability - for obj in self._cache: - if item == obj.path: - return True - # END for each item - return False - - def __reversed__(self): - return reversed(self._cache) + """Takes a string and returns the hex of the sha within""" + hexsha = binascii.hexlify(sha) + return hexsha + + +class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializable): + """ + Tress represent a ordered list of Blobs and other Trees. Hence it can be + accessed like a list. + + Tree's will cache their contents after first retrieval to improve efficiency. + + ``Tree as a list``:: + + Access a specific blob using the + tree['filename'] notation. + + You may as well access by index + blob = tree[0] + + + """ + + type = "tree" + __slots__ = "_cache" + + # using ascii codes for comparison + commit_id = 016 + blob_id = 010 + symlink_id = 012 + tree_id = 004 + + + def __init__(self, repo, sha, mode=0, path=None): + super(Tree, self).__init__(repo, sha, mode, path) + + @classmethod + def _get_intermediate_items(cls, index_object): + if index_object.type == "tree": + return index_object._cache + return tuple() + + + def _set_cache_(self, attr): + if attr == "_cache": + # Set the data when we need it + self._cache = self._get_tree_cache() + else: + super(Tree, self)._set_cache_(attr) + + def _get_tree_cache(self, data=None): + """ :return: list(object_instance, ...) + :param data: if not None, a byte string representing the tree data + If None, self.data will be used instead""" + out = list() + if data is None: + data = self.data + for obj in self._iter_from_data(data): + if obj is not None: + out.append(obj) + # END if object was handled + # END for each line from ls-tree + return out + + + def _iter_from_data(self, data): + """ + Reads the binary non-pretty printed representation of a tree and converts + it into Blob, Tree or Commit objects. + + Note: This method was inspired by the parse_tree method in dulwich. + + Returns + list(IndexObject, ...) + """ + ord_zero = ord('0') + len_data = len(data) + i = 0 + while i < len_data: + mode = 0 + + # read mode + # Some git versions truncate the leading 0, some don't + # The type will be extracted from the mode later + while data[i] != ' ': + # move existing mode integer up one level being 3 bits + # and add the actual ordinal value of the character + mode = (mode << 3) + (ord(data[i]) - ord_zero) + i += 1 + # END while reading mode + type_id = mode >> 12 + + # byte is space now, skip it + i += 1 + + # parse name, it is NULL separated + + ns = i + while data[i] != '\0': + i += 1 + # END while not reached NULL + name = data[ns:i] + path = join_path(self.path, name) + + # byte is NULL, get next 20 + i += 1 + sha = data[i:i+20] + i = i + 20 + + hexsha = sha_to_hex(sha) + if type_id == self.blob_id or type_id == self.symlink_id: + yield Blob(self.repo, hexsha, mode, path) + elif type_id == self.tree_id: + yield Tree(self.repo, hexsha, mode, path) + elif type_id == self.commit_id: + yield Submodule(self.repo, hexsha, mode, path) + else: + raise TypeError( "Unknown type found in tree data %i for path '%s'" % (type_id, path)) + # END for each byte in data stream + + + def __div__(self, file): + """ + Find the named object in this tree's contents + + Examples:: + + >>> Repo('/path/to/python-git').tree/'lib' + <git.Tree "6cc23ee138be09ff8c28b07162720018b244e95e"> + >>> Repo('/path/to/python-git').tree/'README.txt' + <git.Blob "8b1e02c0fb554eed2ce2ef737a68bb369d7527df"> + + Returns + ``git.Blob`` or ``git.Tree`` + + Raise + KeyError if given file or tree does not exist in tree + """ + msg = "Blob or Tree named %r not found" + if '/' in file: + tree = self + item = self + tokens = file.split('/') + for i,token in enumerate(tokens): + item = tree[token] + if item.type == 'tree': + tree = item + else: + # safety assertion - blobs are at the end of the path + if i != len(tokens)-1: + raise KeyError(msg % file) + return item + # END handle item type + # END for each token of split path + if item == self: + raise KeyError(msg % file) + return item + else: + for obj in self._cache: + if obj.name == file: + return obj + # END for each obj + raise KeyError( msg % file ) + # END handle long paths + + + def __repr__(self): + return '<git.Tree "%s">' % self.sha + + @property + def trees(self): + """ + Returns + list(Tree, ...) list of trees directly below this tree + """ + return [ i for i in self if i.type == "tree" ] + + @property + def blobs(self): + """ + Returns + list(Blob, ...) list of blobs directly below this tree + """ + return [ i for i in self if i.type == "blob" ] + + + def traverse( self, predicate = lambda i,d: True, + prune = lambda i,d: False, depth = -1, branch_first=True, + visit_once = False, ignore_self=1 ): + """For documentation, see utils.Traversable.traverse + + Trees are set to visit_once = False to gain more performance in the traversal""" + return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + + # List protocol + def __getslice__(self,i,j): + return self._cache[i:j] + + def __iter__(self): + return iter(self._cache) + + def __len__(self): + return len(self._cache) + + def __getitem__(self,item): + if isinstance(item, int): + return self._cache[item] + + if isinstance(item, basestring): + # compatability + return self.__div__(item) + # END index is basestring + + raise TypeError( "Invalid index type: %r" % item ) + + + def __contains__(self,item): + if isinstance(item, base.IndexObject): + return item in self._cache + + # compatability + for obj in self._cache: + if item == obj.path: + return True + # END for each item + return False + + def __reversed__(self): + return reversed(self._cache) + + def _serialize(self, stream, presort=False): + """Serialize this tree into the stream. Please note that we will assume + our tree data to be in a sorted state. If this is not the case, set the + presort flag True + :param presort: if True, default False, sort our tree information before + writing it to the stream. This should be done if the cache changed + in the meanwhile""" + ord_zero = ord('0') + bit_mask = 7 # 3 bits set + hex_to_bin = binascii.a2b_hex + + for item in self._cache: + mode = '' + mb = item.mode + for i in xrange(6): + mode = chr(((mb >> (i*3)) & bit_mask) + ord_zero) + mode + # END for each 8 octal value + # git slices away the first octal if its zero + if mode[0] == '0': + mode = mode[1:] + # END save a byte + + # note: the cache currently contains repo-relative paths, not + # tree-relative ones. Maybe the cache should only contain + # actual tuples, which are converted to objects later + # TODO: do it so + stream.write("%s %s\0%s" % (mode, os.path.basename(item.path), hex_to_bin(item.sha))) + # END for each item + return self + + def _deserialize(self, stream): + self._cache = self._get_tree_cache(stream.read()) + return self |