# tree.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # # This module is part of GitPython and is released under # the BSD License: http://www.opensource.org/licenses/bsd-license.php import os from blob import Blob from submodule import Submodule import base import binascii import git.diff as diff import utils from git.utils import join_path join = os.path.join def sha_to_hex(sha): """Takes a string and returns the hex of the sha within""" hexsha = binascii.hexlify(sha) return hexsha class TreeModifier(object): """A utility class providing methods to alter the underlying cache in a list-like fashion. Once all adjustments are complete, the _cache, which really is a refernce to the cache of a tree, will be sorted. Assuring it will be in a serializable state""" __slots__ = '_cache' def __init__(self, cache): self._cache = cache def _index_by_name(self, name): """:return: index of an item with name, or -1 if not found""" for i, t in enumerate(self._cache): if t[2] == name: return i # END found item # END for each item in cache return -1 #{ Interface def set_done(self): """Call this method once you are done modifying the tree information. It may be called several times, but be aware that each call will cause a sort operation :return self:""" self._cache.sort(key=lambda t: t[2]) # sort by name return self #} END interface #{ Mutators def add(self, hexsha, mode, name, force=False): """Add the given item to the tree. If an item with the given name already exists, nothing will be done, but a ValueError will be raised if the sha and mode of the existing item do not match the one you add, unless force is True :param hexsha: The 40 byte sha of the item to add :param mode: int representing the stat compatible mode of the item :param force: If True, an item with your name and information will overwrite any existing item with the same name, no matter which information it has :return: self""" if '/' in name: raise ValueError("Name must not contain '/' characters") if len(hexsha) != 40: raise ValueError("Hexsha required, got %r" % hexsha) if (mode >> 12) not in Tree._map_id_to_type: raise ValueError("Invalid object type according to mode %o" % mode) index = self._index_by_name(name) item = (hexsha, mode, name) if index == -1: self._cache.append(item) else: if force: self._cache[index] = item else: ex_item = self._cache[index] if ex_item[0] != hexsha or ex_item[1] != mode: raise ValueError("Item %r existed with different properties" % name) # END handle mismatch # END handle force # END handle name exists return self def add_unchecked(self, hexsha, mode, name): """Add the given item to the tree, its correctness is assumed, which puts the caller into responsibility to assure the input is correct. For more information on the parameters, see ``add``""" self._cache.append((hexsha, mode, name)) def __delitem__(self, name): """Deletes an item with the given name if it exists""" index = self._index_by_name(name) if index > -1: del(self._cache[index]) #} END mutators class Tree(base.IndexObject, diff.Diffable, utils.Traversable, utils.Serializable): """ Tress represent a ordered list of Blobs and other Trees. Hence it can be accessed like a list. Tree's will cache their contents after first retrieval to improve efficiency. ``Tree as a list``:: Access a specific blob using the tree['filename'] notation. You may as well access by index blob = tree[0] """ type = "tree" __slots__ = "_cache" # actual integer ids for comparison commit_id = 016 blob_id = 010 symlink_id = 012 tree_id = 004 _map_id_to_type = { commit_id : Submodule, blob_id : Blob, symlink_id : Blob # tree id added once Tree is defined } def __init__(self, repo, sha, mode=tree_id<<12, path=None): super(Tree, self).__init__(repo, sha, mode, path) @classmethod def _get_intermediate_items(cls, index_object): if index_object.type == "tree": return tuple(index_object._iter_convert_to_object(index_object._cache)) return tuple() def _set_cache_(self, attr): if attr == "_cache": # Set the data when we need it self._cache = self._get_tree_cache(self.data) else: super(Tree, self)._set_cache_(attr) def _get_tree_cache(self, data): """ :return: list(object_instance, ...) :param data: data string containing our serialized information""" return list(self._iter_from_data(data)) def _iter_convert_to_object(self, iterable): """Iterable yields tuples of (hexsha, mode, name), which will be converted to the respective object representation""" for hexsha, mode, name in iterable: path = join(self.path, name) type_id = mode >> 12 try: yield self._map_id_to_type[type_id](self.repo, hexsha, mode, path) except KeyError: raise TypeError( "Unknown type %i found in tree data for path '%s'" % (type_id, path)) # END for each item def _iter_from_data(self, data): """ Reads the binary non-pretty printed representation of a tree and converts it into Blob, Tree or Commit objects. Note: This method was inspired by the parse_tree method in dulwich. :yield: Tuple(hexsha, mode, tree_relative_path) """ ord_zero = ord('0') len_data = len(data) i = 0 while i < len_data: mode = 0 # read mode # Some git versions truncate the leading 0, some don't # The type will be extracted from the mode later while data[i] != ' ': # move existing mode integer up one level being 3 bits # and add the actual ordinal value of the character mode = (mode << 3) + (ord(data[i]) - ord_zero) i += 1 # END while reading mode # byte is space now, skip it i += 1 # parse name, it is NULL separated ns = i while data[i] != '\0': i += 1 # END while not reached NULL name = data[ns:i] # byte is NULL, get next 20 i += 1 sha = data[i:i+20] i = i + 20 yield (sha_to_hex(sha), mode, name) # END for each byte in data stream def __div__(self, file): """ Find the named object in this tree's contents Examples:: >>> Repo('/path/to/python-git').tree/'lib' >>> Repo('/path/to/python-git').tree/'README.txt' Returns ``git.Blob`` or ``git.Tree`` Raise KeyError if given file or tree does not exist in tree """ msg = "Blob or Tree named %r not found" if '/' in file: tree = self item = self tokens = file.split('/') for i,token in enumerate(tokens): item = tree[token] if item.type == 'tree': tree = item else: # safety assertion - blobs are at the end of the path if i != len(tokens)-1: raise KeyError(msg % file) return item # END handle item type # END for each token of split path if item == self: raise KeyError(msg % file) return item else: for info in self._cache: if info[2] == file: # [2] == name return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2])) # END for each obj raise KeyError( msg % file ) # END handle long paths def __repr__(self): return '' % self.sha @property def trees(self): """ Returns list(Tree, ...) list of trees directly below this tree """ return [ i for i in self if i.type == "tree" ] @property def blobs(self): """ Returns list(Blob, ...) list of blobs directly below this tree """ return [ i for i in self if i.type == "blob" ] @property def cache(self): """:return: An object allowing to modify the internal cache. This can be used to change the tree's contents. When done, make sure you call ``set_done`` on the tree modifier, or serialization behaviour will be incorrect. See the ``TreeModifier`` for more information on how to alter the cache""" return TreeModifier(self._cache) def traverse( self, predicate = lambda i,d: True, prune = lambda i,d: False, depth = -1, branch_first=True, visit_once = False, ignore_self=1 ): """For documentation, see utils.Traversable.traverse Trees are set to visit_once = False to gain more performance in the traversal""" return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # List protocol def __getslice__(self, i, j): return list(self._iter_convert_to_object(self._cache[i:j])) def __iter__(self): return self._iter_convert_to_object(self._cache) def __len__(self): return len(self._cache) def __getitem__(self, item): if isinstance(item, int): info = self._cache[item] return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join(self.path, info[2])) if isinstance(item, basestring): # compatability return self.__div__(item) # END index is basestring raise TypeError( "Invalid index type: %r" % item ) def __contains__(self, item): if isinstance(item, base.IndexObject): for info in self._cache: if item.sha == info[0]: return True # END compare sha # END for each entry # END handle item is index object # compatability # treat item as repo-relative path path = self.path for info in self._cache: if item == join(path, info[2]): return True # END for each item return False def __reversed__(self): return reversed(self._iter_convert_to_object(self._cache)) def _serialize(self, stream, presort=False): """Serialize this tree into the stream. Please note that we will assume our tree data to be in a sorted state. If this is not the case, serialization will not generate a correct tree representation as these are assumed to be sorted by algorithms""" ord_zero = ord('0') bit_mask = 7 # 3 bits set hex_to_bin = binascii.a2b_hex for hexsha, mode, name in self._cache: mode_str = '' for i in xrange(6): mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str # END for each 8 octal value # git slices away the first octal if its zero if mode_str[0] == '0': mode_str = mode_str[1:] # END save a byte stream.write("%s %s\0%s" % (mode_str, name, hex_to_bin(hexsha))) # END for each item return self def _deserialize(self, stream): self._cache = self._get_tree_cache(stream.read()) return self # END tree # finalize map definition Tree._map_id_to_type[Tree.tree_id] = Tree