diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-22 00:05:37 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-22 00:05:37 +0200 |
commit | 69dd8750be1fbf55010a738dc1ced4655e727f23 (patch) | |
tree | 1d49d5ff0218a358d63ae1cab93c56731cc5ea46 /lib/git/index | |
parent | 1044116d25f0311033e0951d2ab30579bba4b051 (diff) | |
download | gitpython-69dd8750be1fbf55010a738dc1ced4655e727f23.tar.gz |
index.write_tree: initial version implemented, although it's not yet working correctly; a test to explicitly compare the git version with the python implementation is still missing
Tree and Index internally use 20 byte shas, converting them only as needed to reduce memory footprint and processing time
objects: started own 'fun' module containing the most important tree functions, more are likely to be added soon
Diffstat (limited to 'lib/git/index')
-rw-r--r-- | lib/git/index/base.py | 75 | ||||
-rw-r--r-- | lib/git/index/fun.py | 73 | ||||
-rw-r--r-- | lib/git/index/typ.py | 22 |
3 files changed, 117 insertions, 53 deletions
diff --git a/lib/git/index/base.py b/lib/git/index/base.py index a605c3ec..96a9430c 100644 --- a/lib/git/index/base.py +++ b/lib/git/index/base.py @@ -5,13 +5,13 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php """Module containing Index implementation, allowing to perform all kinds of index manipulations such as querying and merging. """ -import binascii import tempfile import os import sys import subprocess import glob from cStringIO import StringIO +from binascii import b2a_hex from stat import ( S_ISLNK, @@ -25,16 +25,12 @@ from stat import ( from typ import ( BaseIndexEntry, IndexEntry, - CE_NAMEMASK, - CE_STAGESHIFT ) from util import ( TemporaryFileSwap, post_clear_cache, default_index, - pack, - unpack ) import git.objects @@ -60,20 +56,17 @@ from git.utils import ( LockedFD, join_path_native, file_contents_ro, - LockFile - ) - - -from gitdb.base import ( - IStream ) from fun import ( write_cache, read_cache, + write_tree_from_cache, entry_key ) +from gitdb.base import IStream + __all__ = ( 'IndexFile', 'CheckoutError' ) @@ -161,10 +154,15 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): self.version, self.entries, self._extension_data, conten_sha = read_cache(stream) return self - def _serialize(self, stream, ignore_tree_extension_data=False): + def _entries_sorted(self): + """:return: list of entries, in a sorted fashion, first by path, then by stage""" entries_sorted = self.entries.values() - entries_sorted.sort(key=lambda e: (e[3], e.stage)) # use path/stage as sort key - write_cache(entries_sorted, + entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key + return entries_sorted + + def _serialize(self, stream, ignore_tree_extension_data=False): + entries = self._entries_sorted() + write_cache(entries, stream, (ignore_tree_extension_data and None) or self._extension_data) return self @@ -403,7 +401,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): # TODO: is it necessary to 
convert the mode ? We did that when adding # it to the index, right ? mode = self._stat_mode_to_index_mode(entry.mode) - blob = Blob(self.repo, entry.sha, mode, entry.path) + blob = Blob(self.repo, entry.hexsha, mode, entry.path) blob.size = entry.size output = (entry.stage, blob) if predicate(output): @@ -490,33 +488,31 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): # allows to lazily reread on demand return self - def _write_tree(self, missing_ok=False): + def write_tree(self): """Writes this index to a corresponding Tree object into the repository's object database and return it. - - :param missing_ok: - If True, missing objects referenced by this index will not result - in an error. - - :return: Tree object representing this index""" + + :return: Tree object representing this index + :note: The tree will be written even if one or more objects the tree refers to + does not yet exist in the object database. This could happen if you added + Entries to the index directly. + :raise ValueError: if there are no entries in the cache + :raise UnmergedEntriesError: """ # we obtain no lock as we just flush our contents to disk as tree if not self.entries: raise ValueError("Cannot write empty index") + # TODO: use memory db, this helps to prevent IO if the resulting tree + # already exists + entries = self._entries_sorted() + binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries))) + # note: additional deserialization could be saved if write_tree_from_cache + # would return sorted tree entries + root_tree = Tree(self.repo, b2a_hex(binsha), path='') + root_tree._cache = tree_items + return root_tree - return Tree(self.repo, tree_sha, 0, '') - - def write_tree(self, missing_ok = False): - index_path = self._index_path() - tmp_index_mover = TemporaryFileSwap(index_path) - - self.write(index_path, ignore_tree_extension_data=True) - tree_sha = self.repo.git.write_tree(missing_ok=missing_ok) - - del(tmp_index_mover) # as soon as 
possible - return Tree(self.repo, tree_sha, 0, '') - def _process_diff_args(self, args): try: args.pop(args.index(self)) @@ -525,7 +521,6 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): # END remove self return args - def _to_relative_path(self, path): """:return: Version of path relative to our git directory or raise ValueError if it is not within our git direcotory""" @@ -599,7 +594,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): - BaseIndexEntry or type Handling equals the one of Blob objects, but the stage may be - explicitly set. + explicitly set. Please note that Index Entries require binary sha's. :param force: If True, otherwise ignored or excluded files will be @@ -666,7 +661,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): fprogress(filepath, True, filepath) return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode), - istream.sha, 0, filepath)) + istream.binsha, 0, filepath)) # END utility method @@ -691,14 +686,14 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): # HANLDE ENTRY OBJECT CREATION # create objects if required, otherwise go with the existing shas - null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ] + null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ] if null_entries_indices: for ei in null_entries_indices: null_entry = entries[ei] new_entry = store_path(null_entry.path) # update null entry - entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path)) + entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path)) # END for each entry index # END null_entry handling @@ -707,7 +702,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable): # all object sha's if path_rewriter: for i,e in enumerate(entries): - entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e))) + entries[i] = BaseIndexEntry((e.mode, e.binsha, 
e.stage, path_rewriter(e))) # END for each entry # END handle path rewriting diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py index 2e653ea6..557941d5 100644 --- a/lib/git/index/fun.py +++ b/lib/git/index/fun.py @@ -2,6 +2,11 @@ Contains standalone functions to accompany the index implementation and make it more versatile """ +from stat import S_IFDIR +from cStringIO import StringIO + +from git.errors import UnmergedEntriesError +from git.objects.fun import tree_to_stream from git.utils import ( IndexFileSHA1Writer, ) @@ -16,12 +21,11 @@ from util import ( unpack ) -from binascii import ( - hexlify, - unhexlify - ) +from gitdb.base import IStream +from gitdb.typ import str_tree_type +from binascii import a2b_hex -__all__ = ('write_cache', 'read_cache' ) +__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' ) def write_cache_entry(entry, stream): """Write the given entry to the stream""" @@ -34,7 +38,7 @@ def write_cache_entry(entry, stream): assert plen == len(path), "Path %s too long to fit into index" % entry[3] flags = plen | entry[2] write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0], - entry[8], entry[9], entry[10], unhexlify(entry[1]), flags)) + entry[8], entry[9], entry[10], entry[1], flags)) write(path) real_size = ((stream.tell() - beginoffset + 8) & ~7) write("\0" * ((beginoffset + real_size) - stream.tell())) @@ -80,7 +84,7 @@ def read_entry(stream): real_size = ((stream.tell() - beginoffset + 8) & ~7) data = stream.read((beginoffset + real_size) - stream.tell()) - return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size)) + return IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) def read_header(stream): """Return tuple(version_long, num_entries) from the given stream""" @@ -136,3 +140,58 @@ def read_cache(stream): return (version, entries, extension_data, content_sha) +def write_tree_from_cache(entries, odb, sl, si=0): + """Create a tree from the given 
sorted list of entries and put the respective + trees into the given object database + :param entries: **sorted** list of IndexEntries + :param odb: object database to store the trees in + :param si: start index at which we should start creating subtrees + :param sl: slice indicating the range we should process on the entries list + :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of + tree entries being a tuple of hexsha, mode, name""" + tree_items = list() + ci = sl.start + end = sl.stop + while ci < end: + entry = entries[ci] + if entry.stage != 0: + raise UnmergedEntriesError(entry) + # END abort on unmerged + ci += 1 + rbound = entry.path.find('/', si) + if rbound == -1: + # its not a tree + tree_items.append((entry.binsha, entry.mode, entry.path[si:])) + else: + # find common base range + base = entry.path[si:rbound] + xi = ci + while xi < end: + oentry = entries[xi] + xi += 1 + orbound = oentry.path.find('/') + if orbound == -1 or oentry.path[si:orbound] != base: + break + # END abort on base mismatch + # END find common base + + # enter recursion + # ci - 1 as we want to count our current item as well + sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1) + tree_items.append((sha, S_IFDIR, base)) + + # skip ahead + ci = xi + # END handle bounds + # END for each entry + + # finally create the tree + sio = StringIO() + tree_to_stream(tree_items, sio.write) + sio.seek(0) + + istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) + return (istream.binsha, tree_items) + + + diff --git a/lib/git/index/typ.py b/lib/git/index/typ.py index b5dac58a..6ef1d2f2 100644 --- a/lib/git/index/typ.py +++ b/lib/git/index/typ.py @@ -5,6 +5,11 @@ from util import ( unpack ) +from binascii import ( + b2a_hex, + a2b_hex + ) + __all__ = ('BlobFilter', 'BaseIndexEntry', 'IndexEntry') #{ Invariants @@ -50,7 +55,7 @@ class BaseIndexEntry(tuple): use numeric indices for performance reasons. 
""" def __str__(self): - return "%o %s %i\t%s" % (self.mode, self.sha, self.stage, self.path) + return "%o %s %i\t%s" % (self.mode, self.hexsha, self.stage, self.path) @property def mode(self): @@ -58,9 +63,14 @@ class BaseIndexEntry(tuple): return self[0] @property - def sha(self): - """ hex sha of the blob """ + def binsha(self): + """binary sha of the blob """ return self[1] + + @property + def hexsha(self): + """hex version of our sha""" + return b2a_hex(self[1]) @property def stage(self): @@ -88,7 +98,7 @@ class BaseIndexEntry(tuple): @classmethod def from_blob(cls, blob, stage = 0): """:return: Fully equipped BaseIndexEntry at the given stage""" - return cls((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path)) + return cls((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path)) class IndexEntry(BaseIndexEntry): @@ -145,12 +155,12 @@ class IndexEntry(BaseIndexEntry): :param base: Instance of type BaseIndexEntry""" time = pack(">LL", 0, 0) - return IndexEntry((base.mode, base.sha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) + return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) @classmethod def from_blob(cls, blob, stage = 0): """:return: Minimal entry resembling the given blob object""" time = pack(">LL", 0, 0) - return IndexEntry((blob.mode, blob.sha, stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size)) + return IndexEntry((blob.mode, a2b_hex(blob.sha), stage << CE_STAGESHIFT, blob.path, time, time, 0, 0, 0, 0, blob.size)) |