author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-21 20:16:22 +0200
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-21 20:16:22 +0200
commit | 1044116d25f0311033e0951d2ab30579bba4b051 (patch)
tree | 98832affe9aaf5df77ee11c6a8e5ed2293828e15
parent | 91725f0fc59aa05ef68ab96e9b29009ce84668a5 (diff)
download | gitpython-1044116d25f0311033e0951d2ab30579bba4b051.tar.gz
index: put serialization methods into new 'fun' module; this makes the calls faster, as it removes one level of indirection, and makes the main file smaller, improving maintainability
-rw-r--r-- | CHANGES                | 1
-rw-r--r-- | lib/git/index/base.py  | 182
-rw-r--r-- | lib/git/index/fun.py   | 138
-rw-r--r-- | test/git/test_index.py | 12
4 files changed, 190 insertions, 143 deletions
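
The speed claim in the commit message reduces to Python call overhead: a module-level function skips the class attribute lookup and forwarding frame that a classmethod pays on every call. The following micro-benchmark is a hypothetical sketch (the names are illustrative stand-ins, not code from this patch) of how that difference can be measured:

    import timeit

    def entry_key(path, stage):
        # module-level function, as in the new lib/git/index/fun.py
        return (path, stage)

    class IndexFile(object):
        @classmethod
        def entry_key(cls, path, stage):
            # forwarding classmethod: same result, one extra lookup and call
            return entry_key(path, stage)

    print(timeit.timeit(lambda: entry_key('README', 0)))
    print(timeit.timeit(lambda: IndexFile.entry_key('README', 0)))

On CPython the direct call is measurably cheaper, which matters for routines invoked once per index entry.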
@@ -4,6 +4,7 @@ CHANGES
 0.3
 ===
  * ConcurrentWriteOperation was removed, and replaced by LockedFD
+ * IndexFile.get_entries_key was renamed to entry_key
 
 0.2 Beta 2
 ===========
diff --git a/lib/git/index/base.py b/lib/git/index/base.py
index b003195c..a605c3ec 100644
--- a/lib/git/index/base.py
+++ b/lib/git/index/base.py
@@ -59,7 +59,8 @@ from git.utils import (
     LazyMixin,
     LockedFD,
     join_path_native,
-    file_contents_ro
+    file_contents_ro,
+    LockFile
     )
 
@@ -67,6 +68,12 @@ from gitdb.base import (
     IStream
 )
 
+from fun import (
+    write_cache,
+    read_cache,
+    entry_key
+    )
+
 __all__ = ( 'IndexFile', 'CheckoutError' )
 
@@ -84,7 +91,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
     to facilitate access.
     You may read the entries dict or manipulate it using IndexEntry instance, i.e.::
-        index.entries[index.get_entries_key(index_entry_instance)] = index_entry_instance
+        index.entries[index.entry_key(index_entry_instance)] = index_entry_instance
     Otherwise changes to it will be lost when changing the index using its methods.
     """
     __slots__ = ( "repo", "version", "entries", "_extension_data", "_file_path" )
@@ -147,123 +154,34 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
             pass
         # END exception handling
 
-    @classmethod
-    def _read_entry(cls, stream):
-        """Return: One entry of the given stream"""
-        beginoffset = stream.tell()
-        ctime = unpack(">8s", stream.read(8))[0]
-        mtime = unpack(">8s", stream.read(8))[0]
-        (dev, ino, mode, uid, gid, size, sha, flags) = \
-            unpack(">LLLLLL20sH", stream.read(20 + 4 * 6 + 2))
-        path_size = flags & CE_NAMEMASK
-        path = stream.read(path_size)
-
-        real_size = ((stream.tell() - beginoffset + 8) & ~7)
-        data = stream.read((beginoffset + real_size) - stream.tell())
-        return IndexEntry((mode, binascii.hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
-
-    @classmethod
-    def _read_header(cls, stream):
-        """Return tuple(version_long, num_entries) from the given stream"""
-        type_id = stream.read(4)
-        if type_id != "DIRC":
-            raise AssertionError("Invalid index file header: %r" % type_id)
-        version, num_entries = unpack(">LL", stream.read(4 * 2))
-        assert version in (1, 2)
-        return version, num_entries
-
     #{ Serializable Interface
 
     def _deserialize(self, stream):
         """Initialize this instance with index values read from the given stream"""
-        self.version, num_entries = self._read_header(stream)
-        count = 0
-        self.entries = dict()
-        while count < num_entries:
-            entry = self._read_entry(stream)
-            self.entries[self.get_entries_key(entry)] = entry
-            count += 1
-        # END for each entry
-
-        # the footer contains extension data and a sha on the content so far
-        # Keep the extension footer, and verify we have a sha in the end
-        # Extension data format is:
-        #   4 bytes ID
-        #   4 bytes length of chunk
-        #   repeated 0 - N times
-        self._extension_data = stream.read(~0)
-        assert len(self._extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(self._extension_data)
-
-        content_sha = self._extension_data[-20:]
-
-        # truncate the sha in the end as we will dynamically create it anyway
-        self._extension_data = self._extension_data[:-20]
-
+        self.version, self.entries, self._extension_data, content_sha = read_cache(stream)
         return self
 
     def _serialize(self, stream, ignore_tree_extension_data=False):
-
-        # wrap the stream into a compatible writer
-        stream = IndexFileSHA1Writer(stream)
-
-        # header
-        stream.write("DIRC")
-        stream.write(pack(">LL", self.version, len(self.entries)))
-
-        # body
         entries_sorted = self.entries.values()
         entries_sorted.sort(key=lambda e: (e[3], e.stage))      # use path/stage as sort key
-        for entry in entries_sorted:
-            self._write_cache_entry(stream, entry)
-        # END for each entry
-
-        stored_ext_data = None
-        if ignore_tree_extension_data and self._extension_data and self._extension_data[:4] == 'TREE':
-            stored_ext_data = self._extension_data
-            self._extension_data = ''
-        # END extension data special handling
-
-        # write previously cached extensions data
-        stream.write(self._extension_data)
-
-        if stored_ext_data:
-            self._extension_data = stored_ext_data
-        # END reset previous ext data
-
-        # write the sha over the content
-        stream.write_sha()
+        write_cache(entries_sorted,
+                    stream,
+                    None if ignore_tree_extension_data else self._extension_data)
         return self
 
+    #} END serializable interface
 
-    @classmethod
-    def _write_cache_entry(cls, stream, entry):
-        """Write an IndexEntry to a stream"""
-        beginoffset = stream.tell()
-        write = stream.write
-        write(entry[4])         # ctime
-        write(entry[5])         # mtime
-        path = entry[3]
-        plen = len(path) & CE_NAMEMASK      # path length
-        assert plen == len(path), "Path %s too long to fit into index" % entry[3]
-        flags = plen | entry[2]
-        write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
-                   entry[8], entry[9], entry[10], binascii.unhexlify(entry[1]), flags))
-        write(path)
-        real_size = ((stream.tell() - beginoffset + 8) & ~7)
-        write("\0" * ((beginoffset + real_size) - stream.tell()))
-
     def write(self, file_path = None, ignore_tree_extension_data=False):
-        """
-        Write the current state to our file path or to the given one
+        """Write the current state to our file path or to the given one
 
-        ``file_path``
+        :param file_path:
             If None, we will write to our stored file path from which we have
             been initialized. Otherwise we write to the given file path.
             Please note that this will change the file_path of this index to
             the one you gave.
 
-        ``ignore_tree_extension_data``
+        :param ignore_tree_extension_data:
             If True, the TREE type extension data read in the index will not
             be written to disk. Use this if you have altered the index and
             would like to use git-write-tree afterwards to create a tree
@@ -273,12 +191,10 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
             Alternatively, use IndexFile.write_tree() to handle this case
             automatically
 
-        Returns
-            self
-        """
+        :return: self"""
         lfd = LockedFD(file_path or self._file_path)
         stream = lfd.open(write=True, stream=True)
 
         self._serialize(stream, ignore_tree_extension_data)
         lfd.commit()
 
@@ -516,19 +432,8 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
         return path_map
 
     @classmethod
-    def get_entries_key(cls, *entry):
-        """
-        Returns
-            Key suitable to be used for the index.entries dictionary
-
-        ``entry``
-            One instance of type BaseIndexEntry or the path and the stage
-        """
-        if len(entry) == 1:
-            return (entry[0].path, entry[0].stage)
-        else:
-            return tuple(entry)
-
+    def entry_key(cls, *entry):
+        return entry_key(*entry)
 
     def resolve_blobs(self, iter_blobs):
         """
@@ -585,26 +490,31 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
             # allows to lazily reread on demand
             return self
 
-    def write_tree(self, missing_ok=False):
-        """
-        Writes the Index in self to a corresponding Tree file into the repository
-        object database and returns it as corresponding Tree object.
+    def _write_tree(self, missing_ok=False):
+        """Writes this index to a corresponding Tree object into the repository's
+        object database and returns it.
 
-        ``missing_ok``
+        :param missing_ok:
             If True, missing objects referenced by this index will not result
             in an error.
 
-        Returns
-            Tree object representing this index
-        """
+        :return: Tree object representing this index"""
+        # we obtain no lock as we just flush our contents to disk as tree
+        if not self.entries:
+            raise ValueError("Cannot write empty index")
+
+        # TODO: produce tree_sha natively from our entries; this method is
+        # still a stub and does not yet compute it
+        return Tree(self.repo, tree_sha, 0, '')
+
+    def write_tree(self, missing_ok = False):
         index_path = self._index_path()
         tmp_index_mover = TemporaryFileSwap(index_path)
 
         self.write(index_path, ignore_tree_extension_data=True)
         tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
 
         del(tmp_index_mover)    # as soon as possible
         return Tree(self.repo, tree_sha, 0, '')
 
     def _process_diff_args(self, args):
@@ -837,11 +747,10 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
     @post_clear_cache
     @default_index
     def remove(self, items, working_tree=False, **kwargs):
-        """
-        Remove the given items from the index and optionally from
+        """Remove the given items from the index and optionally from
         the working tree as well.
 
-        ``items``
+        :param items:
             Multiple types of items are supported which may be freely mixed.
 
             - path string
@@ -859,21 +768,20 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
             - BaseIndexEntry or compatible type
                 The only relevant information here is the path. The stage is ignored.
 
-        ``working_tree``
+        :param working_tree:
             If True, the entry will also be removed from the working tree,
             physically removing the respective file. This may fail if there
             are uncommitted changes in it.
 
-        ``**kwargs``
+        :param **kwargs:
             Additional keyword arguments to be passed to git-rm, such as 'r'
             to allow recursive removal
 
-        Returns
+        :return:
             List(path_string, ...) list of repository relative paths that have
             been removed effectively. This is interesting to know in case you
             have provided a directory or globs. Paths are relative to the
-            repository.
-        """
+            repository."""
         args = list()
         if not working_tree:
             args.append("--cached")
diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py
new file mode 100644
index 00000000..2e653ea6
--- /dev/null
+++ b/lib/git/index/fun.py
@@ -0,0 +1,138 @@
+"""
+Contains standalone functions to accompany the index implementation and make it
+more versatile
+"""
+from git.utils import (
+    IndexFileSHA1Writer,
+    )
+
+from typ import (
+    IndexEntry,
+    CE_NAMEMASK
+    )
+
+from util import (
+    pack,
+    unpack
+    )
+
+from binascii import (
+    hexlify,
+    unhexlify
+    )
+
+__all__ = ('write_cache', 'read_cache', 'entry_key')
+
+def write_cache_entry(entry, stream):
+    """Write the given entry to the stream"""
+    beginoffset = stream.tell()
+    write = stream.write
+    write(entry[4])         # ctime
+    write(entry[5])         # mtime
+    path = entry[3]
+    plen = len(path) & CE_NAMEMASK      # path length
+    assert plen == len(path), "Path %s too long to fit into index" % entry[3]
+    flags = plen | entry[2]
+    write(pack(">LLLLLL20sH", entry[6], entry[7], entry[0],
+               entry[8], entry[9], entry[10], unhexlify(entry[1]), flags))
+    write(path)
+    real_size = ((stream.tell() - beginoffset + 8) & ~7)
+    write("\0" * ((beginoffset + real_size) - stream.tell()))
+
+def write_cache(entries, stream, extension_data=None, ShaStreamCls=IndexFileSHA1Writer):
+    """Write the cache represented by entries to a stream
+    :param entries: **sorted** list of entries
+    :param stream: stream to wrap into the ShaStreamCls - it is used for
+        final output.
+    :param ShaStreamCls: Type to use when writing to the stream. It produces a sha
+        while writing to it, before the data is passed on to the wrapped stream
+    :param extension_data: any kind of data to write as a trailer, it must begin
+        with a 4 byte identifier, followed by its size ( 4 bytes )"""
+    # wrap the stream into a compatible writer
+    stream = ShaStreamCls(stream)
+
+    # header
+    version = 2
+    stream.write("DIRC")
+    stream.write(pack(">LL", version, len(entries)))
+
+    # body
+    for entry in entries:
+        write_cache_entry(entry, stream)
+    # END for each entry
+
+    # write previously cached extensions data
+    if extension_data is not None:
+        stream.write(extension_data)
+
+    # write the sha over the content
+    stream.write_sha()
+
+def read_entry(stream):
+    """Return: One entry of the given stream"""
+    beginoffset = stream.tell()
+    ctime = unpack(">8s", stream.read(8))[0]
+    mtime = unpack(">8s", stream.read(8))[0]
+    (dev, ino, mode, uid, gid, size, sha, flags) = \
+        unpack(">LLLLLL20sH", stream.read(20 + 4 * 6 + 2))
+    path_size = flags & CE_NAMEMASK
+    path = stream.read(path_size)
+
+    real_size = ((stream.tell() - beginoffset + 8) & ~7)
+    data = stream.read((beginoffset + real_size) - stream.tell())   # skip the NUL padding
+    return IndexEntry((mode, hexlify(sha), flags, path, ctime, mtime, dev, ino, uid, gid, size))
+
+def read_header(stream):
+    """Return tuple(version_long, num_entries) from the given stream"""
+    type_id = stream.read(4)
+    if type_id != "DIRC":
+        raise AssertionError("Invalid index file header: %r" % type_id)
+    version, num_entries = unpack(">LL", stream.read(4 * 2))
+
+    # TODO: handle version 3: extended data, see read-cache.c
+    assert version in (1, 2)
+    return version, num_entries
+
+def entry_key(*entry):
+    """:return: Key suitable to be used for the index.entries dictionary
+    :param *entry: One instance of type BaseIndexEntry or the path and the stage"""
+    if len(entry) == 1:
+        return (entry[0].path, entry[0].stage)
+    else:
+        return tuple(entry)
+    # END handle entry
+
+def read_cache(stream):
+    """Read a cache file from the given stream
+    :return: tuple(version, entries_dict, extension_data, content_sha)
+     * version is the integer version number
+     * entries_dict is a dictionary which maps (path, stage) keys to
+       IndexEntry instances
+     * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes
+     * content_sha is a 20 byte sha on all cache file contents"""
+    version, num_entries = read_header(stream)
+    count = 0
+    entries = dict()
+    while count < num_entries:
+        entry = read_entry(stream)
+        # entry_key would be the method to use, but we save the effort
+        entries[(entry.path, entry.stage)] = entry
+        count += 1
+    # END for each entry
+
+    # the footer contains extension data and a sha on the content so far
+    # Keep the extension footer, and verify we have a sha in the end
+    # Extension data format is:
+    #   4 bytes ID
+    #   4 bytes length of chunk
+    #   repeated 0 - N times
+    extension_data = stream.read(~0)
+    assert len(extension_data) > 19, "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data)
+
+    content_sha = extension_data[-20:]
+
+    # truncate the sha in the end as we will dynamically create it anyway
+    extension_data = extension_data[:-20]
+
+    return (version, entries, extension_data, content_sha)
diff --git a/test/git/test_index.py b/test/git/test_index.py
index cbe1f982..21d610db 100644
--- a/test/git/test_index.py
+++ b/test/git/test_index.py
@@ -155,7 +155,7 @@ class TestIndex(TestBase):
         # current index is at the (virtual) cur_commit
next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c" parent_commit = rw_repo.head.commit.parents[0] - manifest_key = IndexFile.get_entries_key('MANIFEST.in', 0) + manifest_key = IndexFile.entry_key('MANIFEST.in', 0) manifest_entry = rw_repo.index.entries[manifest_key] rw_repo.index.merge_tree(next_commit) # only one change should be recorded @@ -464,7 +464,7 @@ class TestIndex(TestBase): entries = index.reset(new_commit).add([link_file], fprogress=self._fprogress_add) self._assert_fprogress(entries) assert len(entries) == 1 and S_ISLNK(entries[0].mode) - assert S_ISLNK(index.entries[index.get_entries_key("my_real_symlink", 0)].mode) + assert S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode) # we expect only the target to be written assert index.repo.odb.stream(entries[0].sha).read() == target @@ -482,7 +482,7 @@ class TestIndex(TestBase): # assure this also works with an alternate method full_index_entry = IndexEntry.from_base(BaseIndexEntry((0120000, entries[0].sha, 0, entries[0].path))) - entry_key = index.get_entries_key(full_index_entry) + entry_key = index.entry_key(full_index_entry) index.reset(new_commit) assert entry_key not in index.entries @@ -552,8 +552,8 @@ class TestIndex(TestBase): # two existing ones, one new one yield 'CHANGES' yield 'ez_setup.py' - yield index.entries[index.get_entries_key('README', 0)] - yield index.entries[index.get_entries_key('.gitignore', 0)] + yield index.entries[index.entry_key('README', 0)] + yield index.entries[index.entry_key('.gitignore', 0)] for fid in range(3): fname = 'newfile%i' % fid @@ -565,5 +565,5 @@ class TestIndex(TestBase): index.add(paths, path_rewriter=rewriter) for filenum in range(len(paths)): - assert index.get_entries_key(str(filenum), 0) in index.entries + assert index.entry_key(str(filenum), 0) in index.entries |