summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2010-06-22 21:23:47 +0200
committerSebastian Thiel <byronimo@gmail.com>2010-06-22 21:23:47 +0200
commitbe97c4558992a437cde235aafc7ae2bd6df84ac8 (patch)
tree3e44a7c38e356817ca81721725709d7374f95012
parent778234d544b3f58dd415aaf10679d15b01a5281f (diff)
downloadgitpython-be97c4558992a437cde235aafc7ae2bd6df84ac8.tar.gz
Initial frame for implementing read_tree using pure python. As git-read-tree can do much more than we can ( and faster assumably ), the .new method is used to create new index instances from up to 3 trees.
Implemented multi-tree traversal to facilitate building a stage list more efficiently ( although I am not sure whether it could be faster to use a dictionary together with some intensive lookup ), including a test. Added performance tests to learn how fast certain operations are, and whether one should be preferred over another.
-rw-r--r--lib/git/index/base.py24
-rw-r--r--lib/git/index/fun.py29
-rw-r--r--lib/git/objects/fun.py118
-rw-r--r--lib/git/repo.py2
-rw-r--r--test/git/performance/test_utils.py94
-rw-r--r--test/git/test_fun.py70
-rw-r--r--test/git/test_index.py6
-rw-r--r--test/git/test_tree.py5
8 files changed, 339 insertions, 9 deletions
diff --git a/lib/git/index/base.py b/lib/git/index/base.py
index 06437702..f1be00e0 100644
--- a/lib/git/index/base.py
+++ b/lib/git/index/base.py
@@ -133,7 +133,6 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
def _index_path(self):
return join_path_native(self.repo.git_dir, "index")
-
@property
def path(self):
""" :return: Path to the index file we are representing """
@@ -241,6 +240,26 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
return self
@classmethod
+ def new(cls, repo, *tree_sha):
+ """ Merge the given treeish revisions into a new index which is returned.
+ This method behaves like git-read-tree --aggressive when doing the merge.
+
+ :param repo: The repository treeish are located in.
+
+ :param *tree_sha:
+ see ``from_tree``
+
+ :return:
+ New IndexFile instance. Its path will be undefined.
+ If you intend to write such a merged Index, supply an alternate file_path
+ to its 'write' method."""
+ base_entries = aggressive_tree_merge(repo.odb, tree_sha)
+
+ inst = cls(self.repo)
+ raise NotImplementedError("convert to entries")
+
+
+ @classmethod
def from_tree(cls, repo, *treeish, **kwargs):
"""
Merge the given treeish revisions into a new index which is returned.
@@ -275,8 +294,7 @@ class IndexFile(LazyMixin, diff.Diffable, Serializable):
As the underlying git-read-tree command takes into account the current index,
it will be temporarily moved out of the way to assure there are no unsuspected
- interferences.
- """
+ interferences."""
if len(treeish) == 0 or len(treeish) > 3:
raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish))
diff --git a/lib/git/index/fun.py b/lib/git/index/fun.py
index 9f877a66..962e139a 100644
--- a/lib/git/index/fun.py
+++ b/lib/git/index/fun.py
@@ -12,8 +12,10 @@ from git.utils import (
)
from typ import (
+ BaseIndexEntry,
IndexEntry,
- CE_NAMEMASK
+ CE_NAMEMASK,
+ CE_STAGESHIFT
)
from util import (
@@ -23,7 +25,6 @@ from util import (
from gitdb.base import IStream
from gitdb.typ import str_tree_type
-from binascii import a2b_hex
__all__ = ('write_cache', 'read_cache', 'write_tree_from_cache', 'entry_key' )
@@ -150,6 +151,7 @@ def write_tree_from_cache(entries, odb, sl, si=0):
:return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
tree entries being a tuple of binsha, mode, name"""
tree_items = list()
+ tree_items_append = tree_items.append
ci = sl.start
end = sl.stop
while ci < end:
@@ -161,7 +163,7 @@ def write_tree_from_cache(entries, odb, sl, si=0):
rbound = entry.path.find('/', si)
if rbound == -1:
# its not a tree
- tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
+ tree_items_append((entry.binsha, entry.mode, entry.path[si:]))
else:
# find common base range
base = entry.path[si:rbound]
@@ -178,7 +180,7 @@ def write_tree_from_cache(entries, odb, sl, si=0):
# enter recursion
# ci - 1 as we want to count our current item as well
sha, tree_entry_list = write_tree_from_cache(entries, odb, slice(ci-1, xi), rbound+1)
- tree_items.append((sha, S_IFDIR, base))
+ tree_items_append((sha, S_IFDIR, base))
# skip ahead
ci = xi
@@ -193,5 +195,24 @@ def write_tree_from_cache(entries, odb, sl, si=0):
istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
return (istream.binsha, tree_items)
+def _tree_entry_to_baseindexentry(tree_entry, stage):
+ return BaseIndexEntry(tree_entry[1], tree_entry[0], stage <<CE_STAGESHIFT, tree_entry[2])
+def aggressive_tree_merge(odb, tree_shas):
+ """
+ :return: list of BaseIndexEntries representing the aggressive merge of the given
+ trees. All valid entries are on stage 0, whereas the conflicting ones are left
+ on stage 1, 2 or 3, where stage 1 corresponds to the common ancestor tree,
+ 2 to our tree and 3 to 'their' tree.
+ :param tree_shas: 1, 2 or 3 trees as identified by their shas"""
+ out = list()
+ out_append = out.append
+ if len(tree_shas) == 1:
+ for entry in traverse_tree_recursive(odb, tree_shas[0]):
+ out_append(_tree_entry_to_baseindexentry(entry, 0))
+ # END for each entry
+ else:
+ raise ValueError("Cannot handle %i trees at once" % len(tree_shas))
+ # END handle tree shas
+ return out
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
index 7882437d..d21a7dad 100644
--- a/lib/git/objects/fun.py
+++ b/lib/git/objects/fun.py
@@ -2,6 +2,9 @@
__all__ = ('tree_to_stream', 'tree_entries_from_data')
+from stat import S_ISDIR
+
+
def tree_to_stream(entries, write):
"""Write the given list of entries into a stream using its write method
:param entries: **sorted** list of tuples with (binsha, mode, name)
@@ -64,3 +67,118 @@ def tree_entries_from_data(data):
out.append((sha, mode, name))
# END for each byte in data stream
return out
+
+
+def _find_by_name(tree_data, name, is_dir, start_at):
+ """return data entry matching the given name and tree mode
+ or None.
+ Before the item is returned, the respective data item is set
+ None in the tree_data list to mark it done"""
+ try:
+ item = tree_data[start_at]
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[start_at] = None
+ return item
+ except IndexError:
+ pass
+ # END exception handling
+ for index, item in enumerate(tree_data):
+ if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+ tree_data[index] = None
+ return item
+ # END if item matches
+ # END for each item
+ return None
+
+def _to_full_path(item, path_prefix):
+ """Rebuild entry with given path prefix"""
+ if not item:
+ return item
+ return (item[0], item[1], path_prefix+item[2])
+
+def traverse_trees_recursive(odb, tree_shas, path_prefix):
+ """
+ :return: list with entries according to the given tree-shas.
+ The result is encoded in a list
+ of n tuple|None per blob/commit, (n == len(tree_shas)), where
+ * [0] == 20 byte sha
+ * [1] == mode as int
+ * [2] == path relative to working tree root
+ The entry tuple is None if the respective blob/commit did not
+ exist in the given tree.
+ :param tree_shas: iterable of shas pointing to trees. All trees must
+ be on the same level. A tree-sha may be None, in which case the respective tree is treated as empty
+ :param path_prefix: a prefix to be added to the returned paths on this level,
+ set it '' for the first iteration
+ :note: The ordering of the returned items will be partially lost"""
+ trees_data = list()
+ nt = len(tree_shas)
+ for tree_sha in tree_shas:
+ if tree_sha is None:
+ data = list()
+ else:
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+ # END handle muted trees
+ trees_data.append(data)
+ # END for each sha to get data for
+
+ out = list()
+ out_append = out.append
+
+ # find all matching entries and recursively process them together if the match
+ # is a tree. If the match is a non-tree item, put it into the result.
+ # Processed items will be set None
+ for ti, tree_data in enumerate(trees_data):
+ for ii, item in enumerate(tree_data):
+ if not item:
+ continue
+ # END skip already done items
+ entries = [ None for n in range(nt) ]
+ entries[ti] = item
+ sha, mode, name = item # its faster to unpack
+ is_dir = S_ISDIR(mode) # type mode bits
+
+ # find this item in all other tree data items
+ # wrap around, but stop one before our current index, hence
+ # ti+nt, not ti+1+nt
+ for tio in range(ti+1, ti+nt):
+ tio = tio % nt
+ entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
+ # END for each other item data
+
+ # if we are a directory, enter recursion
+ if is_dir:
+ out.extend(traverse_trees_recursive(odb, [ei[0] for ei in entries if ei], path_prefix+name+'/'))
+ else:
+ out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
+ # END handle recursion
+
+ # finally mark it done
+ tree_data[ii] = None
+ # END for each item
+
+ # we are done with one tree, set all its data empty
+ del(tree_data[:])
+ # END for each tree_data chunk
+ return out
+
+def traverse_tree_recursive(odb, tree_sha, path_prefix):
+ """
+ :return: list of entries of the tree pointed to by tree_sha. An entry
+ has the following format:
+ * [0] 20 byte sha
+ * [1] mode as int
+ * [2] path relative to the repository
+ :param path_prefix: prefix to prepend to the front of all returned paths"""
+ entries = list()
+ data = tree_entries_from_data(odb.stream(tree_sha).read())
+
+ # unpacking/packing is faster than accessing individual items
+ for sha, mode, name in data:
+ if S_ISDIR(mode):
+ entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
+ else:
+ entries.append((sha, mode, path_prefix+name))
+ # END for each item
+
+ return entries
diff --git a/lib/git/repo.py b/lib/git/repo.py
index 74525403..f97126ea 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -71,7 +71,7 @@ class Repo(object):
# represents the configuration level of a configuration file
config_level = ("system", "global", "repository")
- def __init__(self, path=None, odbt = GitCmdObjectDB):
+ def __init__(self, path=None, odbt = GitDB):
""" Create a new Repo instance
:param path: is the path to either the root git directory or the bare git repo::
diff --git a/test/git/performance/test_utils.py b/test/git/performance/test_utils.py
index 76adffec..16100f8b 100644
--- a/test/git/performance/test_utils.py
+++ b/test/git/performance/test_utils.py
@@ -57,3 +57,97 @@ class TestUtilPerformance(TestBigRepoR):
na = ni * 3
print >> sys.stderr, "Accessed %s[x] %i times in %s s ( %f acc / s)" % (cls.__name__, na, elapsed, na / elapsed)
# END for each sequence
+
+ def test_instantiation(self):
+ ni = 100000
+ max_num_items = 4
+ for mni in range(max_num_items+1):
+ for cls in (tuple, list):
+ st = time()
+ for i in xrange(ni):
+ if mni == 0:
+ cls()
+ elif mni == 1:
+ cls((1,))
+ elif mni == 2:
+ cls((1,2))
+ elif mni == 3:
+ cls((1,2,3))
+ elif mni == 4:
+ cls((1,2,3,4))
+ else:
+ cls(x for x in xrange(mni))
+ # END handle empty cls
+ # END for each item
+ elapsed = time() - st
+ print >> sys.stderr, "Created %i %ss of size %i in %f s ( %f inst / s)" % (ni, cls.__name__, mni, elapsed, ni / elapsed)
+ # END for each type
+ # END for each item count
+
+ # tuple and tuple direct
+ st = time()
+ for i in xrange(ni):
+ t = (1,2,3,4)
+ # END for each item
+ elapsed = time() - st
+ print >> sys.stderr, "Created %i tuples (1,2,3,4) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed)
+
+ st = time()
+ for i in xrange(ni):
+ t = tuple((1,2,3,4))
+ # END for each item
+ elapsed = time() - st
+ print >> sys.stderr, "Created %i tuples tuple((1,2,3,4)) in %f s ( %f tuples / s)" % (ni, elapsed, ni / elapsed)
+
+ def test_unpacking_vs_indexing(self):
+ ni = 1000000
+ list_items = [1,2,3,4]
+ tuple_items = (1,2,3,4)
+
+ for sequence in (list_items, tuple_items):
+ st = time()
+ for i in xrange(ni):
+ one, two, three, four = sequence
+ # END for each iteration
+ elapsed = time() - st
+ print >> sys.stderr, "Unpacked %i %ss of size %i in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
+
+ st = time()
+ for i in xrange(ni):
+ one, two, three, four = sequence[0], sequence[1], sequence[2], sequence[3]
+ # END for each iteration
+ elapsed = time() - st
+ print >> sys.stderr, "Unpacked %i %ss of size %i individually in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
+
+ st = time()
+ for i in xrange(ni):
+ one, two = sequence[0], sequence[1]
+ # END for each iteration
+ elapsed = time() - st
+ print >> sys.stderr, "Unpacked %i %ss of size %i individually (2 of 4) in %f s ( %f acc / s)" % (ni, type(sequence).__name__, len(sequence), elapsed, ni / elapsed)
+ # END for each sequence
+
+ def test_large_list_vs_iteration(self):
+ # what costs more: alloc/realloc of lists, or the cpu strain of iterators ?
+ def slow_iter(ni):
+ for i in xrange(ni):
+ yield i
+ # END slow iter - be closer to the real world
+
+ # alloc doesn't play a role here it seems
+ for ni in (500, 1000, 10000, 20000, 40000):
+ st = time()
+ for i in list(xrange(ni)):
+ i
+ # END for each item
+ elapsed = time() - st
+ print >> sys.stderr, "Iterated %i items from list in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed)
+
+ st = time()
+ for i in slow_iter(ni):
+ i
+ # END for each item
+ elapsed = time() - st
+ print >> sys.stderr, "Iterated %i items from iterator in %f s ( %f acc / s)" % (ni, elapsed, ni / elapsed)
+ # END for each number of iterations
+
diff --git a/test/git/test_fun.py b/test/git/test_fun.py
new file mode 100644
index 00000000..ccf15c77
--- /dev/null
+++ b/test/git/test_fun.py
@@ -0,0 +1,70 @@
+from test.testlib import *
+from git.objects.fun import (
+ traverse_tree_recursive,
+ traverse_trees_recursive
+ )
+
+from git.index.fun import (
+ aggressive_tree_merge
+ )
+
+class TestFun(TestBase):
+
+ def test_aggressive_tree_merge(self):
+ # head tree with additions, removals and modification compared to its predecessor
+ HC = self.rorepo.commit("6c1faef799095f3990e9970bc2cb10aa0221cf9c")
+ H = HC.tree
+ B = HC.parents[0].tree
+
+ # test new index from single tree
+
+ def _assert_entries(self, entries, num_trees):
+ assert len(entries[0]) == num_trees
+ for entry in entries:
+ paths = set(e[2] for e in entry if e)
+
+ # only one path per set of entries
+ assert len(paths) == 1
+ # END verify entry
+
+ def test_tree_traversal(self):
+ # low level tree traversal
+ odb = self.rorepo.odb
+ H = self.rorepo.tree('29eb123beb1c55e5db4aa652d843adccbd09ae18') # head tree
+ M = self.rorepo.tree('e14e3f143e7260de9581aee27e5a9b2645db72de') # merge tree
+ B = self.rorepo.tree('f606937a7a21237c866efafcad33675e6539c103') # base tree
+ B_old = self.rorepo.tree('1f66cfbbce58b4b552b041707a12d437cc5f400a') # old base tree
+
+ # two very different trees
+ entries = traverse_trees_recursive(odb, [B_old.sha, H.sha], '')
+ self._assert_entries(entries, 2)
+
+ oentries = traverse_trees_recursive(odb, [H.sha, B_old.sha], '')
+ assert len(oentries) == len(entries)
+ self._assert_entries(oentries, 2)
+
+ # single tree
+ is_no_tree = lambda i, d: i.type != 'tree'
+ entries = traverse_trees_recursive(odb, [B.sha], '')
+ assert len(entries) == len(list(B.traverse(predicate=is_no_tree)))
+ self._assert_entries(entries, 1)
+
+ # two trees
+ entries = traverse_trees_recursive(odb, [B.sha, H.sha], '')
+ self._assert_entries(entries, 2)
+
+ # three trees
+ entries = traverse_trees_recursive(odb, [B.sha, H.sha, M.sha], '')
+ self._assert_entries(entries, 3)
+
+ def test_tree_traversal_single(self):
+ max_count = 50
+ count = 0
+ odb = self.rorepo.odb
+ for commit in self.rorepo.commit("29eb123beb1c55e5db4aa652d843adccbd09ae18").traverse():
+ if count >= max_count:
+ break
+ count += 1
+ entries = traverse_tree_recursive(odb, commit.tree.sha, '')
+ assert entries
+ # END for each commit
diff --git a/test/git/test_index.py b/test/git/test_index.py
index d0063e89..ae754430 100644
--- a/test/git/test_index.py
+++ b/test/git/test_index.py
@@ -580,7 +580,7 @@ class TestIndex(TestBase):
# write all trees and compare them
# its important to have a few submodules in there too
- max_count = 100
+ max_count = 25
count = 0
for commit in rw_repo.head.commit.traverse():
if count >= max_count:
@@ -593,3 +593,7 @@ class TestIndex(TestBase):
assert index.write_tree() == orig_tree
# END for each commit
+ def test_index_new(self):
+ self.fail("todo index new")
+
+
diff --git a/test/git/test_tree.py b/test/git/test_tree.py
index d983cb2f..a443bd97 100644
--- a/test/git/test_tree.py
+++ b/test/git/test_tree.py
@@ -7,6 +7,10 @@
import os
from test.testlib import *
from git import *
+from git.objects.fun import (
+ traverse_tree_recursive,
+ traverse_trees_recursive
+ )
from cStringIO import StringIO
class TestTree(TestBase):
@@ -136,3 +140,4 @@ class TestTree(TestBase):
def test_repr(self):
tree = Tree(self.rorepo, 'abc')
assert_equal('<git.Tree "abc">', repr(tree))
+