Initial frame for implementing read_tree in pure Python. As git-read-tree can do much more than we can (and presumably faster), the .new method is used to create new index instances from up to 3 trees.

Implemented multi-tree traversal to facilitate building a stage list more efficiently (although I am not sure whether it could be faster to use a dictionary together with some intensive lookup), including a test. Added performance tests to learn how fast certain operations are, and whether one should be preferred over another.
author: Sebastian Thiel <byronimo@gmail.com> 2010-06-22 21:23:47 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2010-06-22 21:23:47 +0200
commit: be97c4558992a437cde235aafc7ae2bd6df84ac8 (patch)
tree: 3e44a7c38e356817ca81721725709d7374f95012 /lib/git/objects/fun.py
parent: 778234d544b3f58dd415aaf10679d15b01a5281f (diff)
download: gitpython-be97c4558992a437cde235aafc7ae2bd6df84ac8.tar.gz
1 files changed, 118 insertions, 0 deletions
diff --git a/lib/git/objects/fun.py b/lib/git/objects/fun.py
index 7882437d..d21a7dad 100644
--- a/lib/git/objects/fun.py
+++ b/lib/git/objects/fun.py
@@ -2,6 +2,9 @@
 
 __all__ = ('tree_to_stream', 'tree_entries_from_data')
 
+from stat import S_ISDIR
+				
+
 def tree_to_stream(entries, write):
 	"""Write the give list of entries into a stream using its write method
 	:param entries: **sorted** list of tuples with (binsha, mode, name)
@@ -64,3 +67,118 @@ def tree_entries_from_data(data):
 		out.append((sha, mode, name))
 	# END for each byte in data stream
 	return out
+	
+	
+def _find_by_name(tree_data, name, is_dir, start_at):
+	"""return data entry matching the given name and tree mode
+	or None.
+	Before the item is returned, the respective data item is set 
+	None in the tree_data list to mark it done"""
+	try:
+		item = tree_data[start_at]
+		if item and  item[2] == name and S_ISDIR(item[1]) == is_dir:
+			tree_data[start_at] = None
+			return item
+	except IndexError:
+		pass
+	# END exception handling
+	for index, item in enumerate(tree_data):
+		if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
+			tree_data[index] = None
+			return item
+		# END if item matches
+	# END for each item
+	return None
+
+def _to_full_path(item, path_prefix):
+	"""Rebuild entry with given path prefix"""
+	if not item:
+		return item
+	return (item[0], item[1], path_prefix+item[2])
+	
+def traverse_trees_recursive(odb, tree_shas, path_prefix):
+	"""
+	:return: list with entries according to the given tree-shas. 
+		The result is encoded in a list
+		of n tuple|None per blob/commit, (n == len(tree_shas)), where 
+		* [0] == 20 byte sha
+		* [1] == mode as int
+		* [2] == path relative to working tree root
+		The entry tuple is None if the respective blob/commit did not 
+		exist in the given tree.
+	:param tree_shas: iterable of shas pointing to trees. All trees must 
+		be on the same level. A tree-sha may be None in which case None
+	:param path_prefix: a prefix to be added to the returned paths on this level, 
+		set it '' for the first iteration
+	:note: The ordering of the returned items will be partially lost"""
+	trees_data = list()
+	nt = len(tree_shas)
+	for tree_sha in tree_shas:
+		if tree_sha is None:
+			data = list()
+		else:
+			data = tree_entries_from_data(odb.stream(tree_sha).read())
+		# END handle muted trees
+		trees_data.append(data)
+	# END for each sha to get data for
+	
+	out = list()
+	out_append = out.append
+	
+	# find all matching entries and recursively process them together if the match
+	# is a tree. If the match is a non-tree item, put it into the result.
+	# Processed items will be set None
+	for ti, tree_data in enumerate(trees_data):
+		for ii, item in enumerate(tree_data):
+			if not item:
+				continue
+			# END skip already done items
+			entries = [ None for n in range(nt) ]
+			entries[ti] = item
+			sha, mode, name = item							# its faster to unpack
+			is_dir = S_ISDIR(mode)							# type mode bits
+			
+			# find this item in all other tree data items
+			# wrap around, but stop one before our current index, hence 
+			# ti+nt, not ti+1+nt
+			for tio in range(ti+1, ti+nt):
+				tio = tio % nt
+				entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
+			# END for each other item data
+			
+			# if we are a directory, enter recursion
+			if is_dir:
+				out.extend(traverse_trees_recursive(odb, [ei[0] for ei in entries if ei], path_prefix+name+'/'))
+			else:
+				out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
+			# END handle recursion
+			
+			# finally mark it done
+			tree_data[ii] = None
+		# END for each item
+		
+		# we are done with one tree, set all its data empty
+		del(tree_data[:])
+	# END for each tree_data chunk
+	return out
+	
+def traverse_tree_recursive(odb, tree_sha, path_prefix):
+	"""
+	:return: list of entries of the tree pointed to by tree_sha. An entry
+		has the following format:
+		* [0] 20 byte sha
+		* [1] mode as int
+		* [2] path relative to the repository
+	:param path_prefix: prefix to prepend to the front of all returned paths"""
+	entries = list()
+	data = tree_entries_from_data(odb.stream(tree_sha).read())
+	
+	# unpacking/packing is faster than accessing individual items
+	for sha, mode, name in data:
+		if S_ISDIR(mode):
+			entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
+		else:
+			entries.append((sha, mode, path_prefix+name))
+	# END for each item
+	
+	return entries
author	Sebastian Thiel <byronimo@gmail.com>	2010-06-22 21:23:47 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2010-06-22 21:23:47 +0200
commit	be97c4558992a437cde235aafc7ae2bd6df84ac8 (patch)
tree	3e44a7c38e356817ca81721725709d7374f95012 /lib/git/objects/fun.py
parent	778234d544b3f58dd415aaf10679d15b01a5281f (diff)
download	gitpython-be97c4558992a437cde235aafc7ae2bd6df84ac8.tar.gz