Removed plenty of code which went into git-python. This is just for completeness, gitdb doesn't need to be worked on anymore
ref: gitdbmerger

author: Sebastian Thiel <byronimo@gmail.com> 2011-05-05 19:44:36 +0200
committer: Sebastian Thiel <byronimo@gmail.com> 2011-05-05 19:44:36 +0200
commit: e03093dd392b92f51b7d7cf66d7b1949b9f843e6 (patch)
tree: dbd2b85d8b201cb5f2848b3aaa11cffbc96031c2 /gitdb/object
parent: 6463c10db377573e695bc504a9451bdb6cbf61f5 (diff)
download: gitdbmerger.tar.gz
7 files changed, 1 insertions, 1290 deletions
diff --git a/gitdb/object/base.py b/gitdb/object/base.py
index ee76da1..e8158c9 100644
--- a/gitdb/object/base.py
+++ b/gitdb/object/base.py
@@ -3,176 +3,4 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from util import get_object_type_by_name
-from gitdb.util import (
-							hex_to_bin,
-							bin_to_hex,
-							dirname,
-							basename, 
-							LazyMixin, 
-							join_path_native, 
-							stream_copy
-						)
 
-from gitdb.typ import ObjectType
-	
-_assertion_msg_format = "Created object %r whose python type %r disagrees with the acutal git object type %r"
-
-__all__ = ("Object", "IndexObject")
-
-class Object(LazyMixin):
-	"""Implements an Object which may be Blobs, Trees, Commits and Tags"""
-	NULL_HEX_SHA = '0'*40
-	NULL_BIN_SHA = '\0'*20
-	
-	TYPES = (ObjectType.blob, ObjectType.tree, ObjectType.commit, ObjectType.tag)
-	__slots__ = ("odb", "binsha", "size" )
-	
-	type = None			# to be set by subclass
-	type_id = None		# to be set by subclass
-	
-	def __init__(self, odb, binsha):
-		"""Initialize an object by identifying it by its binary sha. 
-		All keyword arguments will be set on demand if None.
-		
-		:param odb: repository this object is located in
-			
-		:param binsha: 20 byte SHA1"""
-		super(Object,self).__init__()
-		self.odb = odb
-		self.binsha = binsha
-		assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % (binsha, len(binsha))
-
-	@classmethod
-	def new(cls, odb, id):
-		"""
-		:return: New Object instance of a type appropriate to the object type behind 
-			id. The id of the newly created object will be a binsha even though 
-			the input id may have been a Reference or Rev-Spec
-			
-		:param id: reference, rev-spec, or hexsha
-			
-		:note: This cannot be a __new__ method as it would always call __init__
-			with the input id which is not necessarily a binsha."""
-		return odb.rev_parse(str(id))
-		
-	@classmethod
-	def new_from_sha(cls, odb, sha1):
-		"""
-		:return: new object instance of a type appropriate to represent the given 
-			binary sha1
-		:param sha1: 20 byte binary sha1"""
-		if sha1 == cls.NULL_BIN_SHA:
-			# the NULL binsha is always the root commit
-			return get_object_type_by_name('commit')(odb, sha1)
-		#END handle special case
-		oinfo = odb.info(sha1)
-		inst = get_object_type_by_name(oinfo.type)(odb, oinfo.binsha)
-		inst.size = oinfo.size
-		return inst 
-	
-	def _set_cache_(self, attr):
-		"""Retrieve object information"""
-		if attr	 == "size":
-			oinfo = self.odb.info(self.binsha)
-			self.size = oinfo.size
-			# assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type)
-		else:
-			super(Object,self)._set_cache_(attr)
-		
-	def __eq__(self, other):
-		""":return: True if the objects have the same SHA1"""
-		return self.binsha == other.binsha
-		
-	def __ne__(self, other):
-		""":return: True if the objects do not have the same SHA1 """
-		return self.binsha != other.binsha
-		
-	def __hash__(self):
-		""":return: Hash of our id allowing objects to be used in dicts and sets"""
-		return hash(self.binsha)
-		
-	def __str__(self):
-		""":return: string of our SHA1 as understood by all git commands"""
-		return bin_to_hex(self.binsha)
-		
-	def __repr__(self):
-		""":return: string with pythonic representation of our object"""
-		return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha)
-
-	@property
-	def hexsha(self):
-		""":return: 40 byte hex version of our 20 byte binary sha"""
-		return bin_to_hex(self.binsha)
-
-	@property
-	def data_stream(self):
-		""" :return:  File Object compatible stream to the uncompressed raw data of the object
-		:note: returned streams must be read in order"""
-		return self.odb.stream(self.binsha)
-
-	def stream_data(self, ostream):
-		"""Writes our data directly to the given output stream
-		:param ostream: File object compatible stream object.
-		:return: self"""
-		istream = self.odb.stream(self.binsha)
-		stream_copy(istream, ostream)
-		return self
-		
-
-class IndexObject(Object):
-	"""Base for all objects that can be part of the index file , namely Tree, Blob and
-	SubModule objects"""
-	__slots__ = ("path", "mode")
-	
-	# for compatability with iterable lists
-	_id_attribute_ = 'path'
-	
-	def __init__(self, odb, binsha, mode=None, path=None):
-		"""Initialize a newly instanced IndexObject
-		:param odb: is the object database we are located in
-		:param binsha: 20 byte sha1
-		:param mode: is the stat compatible file mode as int, use the stat module
-			to evaluate the infomration
-		:param path:
-			is the path to the file in the file system, relative to the git repository root, i.e.
-			file.ext or folder/other.ext
-		:note:
-			Path may not be set of the index object has been created directly as it cannot
-			be retrieved without knowing the parent tree."""
-		super(IndexObject, self).__init__(odb, binsha)
-		if mode is not None:
-			self.mode = mode
-		if path is not None:
-			self.path = path
-	
-	def __hash__(self):
-		""":return:
-			Hash of our path as index items are uniquely identifyable by path, not 
-			by their data !"""
-		return hash(self.path)
-	
-	def _set_cache_(self, attr):
-		if attr in IndexObject.__slots__:
-			# they cannot be retrieved lateron ( not without searching for them )
-			raise AttributeError( "path and mode attributes must have been set during %s object creation" % type(self).__name__ )
-		else:
-			super(IndexObject, self)._set_cache_(attr)
-		# END hanlde slot attribute
-	
-	@property
-	def name(self):
-		""":return: Name portion of the path, effectively being the basename"""
-		return basename(self.path)
-		
-	@property
-	def abspath(self):
-		"""
-		:return:
-			Absolute path to this index object in the file system ( as opposed to the 
-			.path field which is a path relative to the git repository ).
-			
-			The returned path will be native to the system and contains '\' on windows. """
-		assert False, "Only works if repository is not bare - provide this check in an interface"
-		return join_path_native(dirname(self.odb.root_path()), self.path)
-		
diff --git a/gitdb/object/commit.py b/gitdb/object/commit.py
index 7f3d9e4..cb0c639 100644
--- a/gitdb/object/commit.py
+++ b/gitdb/object/commit.py
@@ -3,257 +3,4 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-from gitdb.typ import ObjectType
-from tree import Tree
-from cStringIO import StringIO
 
-import base
-from gitdb.util import (
-						hex_to_bin,
-						Actor,
-						)
-from util import (
-						Traversable,
-						Serializable,
-						altz_to_utctz_str,
-						parse_actor_and_date
-					)
-import sys
-
-__all__ = ('Commit', )
-
-class Commit(base.Object, Traversable, Serializable):
-	"""Wraps a git Commit object.
-	
-	This class will act lazily on some of its attributes and will query the 
-	value on demand only if it involves calling the git binary."""
-	
-	# ENVIRONMENT VARIABLES
-	# read when creating new commits
-	env_author_date = "GIT_AUTHOR_DATE"
-	env_committer_date = "GIT_COMMITTER_DATE"
-	
-	# CONFIGURATION KEYS
-	conf_encoding = 'i18n.commitencoding'
-	
-	# INVARIANTS
-	default_encoding = "UTF-8"
-	
-	
-	# object configuration 
-	type = ObjectType.commit
-	type_id = ObjectType.commit_id
-	
-	__slots__ = ("tree",
-				 "author", "authored_date", "author_tz_offset",
-				 "committer", "committed_date", "committer_tz_offset",
-				 "message", "parents", "encoding")
-	_id_attribute_ = "binsha"
-	
-	def __init__(self, odb, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None,
-				 committer=None, committed_date=None, committer_tz_offset=None, 
-				 message=None,  parents=None, encoding=None):
-		"""Instantiate a new Commit. All keyword arguments taking None as default will 
-		be implicitly set on first query. 
-		
-		:param binsha: 20 byte sha1
-		:param parents: tuple( Commit, ... ) 
-			is a tuple of commit ids or actual Commits
-		:param tree: Tree
-			Tree object
-		:param author: Actor
-			is the author string ( will be implicitly converted into an Actor object )
-		:param authored_date: int_seconds_since_epoch
-			is the authored DateTime - use time.gmtime() to convert it into a 
-			different format
-		:param author_tz_offset: int_seconds_west_of_utc
-			is the timezone that the authored_date is in
-		:param committer: Actor
-			is the committer string
-		:param committed_date: int_seconds_since_epoch
-			is the committed DateTime - use time.gmtime() to convert it into a 
-			different format
-		:param committer_tz_offset: int_seconds_west_of_utc
-			is the timezone that the authored_date is in
-		:param message: string
-			is the commit message
-		:param encoding: string
-			encoding of the message, defaults to UTF-8
-		:param parents:
-			List or tuple of Commit objects which are our parent(s) in the commit 
-			dependency graph
-		:return: git.Commit
-		
-		:note: Timezone information is in the same format and in the same sign 
-			as what time.altzone returns. The sign is inverted compared to git's 
-			UTC timezone."""
-		super(Commit,self).__init__(odb, binsha)
-		if tree is not None:
-			assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree)
-		if tree is not None:
-			self.tree = tree
-		if author is not None:
-			self.author = author
-		if authored_date is not None:
-			self.authored_date = authored_date
-		if author_tz_offset is not None:
-			self.author_tz_offset = author_tz_offset
-		if committer is not None:
-			self.committer = committer
-		if committed_date is not None:
-			self.committed_date = committed_date
-		if committer_tz_offset is not None:
-			self.committer_tz_offset = committer_tz_offset
-		if message is not None:
-			self.message = message
-		if parents is not None:
-			self.parents = parents
-		if encoding is not None:
-			self.encoding = encoding
-		
-	@classmethod
-	def _get_intermediate_items(cls, commit):
-		return commit.parents
-
-	def _set_cache_(self, attr):
-		if attr in Commit.__slots__:
-			# read the data in a chunk, its faster - then provide a file wrapper
-			binsha, typename, self.size, stream = self.odb.stream(self.binsha)
-			self._deserialize(StringIO(stream.read()))
-		else:
-			super(Commit, self)._set_cache_(attr)
-		# END handle attrs
-
-	@property
-	def summary(self):
-		""":return: First line of the commit message"""
-		return self.message.split('\n', 1)[0]
-		
-	@classmethod
-	def _iter_from_process_or_stream(cls, odb, proc_or_stream):
-		"""Parse out commit information into a list of Commit objects
-		We expect one-line per commit, and parse the actual commit information directly
-		from our lighting fast object database
-
-		:param proc: git-rev-list process instance - one sha per line
-		:return: iterator returning Commit objects"""
-		stream = proc_or_stream
-		if not hasattr(stream,'readline'):
-			stream = proc_or_stream.stdout
-			
-		readline = stream.readline
-		while True:
-			line = readline()
-			if not line:
-				break
-			hexsha = line.strip()
-			if len(hexsha) > 40:
-				# split additional information, as returned by bisect for instance
-				hexsha, rest = line.split(None, 1)
-			# END handle extra info
-			
-			assert len(hexsha) == 40, "Invalid line: %s" % hexsha
-			yield cls(odb, hex_to_bin(hexsha))
-		# END for each line in stream
-	
-	#{ Serializable Implementation
-	
-	def _serialize(self, stream):
-		write = stream.write
-		write("tree %s\n" % self.tree)
-		for p in self.parents:
-			write("parent %s\n" % p)
-			
-		a = self.author
-		aname = a.name
-		if isinstance(aname, unicode):
-			aname = aname.encode(self.encoding)
-		# END handle unicode in name
-		
-		c = self.committer
-		fmt = "%s %s <%s> %s %s\n"
-		write(fmt % ("author", aname, a.email, 
-						self.authored_date, 
-						altz_to_utctz_str(self.author_tz_offset)))
-			
-		# encode committer
-		aname = c.name
-		if isinstance(aname, unicode):
-			aname = aname.encode(self.encoding)
-		# END handle unicode in name
-		write(fmt % ("committer", aname, c.email, 
-						self.committed_date,
-						altz_to_utctz_str(self.committer_tz_offset)))
-		
-		if self.encoding != self.default_encoding:
-			write("encoding %s\n" % self.encoding)
-		
-		write("\n")
-		
-		# write plain bytes, be sure its encoded according to our encoding
-		if isinstance(self.message, unicode):
-			write(self.message.encode(self.encoding))
-		else:
-			write(self.message)
-		# END handle encoding
-		return self
-	
-	def _deserialize(self, stream):
-		""":param from_rev_list: if true, the stream format is coming from the rev-list command
-		Otherwise it is assumed to be a plain data stream from our object"""
-		readline = stream.readline
-		self.tree = Tree(self.odb, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '')
-
-		self.parents = list()
-		next_line = None
-		while True:
-			parent_line = readline()
-			if not parent_line.startswith('parent'):
-				next_line = parent_line
-				break
-			# END abort reading parents
-			self.parents.append(type(self)(self.odb, hex_to_bin(parent_line.split()[-1])))
-		# END for each parent line
-		self.parents = tuple(self.parents)
-		
-		self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line)
-		self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline())
-		
-		
-		# now we can have the encoding line, or an empty line followed by the optional
-		# message.
-		self.encoding = self.default_encoding
-		# read encoding or empty line to separate message
-		enc = readline()
-		enc = enc.strip()
-		if enc:
-			self.encoding = enc[enc.find(' ')+1:]
-			# now comes the message separator 
-			readline()
-		# END handle encoding
-		
-		# decode the authors name
-		try:
-			self.author.name = self.author.name.decode(self.encoding) 
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding)
-		# END handle author's encoding
-		
-		# decode committer name
-		try:
-			self.committer.name = self.committer.name.decode(self.encoding) 
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode committer name '%s' using encoding %s" % (self.committer.name, self.encoding)
-		# END handle author's encoding
-		
-		# a stream from our data simply gives us the plain message
-		# The end of our message stream is marked with a newline that we strip
-		self.message = stream.read()
-		try:
-			self.message = self.message.decode(self.encoding)
-		except UnicodeDecodeError:
-			print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding)
-		# END exception handling 
-		return self
-		
-	#} END serializable implementation
diff --git a/gitdb/object/fun.py b/gitdb/object/fun.py
index 9b0a377..22016b2 100644
--- a/gitdb/object/fun.py
+++ b/gitdb/object/fun.py
@@ -1,199 +1,2 @@
 """Module with functions which are supposed to be as fast as possible"""
-from stat import S_ISDIR
 
-__all__ = ('tree_to_stream', 'tree_entries_from_data', 'traverse_trees_recursive',
-			'traverse_tree_recursive')
-
-
-				
-
-def tree_to_stream(entries, write):
-	"""Write the give list of entries into a stream using its write method
-	:param entries: **sorted** list of tuples with (binsha, mode, name)
-	:param write: write method which takes a data string"""
-	ord_zero = ord('0')
-	bit_mask = 7			# 3 bits set
-	
-	for binsha, mode, name in entries:
-		mode_str = ''
-		for i in xrange(6):
-			mode_str = chr(((mode >> (i*3)) & bit_mask) + ord_zero) + mode_str
-		# END for each 8 octal value
-		
-		# git slices away the first octal if its zero
-		if mode_str[0] == '0':
-			mode_str = mode_str[1:]
-		# END save a byte
-
-		# here it comes:  if the name is actually unicode, the replacement below
-		# will not work as the binsha is not part of the ascii unicode encoding - 
-		# hence we must convert to an utf8 string for it to work properly.
-		# According to my tests, this is exactly what git does, that is it just
-		# takes the input literally, which appears to be utf8 on linux.
-		if isinstance(name, unicode):
-			name = name.encode("utf8")
-		write("%s %s\0%s" % (mode_str, name, binsha)) 
-	# END for each item
-
-
-def tree_entries_from_data(data):
-	"""Reads the binary representation of a tree and returns tuples of Tree items
-	:param data: data block with tree data
-	:return: list(tuple(binsha, mode, tree_relative_path), ...)"""
-	ord_zero = ord('0')
-	len_data = len(data)
-	i = 0
-	out = list()
-	while i < len_data:
-		mode = 0
-		
-		# read mode
-		# Some git versions truncate the leading 0, some don't
-		# The type will be extracted from the mode later
-		while data[i] != ' ':
-			# move existing mode integer up one level being 3 bits
-			# and add the actual ordinal value of the character
-			mode = (mode << 3) + (ord(data[i]) - ord_zero)
-			i += 1
-		# END while reading mode
-		
-		# byte is space now, skip it
-		i += 1
-		
-		# parse name, it is NULL separated
-		
-		ns = i
-		while data[i] != '\0':
-			i += 1
-		# END while not reached NULL
-		
-		# default encoding for strings in git is utf8
-		# Only use the respective unicode object if the byte stream was encoded
-		name = data[ns:i]
-		name_enc = name.decode("utf-8")
-		if len(name) > len(name_enc):
-			name = name_enc
-		# END handle encoding
-		
-		# byte is NULL, get next 20
-		i += 1
-		sha = data[i:i+20]
-		i = i + 20
-		out.append((sha, mode, name))
-	# END for each byte in data stream
-	return out
-	
-	
-def _find_by_name(tree_data, name, is_dir, start_at):
-	"""return data entry matching the given name and tree mode
-	or None.
-	Before the item is returned, the respective data item is set 
-	None in the tree_data list to mark it done"""
-	try:
-		item = tree_data[start_at]
-		if item and  item[2] == name and S_ISDIR(item[1]) == is_dir:
-			tree_data[start_at] = None
-			return item
-	except IndexError:
-		pass
-	# END exception handling
-	for index, item in enumerate(tree_data):
-		if item and item[2] == name and S_ISDIR(item[1]) == is_dir:
-			tree_data[index] = None
-			return item
-		# END if item matches
-	# END for each item
-	return None
-
-def _to_full_path(item, path_prefix):
-	"""Rebuild entry with given path prefix"""
-	if not item:
-		return item
-	return (item[0], item[1], path_prefix+item[2])
-	
-def traverse_trees_recursive(odb, tree_shas, path_prefix):
-	"""
-	:return: list with entries according to the given binary tree-shas. 
-		The result is encoded in a list
-		of n tuple|None per blob/commit, (n == len(tree_shas)), where 
-		* [0] == 20 byte sha
-		* [1] == mode as int
-		* [2] == path relative to working tree root
-		The entry tuple is None if the respective blob/commit did not 
-		exist in the given tree.
-	:param tree_shas: iterable of shas pointing to trees. All trees must 
-		be on the same level. A tree-sha may be None in which case None
-	:param path_prefix: a prefix to be added to the returned paths on this level, 
-		set it '' for the first iteration
-	:note: The ordering of the returned items will be partially lost"""
-	trees_data = list()
-	nt = len(tree_shas)
-	for tree_sha in tree_shas:
-		if tree_sha is None:
-			data = list()
-		else:
-			data = tree_entries_from_data(odb.stream(tree_sha).read())
-		# END handle muted trees
-		trees_data.append(data)
-	# END for each sha to get data for
-	
-	out = list()
-	out_append = out.append
-	
-	# find all matching entries and recursively process them together if the match
-	# is a tree. If the match is a non-tree item, put it into the result.
-	# Processed items will be set None
-	for ti, tree_data in enumerate(trees_data):
-		for ii, item in enumerate(tree_data):
-			if not item:
-				continue
-			# END skip already done items
-			entries = [ None for n in range(nt) ]
-			entries[ti] = item
-			sha, mode, name = item							# its faster to unpack
-			is_dir = S_ISDIR(mode)							# type mode bits
-			
-			# find this item in all other tree data items
-			# wrap around, but stop one before our current index, hence 
-			# ti+nt, not ti+1+nt
-			for tio in range(ti+1, ti+nt):
-				tio = tio % nt
-				entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii)
-			# END for each other item data
-			
-			# if we are a directory, enter recursion
-			if is_dir:
-				out.extend(traverse_trees_recursive(odb, [((ei and ei[0]) or None) for ei in entries], path_prefix+name+'/'))
-			else:
-				out_append(tuple(_to_full_path(e, path_prefix) for e in entries))
-			# END handle recursion
-			
-			# finally mark it done
-			tree_data[ii] = None
-		# END for each item
-		
-		# we are done with one tree, set all its data empty
-		del(tree_data[:])
-	# END for each tree_data chunk
-	return out
-	
-def traverse_tree_recursive(odb, tree_sha, path_prefix):
-	"""
-	:return: list of entries of the tree pointed to by the binary tree_sha. An entry
-		has the following format:
-		* [0] 20 byte sha
-		* [1] mode as int
-		* [2] path relative to the repository
-	:param path_prefix: prefix to prepend to the front of all returned paths"""
-	entries = list()
-	data = tree_entries_from_data(odb.stream(tree_sha).read())
-	
-	# unpacking/packing is faster than accessing individual items
-	for sha, mode, name in data:
-		if S_ISDIR(mode):
-			entries.extend(traverse_tree_recursive(odb, sha, path_prefix+name+'/'))
-		else:
-			entries.append((sha, mode, path_prefix+name))
-	# END for each item
-	
-	return entries
diff --git a/gitdb/object/submodule.py b/gitdb/object/submodule.py
index 77669b3..33cb649 100644
--- a/gitdb/object/submodule.py
+++ b/gitdb/object/submodule.py
@@ -1,12 +1,6 @@
 from base import IndexObject
 
-class Submodule(IndexObject):
-	"""Dummy type representing submodules. At some point an implemenation might be add
-	( it currently is located in GitPython )"""
 	
-	# this is a bogus type for base class compatability
-	type = 'submodule'
-	# this type doesn't really have a type id
-	type_id = 0
+	
 	
 
diff --git a/gitdb/object/tag.py b/gitdb/object/tag.py
index ce702c7..c09daab 100644
--- a/gitdb/object/tag.py
+++ b/gitdb/object/tag.py
@@ -4,76 +4,4 @@
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """ Module containing all object based types. """
-import base
-from gitdb.util import hex_to_bin
-from util import (
-					get_object_type_by_name,
-					parse_actor_and_date
-				)
-from gitdb.typ import ObjectType
-
-__all__ = ("TagObject", )
-
-class TagObject(base.Object):
-	"""Non-Lightweight tag carrying additional information about an object we are pointing to."""
-	type = ObjectType.tag
-	type_id = ObjectType.tag_id
-	
-	__slots__ = ( "object", "tag", "tagger", "tagged_date", "tagger_tz_offset", "message" )
-		
-	def __init__(self, odb, binsha, object=None, tag=None, 
-				tagger=None, tagged_date=None, tagger_tz_offset=None, message=None):
-		"""Initialize a tag object with additional data
-		
-		:param odb: repository this object is located in
-		:param binsha: 20 byte SHA1
-		:param object: Object instance of object we are pointing to
-		:param tag: name of this tag
-		:param tagger: Actor identifying the tagger
-		:param tagged_date: int_seconds_since_epoch
-			is the DateTime of the tag creation - use time.gmtime to convert 
-			it into a different format
-		:param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the 
-			authored_date is in, in a format similar to time.altzone"""
-		super(TagObject, self).__init__(odb, binsha )
-		if object is not None:
-			self.object = object
-		if tag is not None:
-			self.tag = tag
-		if tagger is not None:
-			self.tagger = tagger
-		if tagged_date is not None:
-			self.tagged_date = tagged_date
-		if tagger_tz_offset is not None:
-			self.tagger_tz_offset = tagger_tz_offset
-		if message is not None:
-			self.message = message
-		
-	def _set_cache_(self, attr):
-		"""Cache all our attributes at once"""
-		if attr in TagObject.__slots__:
-			ostream = self.odb.stream(self.binsha)
-			lines = ostream.read().splitlines()
-			
-			obj, hexsha = lines[0].split(" ")		# object <hexsha>
-			type_token, type_name = lines[1].split(" ") # type <type_name>
-			self.object = get_object_type_by_name(type_name)(self.odb, hex_to_bin(hexsha))
-			
-			self.tag = lines[2][4:]	 # tag <tag name>
-			
-			tagger_info = lines[3][7:]# tagger <actor> <date>
-			self.tagger, self.tagged_date, self.tagger_tz_offset = parse_actor_and_date(tagger_info)
-			
-			# line 4 empty - it could mark the beginning of the next header
-			# in case there really is no message, it would not exist. Otherwise 
-			# a newline separates header from message
-			if len(lines) > 5:
-				self.message = "\n".join(lines[5:])
-			else:
-				self.message = ''
-		# END check our attributes
-		else:
-			super(TagObject, self)._set_cache_(attr)
-		
-		
 
diff --git a/gitdb/object/tree.py b/gitdb/object/tree.py
index 8dabd6e..110fa7d 100644
--- a/gitdb/object/tree.py
+++ b/gitdb/object/tree.py
@@ -3,285 +3,4 @@
 #
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
-import util
-from gitdb.typ import ObjectType
-from base import IndexObject
-from blob import Blob
-from submodule import Submodule
 
-from fun import (
-					tree_entries_from_data, 
-					tree_to_stream
-				 )
-
-from gitdb.util import (
-						to_bin_sha,
-						join_path
-						)
-
-__all__ = ("TreeModifier", "Tree")
-
-class TreeModifier(object):
-	"""A utility class providing methods to alter the underlying cache in a list-like fashion.
-	
-	Once all adjustments are complete, the _cache, which really is a refernce to 
-	the cache of a tree, will be sorted. Assuring it will be in a serializable state"""
-	__slots__ = '_cache'
-	
-	def __init__(self, cache):
-		self._cache = cache
-	
-	def _index_by_name(self, name):
-		""":return: index of an item with name, or -1 if not found"""
-		for i, t in enumerate(self._cache):
-			if t[2] == name:
-				return i
-			# END found item
-		# END for each item in cache
-		return -1
-	
-	#{ Interface 
-	def set_done(self):
-		"""Call this method once you are done modifying the tree information.
-		It may be called several times, but be aware that each call will cause 
-		a sort operation
-		:return self:"""
-		self._cache.sort(key=lambda t: t[2])	# sort by name
-		return self
-	#} END interface
-	
-	#{ Mutators
-	def add(self, sha, mode, name, force=False):
-		"""Add the given item to the tree. If an item with the given name already
-		exists, nothing will be done, but a ValueError will be raised if the 
-		sha and mode of the existing item do not match the one you add, unless 
-		force is True
-		
-		:param sha: The 20 or 40 byte sha of the item to add
-		:param mode: int representing the stat compatible mode of the item
-		:param force: If True, an item with your name and information will overwrite
-			any existing item with the same name, no matter which information it has
-		:return: self"""
-		if '/' in name:
-			raise ValueError("Name must not contain '/' characters")
-		if (mode >> 12) not in Tree._map_id_to_type:
-			raise ValueError("Invalid object type according to mode %o" % mode)
-			
-		sha = to_bin_sha(sha)
-		index = self._index_by_name(name)
-		item = (sha, mode, name)
-		if index == -1:
-			self._cache.append(item)
-		else:
-			if force:
-				self._cache[index] = item
-			else:
-				ex_item = self._cache[index]
-				if ex_item[0] != sha or ex_item[1] != mode:
-					raise ValueError("Item %r existed with different properties" % name)
-				# END handle mismatch
-			# END handle force
-		# END handle name exists
-		return self
-		
-	def add_unchecked(self, binsha, mode, name):
-		"""Add the given item to the tree, its correctness is assumed, which 
-		puts the caller into responsibility to assure the input is correct. 
-		For more information on the parameters, see ``add``
-		:param binsha: 20 byte binary sha"""
-		self._cache.append((binsha, mode, name))
-		
-	def __delitem__(self, name):
-		"""Deletes an item with the given name if it exists"""
-		index = self._index_by_name(name)
-		if index > -1:
-			del(self._cache[index])
-		
-	#} END mutators
-
-
-class Tree(IndexObject, util.Traversable, util.Serializable):
-	"""Tree objects represent an ordered list of Blobs and other Trees.
-	
-	``Tree as a list``::
-		
-		Access a specific blob using the  
-		tree['filename'] notation.
-		
-		You may as well access by index
-		blob = tree[0]
-	"""
-	
-	type = ObjectType.tree
-	type_id = ObjectType.tree_id
-	
-	__slots__ = "_cache"
-	
-	# actual integer ids for comparison 
-	commit_id = 016		# equals stat.S_IFDIR | stat.S_IFLNK - a directory link
-	blob_id = 010
-	symlink_id = 012
-	tree_id = 004
-	
-	#{ Configuration
-	
-	# override in subclass if you would like your own types to be instantiated instead
-	_map_id_to_type = {
-						commit_id : Submodule, 
-						blob_id : Blob, 
-						symlink_id : Blob
-						# tree id added once Tree is defined
-						}
-	
-	#} end configuration
-	
-	
-	def __init__(self, repo, binsha, mode=tree_id<<12, path=None):
-		super(Tree, self).__init__(repo, binsha, mode, path)
-
-	@classmethod
-	def _get_intermediate_items(cls, index_object):
-		if index_object.type == "tree":
-			return tuple(index_object._iter_convert_to_object(index_object._cache))
-		return tuple()
-
-	def _set_cache_(self, attr):
-		if attr == "_cache":
-			# Set the data when we need it
-			ostream = self.odb.stream(self.binsha)
-			self._cache = tree_entries_from_data(ostream.read())
-		else:
-			super(Tree, self)._set_cache_(attr)
-		# END handle attribute 
-
-	def _iter_convert_to_object(self, iterable):
-		"""Iterable yields tuples of (binsha, mode, name), which will be converted
-		to the respective object representation"""
-		for binsha, mode, name in iterable:
-			path = join_path(self.path, name)
-			try:
-				yield self._map_id_to_type[mode >> 12](self.repo, binsha, mode, path)
-			except KeyError:
-				raise TypeError("Unknown mode %o found in tree data for path '%s'" % (mode, path))
-		# END for each item 
-
-	def __div__(self, file):
-		"""Find the named object in this tree's contents
-		:return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule``
-		
-		:raise KeyError: if given file or tree does not exist in tree"""
-		msg = "Blob or Tree named %r not found"
-		if '/' in file:
-			tree = self
-			item = self
-			tokens = file.split('/')
-			for i,token in enumerate(tokens):
-				item = tree[token]
-				if item.type == 'tree':
-					tree = item
-				else:
-					# safety assertion - blobs are at the end of the path
-					if i != len(tokens)-1:
-						raise KeyError(msg % file)
-					return item
-				# END handle item type
-			# END for each token of split path
-			if item == self:
-				raise KeyError(msg % file)
-			return item
-		else:
-			for info in self._cache:
-				if info[2] == file:		# [2] == name
-					return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-			# END for each obj
-			raise KeyError( msg % file )
-		# END handle long paths
-
-
-	@property
-	def trees(self):
-		""":return: list(Tree, ...) list of trees directly below this tree"""
-		return [ i for i in self if i.type == "tree" ]
-		
-	@property
-	def blobs(self):
-		""":return: list(Blob, ...) list of blobs directly below this tree"""
-		return [ i for i in self if i.type == "blob" ]
-
-	@property
-	def cache(self):
-		"""
-		:return: An object allowing to modify the internal cache. This can be used
-			to change the tree's contents. When done, make sure you call ``set_done``
-			on the tree modifier, or serialization behaviour will be incorrect.
-			See the ``TreeModifier`` for more information on how to alter the cache"""
-		return TreeModifier(self._cache)
-
-	def traverse( self, predicate = lambda i,d: True,
-						   prune = lambda i,d: False, depth = -1, branch_first=True,
-						   visit_once = False, ignore_self=1 ):
-		"""For documentation, see util.Traversable.traverse
-		Trees are set to visit_once = False to gain more performance in the traversal"""
-		return super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self)
-
-	# List protocol
-	def __getslice__(self, i, j):
-		return list(self._iter_convert_to_object(self._cache[i:j]))
-		
-	def __iter__(self):
-		return self._iter_convert_to_object(self._cache)
-		
-	def __len__(self):
-		return len(self._cache)
-		
-	def __getitem__(self, item):
-		if isinstance(item, int):
-			info = self._cache[item]
-			return self._map_id_to_type[info[1] >> 12](self.repo, info[0], info[1], join_path(self.path, info[2]))
-		
-		if isinstance(item, basestring):
-			# compatability
-			return self.__div__(item)
-		# END index is basestring 
-		
-		raise TypeError( "Invalid index type: %r" % item )
-		
-		
-	def __contains__(self, item):
-		if isinstance(item, IndexObject):
-			for info in self._cache:
-				if item.binsha == info[0]:
-					return True
-				# END compare sha
-			# END for each entry
-		# END handle item is index object
-		# compatability
-		
-		# treat item as repo-relative path
-		path = self.path
-		for info in self._cache:
-			if item == join_path(path, info[2]):
-				return True
-		# END for each item
-		return False
-	
-	def __reversed__(self):
-		return reversed(self._iter_convert_to_object(self._cache))
-		
-	def _serialize(self, stream):
-		"""Serialize this tree into the stream. Please note that we will assume 
-		our tree data to be in a sorted state. If this is not the case, serialization
-		will not generate a correct tree representation as these are assumed to be sorted
-		by algorithms"""
-		tree_to_stream(self._cache, stream.write)
-		return self
-		
-	def _deserialize(self, stream):
-		self._cache = tree_entries_from_data(stream.read())
-		return self
-		
-		
-# END tree
-
-# finalize map definition
-Tree._map_id_to_type[Tree.tree_id] = Tree
diff --git a/gitdb/object/util.py b/gitdb/object/util.py
index e63cb30..5f61495 100644
--- a/gitdb/object/util.py
+++ b/gitdb/object/util.py
@@ -4,312 +4,4 @@
 # This module is part of GitPython and is released under
 # the BSD License: http://www.opensource.org/licenses/bsd-license.php
 """Module for general utility functions"""
-from gitdb.util import (
-						IterableList, 
-						Actor
-					)
 
-import re
-from collections import deque as Deque
-
-from string import digits
-import time
-import os
-
-__all__ = ('get_object_type_by_name', 'parse_date', 'parse_actor_and_date', 
-			'ProcessStreamAdapter', 'Traversable', 'altz_to_utctz_str', 'utctz_to_altz', 
-			'verify_utctz', 'Actor')
-
-#{ Functions
-
-def mode_str_to_int(modestr):
-	"""
-	:param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used
-	:return:
-		String identifying a mode compatible to the mode methods ids of the 
-		stat module regarding the rwx permissions for user, group and other, 
-		special flags and file system flags, i.e. whether it is a symlink
-		for example."""
-	mode = 0
-	for iteration, char in enumerate(reversed(modestr[-6:])):
-		mode += int(char) << iteration*3
-	# END for each char
-	return mode
-
-def get_object_type_by_name(object_type_name):
-	"""
-	:return: type suitable to handle the given object type name.
-		Use the type to create new instances.
-		
-	:param object_type_name: Member of TYPES
-		
-	:raise ValueError: In case object_type_name is unknown"""
-	if object_type_name == "commit":
-		import commit
-		return commit.Commit
-	elif object_type_name == "tag":
-		import tag
-		return tag.TagObject
-	elif object_type_name == "blob":
-		import blob
-		return blob.Blob
-	elif object_type_name == "tree":
-		import tree
-		return tree.Tree
-	else:
-		raise ValueError("Cannot handle unknown object type: %s" % object_type_name)
-		
-def utctz_to_altz(utctz):
-	"""we convert utctz to the timezone in seconds, it is the format time.altzone
-	returns. Git stores it as UTC timezone which has the opposite sign as well, 
-	which explains the -1 * ( that was made explicit here )
-	:param utctz: git utc timezone string, i.e. +0200"""
-	return -1 * int(float(utctz)/100*3600)
-	
-def altz_to_utctz_str(altz):
-	"""As above, but inverses the operation, returning a string that can be used
-	in commit objects"""
-	utci = -1 * int((altz / 3600)*100)
-	utcs = str(abs(utci))
-	utcs = "0"*(4-len(utcs)) + utcs
-	prefix = (utci < 0 and '-') or '+'
-	return prefix + utcs
-	
-
-def verify_utctz(offset):
-	""":raise ValueError: if offset is incorrect
-	:return: offset"""
-	fmt_exc = ValueError("Invalid timezone offset format: %s" % offset)
-	if len(offset) != 5:
-		raise fmt_exc
-	if offset[0] not in "+-":
-		raise fmt_exc
-	if	offset[1] not in digits or \
-		offset[2] not in digits or \
-		offset[3] not in digits or \
-		offset[4] not in digits:
-		raise fmt_exc
-	# END for each char
-	return offset
-
-def parse_date(string_date):
-	"""
-	Parse the given date as one of the following
-	
-		* Git internal format: timestamp offset
-		* RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. 
-		* ISO 8601 2005-04-07T22:13:13
-			The T can be a space as well
-		 
-	:return: Tuple(int(timestamp), int(offset)), both in seconds since epoch
-	:raise ValueError: If the format could not be understood
-	:note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY"""
-	# git time
-	try:
-		if string_date.count(' ') == 1 and string_date.rfind(':') == -1:
-			timestamp, offset = string_date.split()
-			timestamp = int(timestamp)
-			return timestamp, utctz_to_altz(verify_utctz(offset))
-		else:
-			offset = "+0000"					# local time by default
-			if string_date[-5] in '-+':
-				offset = verify_utctz(string_date[-5:])
-				string_date = string_date[:-6]	# skip space as well
-			# END split timezone info
-			
-			# now figure out the date and time portion - split time
-			date_formats = list()
-			splitter = -1
-			if ',' in string_date:
-				date_formats.append("%a, %d %b %Y")
-				splitter = string_date.rfind(' ')
-			else:
-				# iso plus additional
-				date_formats.append("%Y-%m-%d")
-				date_formats.append("%Y.%m.%d")
-				date_formats.append("%m/%d/%Y")
-				date_formats.append("%d.%m.%Y")
-				
-				splitter = string_date.rfind('T')
-				if splitter == -1:
-					splitter = string_date.rfind(' ')
-				# END handle 'T' and ' '
-			# END handle rfc or iso 
-			
-			assert splitter > -1
-			
-			# split date and time
-			time_part = string_date[splitter+1:]	# skip space
-			date_part = string_date[:splitter]
-			
-			# parse time
-			tstruct = time.strptime(time_part, "%H:%M:%S")
-			
-			for fmt in date_formats:
-				try:
-					dtstruct = time.strptime(date_part, fmt)
-					fstruct = time.struct_time((dtstruct.tm_year, dtstruct.tm_mon, dtstruct.tm_mday, 
-												tstruct.tm_hour, tstruct.tm_min, tstruct.tm_sec,
-												dtstruct.tm_wday, dtstruct.tm_yday, tstruct.tm_isdst))
-					return int(time.mktime(fstruct)), utctz_to_altz(offset)
-				except ValueError:
-					continue
-				# END exception handling
-			# END for each fmt
-			
-			# still here ? fail
-			raise ValueError("no format matched")
-		# END handle format
-	except Exception:
-		raise ValueError("Unsupported date format: %s" % string_date)  
-	# END handle exceptions
-
-	
-# precompiled regex
-_re_actor_epoch = re.compile(r'^.+? (.*) (\d+) ([+-]\d+).*$')
-
-def parse_actor_and_date(line):
-	"""Parse out the actor (author or committer) info from a line like::
-	
-		author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700
-	
-	:return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
-	m = _re_actor_epoch.search(line)
-	actor, epoch, offset = m.groups()
-	return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
-	
-
-#} END functions
-
-
-#{ Classes 
-	
-class ProcessStreamAdapter(object):
-	"""Class wireing all calls to the contained Process instance.
-	
-	Use this type to hide the underlying process to provide access only to a specified 
-	stream. The process is usually wrapped into an AutoInterrupt class to kill 
-	it if the instance goes out of scope."""
-	__slots__ = ("_proc", "_stream")
-	def __init__(self, process, stream_name):
-		self._proc = process
-		self._stream = getattr(process, stream_name)
-	
-	def __getattr__(self, attr):
-		return getattr(self._stream, attr)
-		
-		
-class Traversable(object):
-	"""Simple interface to perforam depth-first or breadth-first traversals 
-	into one direction.
-	Subclasses only need to implement one function.
-	Instances of the Subclass must be hashable"""
-	__slots__ = tuple()
-	
-	@classmethod
-	def _get_intermediate_items(cls, item):
-		"""
-		Returns:
-			List of items connected to the given item.
-			Must be implemented in subclass
-		"""
-		raise NotImplementedError("To be implemented in subclass")
-			
-	def list_traverse(self, *args, **kwargs):
-		"""
-		:return: IterableList with the results of the traversal as produced by
-			traverse()"""
-		out = IterableList(self._id_attribute_)
-		out.extend(self.traverse(*args, **kwargs))
-		return out
-	
-	def traverse( self, predicate = lambda i,d: True,
-						   prune = lambda i,d: False, depth = -1, branch_first=True,
-						   visit_once = True, ignore_self=1, as_edge = False ):
-		""":return: iterator yieling of items found when traversing self
-			
-		:param predicate: f(i,d) returns False if item i at depth d should not be included in the result
-			
-		:param prune: 
-			f(i,d) return True if the search should stop at item i at depth d.
-			Item i will not be returned.
-			
-		:param depth:
-			define at which level the iteration should not go deeper
-			if -1, there is no limit
-			if 0, you would effectively only get self, the root of the iteration
-			i.e. if 1, you would only get the first level of predessessors/successors
-			
-		:param branch_first:
-			if True, items will be returned branch first, otherwise depth first
-			
-		:param visit_once:
-			if True, items will only be returned once, although they might be encountered
-			several times. Loops are prevented that way.
-		
-		:param ignore_self:
-			if True, self will be ignored and automatically pruned from
-			the result. Otherwise it will be the first item to be returned.
-			If as_edge is True, the source of the first edge is None
-			
-		:param as_edge:
-			if True, return a pair of items, first being the source, second the 
-			destinatination, i.e. tuple(src, dest) with the edge spanning from 
-			source to destination"""
-		visited = set()
-		stack = Deque()
-		stack.append( ( 0 ,self, None ) )		# self is always depth level 0
-	
-		def addToStack( stack, item, branch_first, depth ):
-			lst = self._get_intermediate_items( item )
-			if not lst:
-				return
-			if branch_first:
-				stack.extendleft( ( depth , i, item ) for i in lst )
-			else:
-				reviter = ( ( depth , lst[i], item ) for i in range( len( lst )-1,-1,-1) )
-				stack.extend( reviter )
-		# END addToStack local method
-	
-		while stack:
-			d, item, src = stack.pop()			# depth of item, item, item_source
-			
-			if visit_once and item in visited:
-				continue
-				
-			if visit_once:
-				visited.add(item)
-			
-			rval = ( as_edge and (src, item) ) or item
-			if prune( rval, d ):
-				continue
-	
-			skipStartItem = ignore_self and ( item is self )
-			if not skipStartItem and predicate( rval, d ):
-				yield rval
-	
-			# only continue to next level if this is appropriate !
-			nd = d + 1
-			if depth > -1 and nd > depth:
-				continue
-	
-			addToStack( stack, item, branch_first, nd )
-		# END for each item on work stack
-		
-
-class Serializable(object):
-	"""Defines methods to serialize and deserialize objects from and into a data stream"""
-	__slots__ = tuple()
-	
-	def _serialize(self, stream):
-		"""Serialize the data of this object into the given data stream
-		:note: a serialized object would ``_deserialize`` into the same objet
-		:param stream: a file-like object
-		:return: self"""
-		raise NotImplementedError("To be implemented in subclass")
-		
-	def _deserialize(self, stream):
-		"""Deserialize all information regarding this object from the stream
-		:param stream: a file-like object
-		:return: self"""
-		raise NotImplementedError("To be implemented in subclass")
author	Sebastian Thiel <byronimo@gmail.com>	2011-05-05 19:44:36 +0200
committer	Sebastian Thiel <byronimo@gmail.com>	2011-05-05 19:44:36 +0200
commit	e03093dd392b92f51b7d7cf66d7b1949b9f843e6 (patch)
tree	dbd2b85d8b201cb5f2848b3aaa11cffbc96031c2 /gitdb/object
parent	6463c10db377573e695bc504a9451bdb6cbf61f5 (diff)
download	gitdbmerger.tar.gz