diff options
| author | Sebastian Thiel <byronimo@gmail.com> | 2010-11-21 19:17:48 +0100 | 
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2010-11-21 19:17:48 +0100 | 
| commit | fafe8a77db75083de3e7af92185ecdb7f2d542d3 (patch) | |
| tree | 0b2d17ca62fc1f986bbaefe8ca6ea641e3fa2d8d /objects/commit.py | |
| parent | 0b813371f5a8af95152cae109d28c7c97bfaf79f (diff) | |
| download | gitpython-fafe8a77db75083de3e7af92185ecdb7f2d542d3.tar.gz | |
moved all contents, incl. submodule gitdb, up to the root directory
Diffstat (limited to 'objects/commit.py')
| -rw-r--r-- | objects/commit.py | 472 | 
1 files changed, 472 insertions, 0 deletions
| diff --git a/objects/commit.py b/objects/commit.py new file mode 100644 index 00000000..a2b6c554 --- /dev/null +++ b/objects/commit.py @@ -0,0 +1,472 @@ +# commit.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +from git.util import ( +							Iterable, +							Stats, +						) +from git.diff import Diffable +from tree import Tree +from gitdb import IStream +from cStringIO import StringIO + +import base +from gitdb.util import ( +						hex_to_bin +						) +from util import ( +						Traversable, +						Serializable, +						get_user_id, +						parse_date, +						Actor, +						altz_to_utctz_str, +						parse_actor_and_date +					) +from time import ( +					time,  +					altzone +				) +import os +import sys + +__all__ = ('Commit', ) + +class Commit(base.Object, Iterable, Diffable, Traversable, Serializable): +	"""Wraps a git Commit object. +	 +	This class will act lazily on some of its attributes and will query the  +	value on demand only if it involves calling the git binary.""" +	 +	# ENVIRONMENT VARIABLES +	# read when creating new commits +	env_author_name = "GIT_AUTHOR_NAME" +	env_author_email = "GIT_AUTHOR_EMAIL" +	env_author_date = "GIT_AUTHOR_DATE" +	env_committer_name = "GIT_COMMITTER_NAME" +	env_committer_email = "GIT_COMMITTER_EMAIL" +	env_committer_date = "GIT_COMMITTER_DATE" +	env_email = "EMAIL" +	 +	# CONFIGURATION KEYS +	conf_name = 'name' +	conf_email = 'email' +	conf_encoding = 'i18n.commitencoding' +	 +	# INVARIANTS +	default_encoding = "UTF-8" +	 +	 +	# object configuration  +	type = "commit" +	__slots__ = ("tree", +				 "author", "authored_date", "author_tz_offset", +				 "committer", "committed_date", "committer_tz_offset", +				 "message", "parents", "encoding") +	_id_attribute_ = "binsha" +	 +	def __init__(self, repo, binsha, tree=None, author=None, authored_date=None, author_tz_offset=None, +				 committer=None, committed_date=None, committer_tz_offset=None,  +				 message=None,  parents=None, encoding=None): +		"""Instantiate a new Commit. All keyword arguments taking None as default will  +		be implicitly set on first query.  +		 +		:param binsha: 20 byte sha1 +		:param parents: tuple( Commit, ... )  +			is a tuple of commit ids or actual Commits +		:param tree: Tree +			Tree object +		:param author: Actor +			is the author string ( will be implicitly converted into an Actor object ) +		:param authored_date: int_seconds_since_epoch +			is the authored DateTime - use time.gmtime() to convert it into a  +			different format +		:param author_tz_offset: int_seconds_west_of_utc +			is the timezone that the authored_date is in +		:param committer: Actor +			is the committer string +		:param committed_date: int_seconds_since_epoch +			is the committed DateTime - use time.gmtime() to convert it into a  +			different format +		:param committer_tz_offset: int_seconds_west_of_utc +			is the timezone that the authored_date is in +		:param message: string +			is the commit message +		:param encoding: string +			encoding of the message, defaults to UTF-8 +		:param parents: +			List or tuple of Commit objects which are our parent(s) in the commit  +			dependency graph +		:return: git.Commit +		 +		:note: Timezone information is in the same format and in the same sign  +			as what time.altzone returns. The sign is inverted compared to git's  +			UTC timezone.""" +		super(Commit,self).__init__(repo, binsha) +		if tree is not None: +			assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) +		if tree is not None: +			self.tree = tree +		if author is not None: +			self.author = author +		if authored_date is not None: +			self.authored_date = authored_date +		if author_tz_offset is not None: +			self.author_tz_offset = author_tz_offset +		if committer is not None: +			self.committer = committer +		if committed_date is not None: +			self.committed_date = committed_date +		if committer_tz_offset is not None: +			self.committer_tz_offset = committer_tz_offset +		if message is not None: +			self.message = message +		if parents is not None: +			self.parents = parents +		if encoding is not None: +			self.encoding = encoding +		 +	@classmethod +	def _get_intermediate_items(cls, commit): +		return commit.parents + +	def _set_cache_(self, attr): +		if attr in Commit.__slots__: +			# read the data in a chunk, its faster - then provide a file wrapper +			binsha, typename, self.size, stream = self.repo.odb.stream(self.binsha) +			self._deserialize(StringIO(stream.read())) +		else: +			super(Commit, self)._set_cache_(attr) +		# END handle attrs + +	@property +	def summary(self): +		""":return: First line of the commit message""" +		return self.message.split('\n', 1)[0] +		 +	def count(self, paths='', **kwargs): +		"""Count the number of commits reachable from this commit + +		:param paths: +			is an optinal path or a list of paths restricting the return value  +			to commits actually containing the paths + +		:param kwargs: +			Additional options to be passed to git-rev-list. They must not alter +			the ouput style of the command, or parsing will yield incorrect results +		:return: int defining the number of reachable commits""" +		# yes, it makes a difference whether empty paths are given or not in our case +		# as the empty paths version will ignore merge commits for some reason. +		if paths: +			return len(self.repo.git.rev_list(self.hexsha, '--', paths, **kwargs).splitlines()) +		else: +			return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) +		 + +	@property +	def name_rev(self): +		""" +		:return: +			String describing the commits hex sha based on the closest Reference. +			Mostly useful for UI purposes""" +		return self.repo.git.name_rev(self) + +	@classmethod +	def iter_items(cls, repo, rev, paths='', **kwargs): +		"""Find all commits matching the given criteria. + +		:param repo: is the Repo +		:param rev: revision specifier, see git-rev-parse for viable options +		:param paths: +			is an optinal path or list of paths, if set only Commits that include the path  +			or paths will be considered +		:param kwargs: +			optional keyword arguments to git rev-list where +			``max_count`` is the maximum number of commits to fetch +			``skip`` is the number of commits to skip +			``since`` all commits since i.e. '1970-01-01' +		:return: iterator yielding Commit items""" +		if 'pretty' in kwargs: +			raise ValueError("--pretty cannot be used as parsing expects single sha's only") +		# END handle pretty +		args = list() +		if paths: +			args.extend(('--', paths)) +		# END if paths + +		proc = repo.git.rev_list(rev, args, as_process=True, **kwargs) +		return cls._iter_from_process_or_stream(repo, proc) +		 +	def iter_parents(self, paths='', **kwargs): +		"""Iterate _all_ parents of this commit. +		 +		:param paths: +			Optional path or list of paths limiting the Commits to those that  +			contain at least one of the paths +		:param kwargs: All arguments allowed by git-rev-list +		:return: Iterator yielding Commit objects which are parents of self """ +		# skip ourselves +		skip = kwargs.get("skip", 1) +		if skip == 0:	# skip ourselves  +			skip = 1 +		kwargs['skip'] = skip +		 +		return self.iter_items(self.repo, self, paths, **kwargs) + +	@property +	def stats(self): +		"""Create a git stat from changes between this commit and its first parent  +		or from all changes done if this is the very first commit. +		 +		:return: git.Stats""" +		if not self.parents: +			text = self.repo.git.diff_tree(self.hexsha, '--', numstat=True, root=True) +			text2 = "" +			for line in text.splitlines()[1:]: +				(insertions, deletions, filename) = line.split("\t") +				text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) +			text = text2 +		else: +			text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, '--', numstat=True) +		return Stats._list_from_string(self.repo, text) + +	@classmethod +	def _iter_from_process_or_stream(cls, repo, proc_or_stream): +		"""Parse out commit information into a list of Commit objects +		We expect one-line per commit, and parse the actual commit information directly +		from our lighting fast object database + +		:param proc: git-rev-list process instance - one sha per line +		:return: iterator returning Commit objects""" +		stream = proc_or_stream +		if not hasattr(stream,'readline'): +			stream = proc_or_stream.stdout +			 +		readline = stream.readline +		while True: +			line = readline() +			if not line: +				break +			hexsha = line.strip() +			if len(hexsha) > 40: +				# split additional information, as returned by bisect for instance +				hexsha, rest = line.split(None, 1) +			# END handle extra info +			 +			assert len(hexsha) == 40, "Invalid line: %s" % hexsha +			yield Commit(repo, hex_to_bin(hexsha)) +		# END for each line in stream +		 +		 +	@classmethod +	def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False): +		"""Commit the given tree, creating a commit object. +		 +		:param repo: Repo object the commit should be part of  +		:param tree: Tree object or hex or bin sha  +			the tree of the new commit +		:param message: Commit message. It may be an empty string if no message is provided. +			It will be converted to a string in any case. +		:param parent_commits: +			Optional Commit objects to use as parents for the new commit. +			If empty list, the commit will have no parents at all and become  +			a root commit. +			If None , the current head commit will be the parent of the  +			new commit object +		:param head: +			If True, the HEAD will be advanced to the new commit automatically. +			Else the HEAD will remain pointing on the previous commit. This could  +			lead to undesired results when diffing files. +			 +		:return: Commit object representing the new commit +			 +		:note: +			Additional information about the committer and Author are taken from the +			environment or from the git configuration, see git-commit-tree for  +			more information""" +		parents = parent_commits +		if parent_commits is None: +			try: +				parent_commits = [ repo.head.commit ] +			except ValueError: +				# empty repositories have no head commit +				parent_commits = list() +			# END handle parent commits +		# END if parent commits are unset +		 +		# retrieve all additional information, create a commit object, and  +		# serialize it +		# Generally:  +		# * Environment variables override configuration values +		# * Sensible defaults are set according to the git documentation +		 +		# COMMITER AND AUTHOR INFO +		cr = repo.config_reader() +		env = os.environ +		default_email = get_user_id() +		default_name = default_email.split('@')[0] +		 +		conf_name = cr.get_value('user', cls.conf_name, default_name) +		conf_email = cr.get_value('user', cls.conf_email, default_email) +		 +		author_name = env.get(cls.env_author_name, conf_name) +		author_email = env.get(cls.env_author_email, conf_email) +		 +		committer_name = env.get(cls.env_committer_name, conf_name) +		committer_email = env.get(cls.env_committer_email, conf_email) +		 +		# PARSE THE DATES +		unix_time = int(time()) +		offset = altzone +		 +		author_date_str = env.get(cls.env_author_date, '') +		if author_date_str: +			author_time, author_offset = parse_date(author_date_str) +		else: +			author_time, author_offset = unix_time, offset +		# END set author time +		 +		committer_date_str = env.get(cls.env_committer_date, '') +		if committer_date_str:  +			committer_time, committer_offset = parse_date(committer_date_str) +		else: +			committer_time, committer_offset = unix_time, offset +		# END set committer time +		 +		# assume utf8 encoding +		enc_section, enc_option = cls.conf_encoding.split('.') +		conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) +		 +		author = Actor(author_name, author_email) +		committer = Actor(committer_name, committer_email) +		 +		 +		# if the tree is no object, make sure we create one - otherwise +		# the created commit object is invalid +		if isinstance(tree, str): +			tree = repo.tree(tree) +		# END tree conversion +		 +		# CREATE NEW COMMIT +		new_commit = cls(repo, cls.NULL_BIN_SHA, tree,  +						author, author_time, author_offset,  +						committer, committer_time, committer_offset, +						message, parent_commits, conf_encoding) +		 +		stream = StringIO() +		new_commit._serialize(stream) +		streamlen = stream.tell() +		stream.seek(0) +		 +		istream = repo.odb.store(IStream(cls.type, streamlen, stream)) +		new_commit.binsha = istream.binsha +		 +		if head: +			# need late import here, importing git at the very beginning throws +			# as well ...  +			import git.refs +			try: +				repo.head.commit = new_commit +			except ValueError: +				# head is not yet set to the ref our HEAD points to +				# Happens on first commit +				import git.refs +				master = git.refs.Head.create(repo, repo.head.ref, commit=new_commit) +				repo.head.reference = master +			# END handle empty repositories +		# END advance head handling  +		 +		return new_commit +	 +	#{ Serializable Implementation +	 +	def _serialize(self, stream): +		write = stream.write +		write("tree %s\n" % self.tree) +		for p in self.parents: +			write("parent %s\n" % p) +			 +		a = self.author +		aname = a.name +		if isinstance(aname, unicode): +			aname = aname.encode(self.encoding) +		# END handle unicode in name +		 +		c = self.committer +		fmt = "%s %s <%s> %s %s\n" +		write(fmt % ("author", aname, a.email,  +						self.authored_date,  +						altz_to_utctz_str(self.author_tz_offset))) +			 +		write(fmt % ("committer", c.name, c.email,  +						self.committed_date, +						altz_to_utctz_str(self.committer_tz_offset))) +		 +		if self.encoding != self.default_encoding: +			write("encoding %s\n" % self.encoding) +		 +		write("\n") +		 +		# write plain bytes, be sure its encoded according to our encoding +		if isinstance(self.message, unicode): +			write(self.message.encode(self.encoding)) +		else: +			write(self.message) +		# END handle encoding +		return self +	 +	def _deserialize(self, stream): +		""":param from_rev_list: if true, the stream format is coming from the rev-list command +		Otherwise it is assumed to be a plain data stream from our object""" +		readline = stream.readline +		self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id<<12, '') + +		self.parents = list() +		next_line = None +		while True: +			parent_line = readline() +			if not parent_line.startswith('parent'): +				next_line = parent_line +				break +			# END abort reading parents +			self.parents.append(type(self)(self.repo, hex_to_bin(parent_line.split()[-1]))) +		# END for each parent line +		self.parents = tuple(self.parents) +		 +		self.author, self.authored_date, self.author_tz_offset = parse_actor_and_date(next_line) +		self.committer, self.committed_date, self.committer_tz_offset = parse_actor_and_date(readline()) +		 +		 +		# now we can have the encoding line, or an empty line followed by the optional +		# message. +		self.encoding = self.default_encoding +		# read encoding or empty line to separate message +		enc = readline() +		enc = enc.strip() +		if enc: +			self.encoding = enc[enc.find(' ')+1:] +			# now comes the message separator  +			readline() +		# END handle encoding +		 +		# decode the authors name +		try: +			self.author.name = self.author.name.decode(self.encoding)  +		except UnicodeDecodeError: +			print >> sys.stderr, "Failed to decode author name '%s' using encoding %s" % (self.author.name, self.encoding) +		# END handle author's encoding +		 +		# a stream from our data simply gives us the plain message +		# The end of our message stream is marked with a newline that we strip +		self.message = stream.read() +		try: +			self.message = self.message.decode(self.encoding) +		except UnicodeDecodeError: +			print >> sys.stderr, "Failed to decode message '%s' using encoding %s" % (self.message, self.encoding) +		# END exception handling  +		return self +		 +	#} END serializable implementation | 
