diff options
| author | Sebastian Thiel <byronimo@gmail.com> | 2010-11-25 17:01:25 +0100 | 
|---|---|---|
| committer | Sebastian Thiel <byronimo@gmail.com> | 2010-11-25 17:01:25 +0100 | 
| commit | cb68eef0865df6aedbc11cd81888625a70da6777 (patch) | |
| tree | bd3018e6257574687b271b6c2e652ef943657471 /git/util.py | |
| parent | 65747a216c67c3101c6ae2edaa8119d786b793cb (diff) | |
| download | gitpython-cb68eef0865df6aedbc11cd81888625a70da6777.tar.gz | |
Moved everything into the git subdirectory - some tests still need to be adjusted
Diffstat (limited to 'git/util.py')
| -rw-r--r-- | git/util.py | 602 | 
1 files changed, 602 insertions, 0 deletions
| diff --git a/git/util.py b/git/util.py new file mode 100644 index 00000000..8c0b6697 --- /dev/null +++ b/git/util.py @@ -0,0 +1,602 @@ +# utils.py +# Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors +# +# This module is part of GitPython and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import os +import re +import sys +import time +import platform +import tempfile + +from gitdb.util import ( +							make_sha,  +							LockedFD,  +							file_contents_ro,  +							LazyMixin,  +							to_hex_sha,  +							to_bin_sha +						) + +__all__ = ( "stream_copy", "join_path", "to_native_path_windows", "to_native_path_linux",  +			"join_path_native", "Stats", "IndexFileSHA1Writer", "Iterable", "IterableList",  +			"BlockingLockFile", "LockFile", 'Actor', 'get_user_id', 'assure_directory_exists', +			'RemoteProgress') + +#{ Utility Methods + +def stream_copy(source, destination, chunk_size=512*1024): +	"""Copy all data from the source stream into the destination stream in chunks +	of size chunk_size +	 +	:return: amount of bytes written""" +	br = 0 +	while True: +		chunk = source.read(chunk_size) +		destination.write(chunk) +		br += len(chunk) +		if len(chunk) < chunk_size: +			break +	# END reading output stream +	return br + +def join_path(a, *p): +	"""Join path tokens together similar to os.path.join, but always use  +	'/' instead of possibly '\' on windows.""" +	path = a +	for b in p: +		if b.startswith('/'): +			path += b[1:] +		elif path == '' or path.endswith('/'): +			path +=	 b +		else: +			path += '/' + b +	return path +	 +def to_native_path_windows(path): +	return path.replace('/','\\') +	 +def to_native_path_linux(path): +	return path.replace('\\','/') + +if sys.platform.startswith('win'): +	to_native_path = to_native_path_windows +else: +	# no need for any work on linux +	def to_native_path_linux(path): +		return path +	to_native_path = to_native_path_linux + +def join_path_native(a, *p): +	""" +	As join path, but makes sure an OS native path is returned. This is only  +		needed to play it safe on my dear windows and to assure nice paths that only  +		use '\'""" +	return to_native_path(join_path(a, *p)) +	 +def assure_directory_exists(path, is_file=False): +	"""Assure that the directory pointed to by path exists. +	 +	:param is_file: If True, path is assumed to be a file and handled correctly. +		Otherwise it must be a directory +	:return: True if the directory was created, False if it already existed""" +	if is_file: +		path = os.path.dirname(path) +	#END handle file  +	if not os.path.isdir(path): +		os.makedirs(path) +		return True +	return False +	 + +def get_user_id(): +	""":return: string identifying the currently active system user as name@node +	:note: user can be set with the 'USER' environment variable, usually set on windows""" +	ukn = 'UNKNOWN' +	username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) +	if username == ukn and hasattr(os, 'getlogin'): +		username = os.getlogin() +	# END get username from login +	return "%s@%s" % (username, platform.node()) + +#} END utilities + +#{ Classes + +class RemoteProgress(object): +	""" +	Handler providing an interface to parse progress information emitted by git-push +	and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. +	""" +	_num_op_codes = 5 +	BEGIN, END, COUNTING, COMPRESSING, WRITING =  [1 << x for x in range(_num_op_codes)] +	STAGE_MASK = BEGIN|END +	OP_MASK = ~STAGE_MASK +	 +	__slots__ = ("_cur_line", "_seen_ops") +	re_op_absolute = re.compile("(remote: )?([\w\s]+):\s+()(\d+)()(.*)") +	re_op_relative = re.compile("(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") +	 +	def __init__(self): +		self._seen_ops = list() +	 +	def _parse_progress_line(self, line): +		"""Parse progress information from the given line as retrieved by git-push +		or git-fetch +		 +		:return: list(line, ...) list of lines that could not be processed""" +		# handle +		# Counting objects: 4, done.  +		# Compressing objects:	50% (1/2)	\rCompressing objects: 100% (2/2)	\rCompressing objects: 100% (2/2), done. +		self._cur_line = line +		sub_lines = line.split('\r') +		failed_lines = list() +		for sline in sub_lines: +			# find esacpe characters and cut them away - regex will not work with  +			# them as they are non-ascii. As git might expect a tty, it will send them +			last_valid_index = None +			for i,c in enumerate(reversed(sline)): +				if ord(c) < 32: +					# its a slice index +					last_valid_index = -i-1  +				# END character was non-ascii +			# END for each character in sline +			if last_valid_index is not None: +				sline = sline[:last_valid_index] +			# END cut away invalid part +			sline = sline.rstrip() +			 +			cur_count, max_count = None, None +			match = self.re_op_relative.match(sline) +			if match is None: +				match = self.re_op_absolute.match(sline) +				 +			if not match: +				self.line_dropped(sline) +				failed_lines.append(sline) +				continue +			# END could not get match +			 +			op_code = 0 +			remote, op_name, percent, cur_count, max_count, message = match.groups() +			 +			# get operation id +			if op_name == "Counting objects": +				op_code |= self.COUNTING +			elif op_name == "Compressing objects": +				op_code |= self.COMPRESSING +			elif op_name == "Writing objects": +				op_code |= self.WRITING +			else: +				raise ValueError("Operation name %r unknown" % op_name) +			 +			# figure out stage +			if op_code not in self._seen_ops: +				self._seen_ops.append(op_code) +				op_code |= self.BEGIN +			# END begin opcode +			 +			if message is None: +				message = '' +			# END message handling +			 +			message = message.strip() +			done_token = ', done.' +			if message.endswith(done_token): +				op_code |= self.END +				message = message[:-len(done_token)] +			# END end message handling +			 +			self.update(op_code, cur_count, max_count, message) +		# END for each sub line +		return failed_lines +	 +	def line_dropped(self, line): +		"""Called whenever a line could not be understood and was therefore dropped.""" +		pass +	 +	def update(self, op_code, cur_count, max_count=None, message=''): +		"""Called whenever the progress changes +		 +		:param op_code: +			Integer allowing to be compared against Operation IDs and stage IDs. +			 +			Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation  +			ID as well as END. It may be that BEGIN and END are set at once in case only +			one progress message was emitted due to the speed of the operation. +			Between BEGIN and END, none of these flags will be set +			 +			Operation IDs are all held within the OP_MASK. Only one Operation ID will  +			be active per call. +		:param cur_count: Current absolute count of items +			 +		:param max_count: +			The maximum count of items we expect. It may be None in case there is  +			no maximum number of items or if it is (yet) unknown. +		 +		:param message: +			In case of the 'WRITING' operation, it contains the amount of bytes +			transferred. It may possibly be used for other purposes as well. +		 +		You may read the contents of the current line in self._cur_line""" +		pass + + +class Actor(object): +	"""Actors hold information about a person acting on the repository. They  +	can be committers and authors or anything with a name and an email as  +	mentioned in the git log entries.""" +	# PRECOMPILED REGEX +	name_only_regex = re.compile( r'<(.+)>' ) +	name_email_regex = re.compile( r'(.*) <(.+?)>' ) +	 +	# ENVIRONMENT VARIABLES +	# read when creating new commits +	env_author_name = "GIT_AUTHOR_NAME" +	env_author_email = "GIT_AUTHOR_EMAIL" +	env_committer_name = "GIT_COMMITTER_NAME" +	env_committer_email = "GIT_COMMITTER_EMAIL" +	 +	# CONFIGURATION KEYS +	conf_name = 'name' +	conf_email = 'email' +	 +	__slots__ = ('name', 'email') +	 +	def __init__(self, name, email): +		self.name = name +		self.email = email + +	def __eq__(self, other): +		return self.name == other.name and self.email == other.email +		 +	def __ne__(self, other): +		return not (self == other) +		 +	def __hash__(self): +		return hash((self.name, self.email)) + +	def __str__(self): +		return self.name + +	def __repr__(self): +		return '<git.Actor "%s <%s>">' % (self.name, self.email) + +	@classmethod +	def _from_string(cls, string): +		"""Create an Actor from a string. +		:param string: is the string, which is expected to be in regular git format + +				John Doe <jdoe@example.com> +				 +		:return: Actor """ +		m = cls.name_email_regex.search(string) +		if m: +			name, email = m.groups() +			return Actor(name, email) +		else: +			m = cls.name_only_regex.search(string) +			if m: +				return Actor(m.group(1), None) +			else: +				# assume best and use the whole string as name +				return Actor(string, None) +			# END special case name +		# END handle name/email matching +		 +	@classmethod +	def _main_actor(cls, env_name, env_email, config_reader=None): +		actor = Actor('', '') +		default_email = get_user_id() +		default_name = default_email.split('@')[0] +		 +		for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name),  +										('email', env_email, cls.conf_email, default_email)): +			try: +				setattr(actor, attr, os.environ[evar]) +			except KeyError: +				if config_reader is not None: +					setattr(actor, attr, config_reader.get_value('user', cvar, default)) +				#END config-reader handling +				if not getattr(actor, attr): +					setattr(actor, attr, default) +			#END handle name +		#END for each item to retrieve +		return actor +		 +		 +	@classmethod +	def committer(cls, config_reader=None): +		""" +		:return: Actor instance corresponding to the configured committer. It behaves +			similar to the git implementation, such that the environment will override  +			configuration values of config_reader. If no value is set at all, it will be +			generated +		:param config_reader: ConfigReader to use to retrieve the values from in case +			they are not set in the environment""" +		return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) +		 +	@classmethod +	def author(cls, config_reader=None): +		"""Same as committer(), but defines the main author. It may be specified in the environment,  +		but defaults to the committer""" +		return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) +		 + +class Stats(object): +	""" +	Represents stat information as presented by git at the end of a merge. It is  +	created from the output of a diff operation. +	 +	``Example``:: +	 +	 c = Commit( sha1 ) +	 s = c.stats +	 s.total		 # full-stat-dict +	 s.files		 # dict( filepath : stat-dict ) +	  +	``stat-dict`` +	 +	A dictionary with the following keys and values:: +	  +	  deletions = number of deleted lines as int +	  insertions = number of inserted lines as int +	  lines = total number of lines changed as int, or deletions + insertions +	   +	``full-stat-dict`` +	 +	In addition to the items in the stat-dict, it features additional information:: +	 +	 files = number of changed files as int""" +	__slots__ = ("total", "files") +	 +	def __init__(self, total, files): +		self.total = total +		self.files = files + +	@classmethod +	def _list_from_string(cls, repo, text): +		"""Create a Stat object from output retrieved by git-diff. +		 +		:return: git.Stat""" +		hsh = {'total': {'insertions': 0, 'deletions': 0, 'lines': 0, 'files': 0}, 'files': dict()} +		for line in text.splitlines(): +			(raw_insertions, raw_deletions, filename) = line.split("\t") +			insertions = raw_insertions != '-' and int(raw_insertions) or 0 +			deletions = raw_deletions != '-' and int(raw_deletions) or 0 +			hsh['total']['insertions'] += insertions +			hsh['total']['deletions'] += deletions +			hsh['total']['lines'] += insertions + deletions +			hsh['total']['files'] += 1 +			hsh['files'][filename.strip()] = {'insertions': insertions, +											  'deletions': deletions, +											  'lines': insertions + deletions} +		return Stats(hsh['total'], hsh['files']) + + +class IndexFileSHA1Writer(object): +	"""Wrapper around a file-like object that remembers the SHA1 of  +	the data written to it. It will write a sha when the stream is closed +	or if the asked for explicitly usign write_sha. +	 +	Only useful to the indexfile +	 +	:note: Based on the dulwich project""" +	__slots__ = ("f", "sha1") +	 +	def __init__(self, f): +		self.f = f +		self.sha1 = make_sha("") + +	def write(self, data): +		self.sha1.update(data) +		return self.f.write(data) + +	def write_sha(self): +		sha = self.sha1.digest() +		self.f.write(sha) +		return sha + +	def close(self): +		sha = self.write_sha() +		self.f.close() +		return sha + +	def tell(self): +		return self.f.tell() + + +class LockFile(object): +	"""Provides methods to obtain, check for, and release a file based lock which  +	should be used to handle concurrent access to the same file. +	 +	As we are a utility class to be derived from, we only use protected methods. +	 +	Locks will automatically be released on destruction""" +	__slots__ = ("_file_path", "_owns_lock") +	 +	def __init__(self, file_path): +		self._file_path = file_path +		self._owns_lock = False +	 +	def __del__(self): +		self._release_lock() +	 +	def _lock_file_path(self): +		""":return: Path to lockfile""" +		return "%s.lock" % (self._file_path) +	 +	def _has_lock(self): +		""":return: True if we have a lock and if the lockfile still exists +		:raise AssertionError: if our lock-file does not exist""" +		if not self._owns_lock: +			return False +		 +		return True +		 +	def _obtain_lock_or_raise(self): +		"""Create a lock file as flag for other instances, mark our instance as lock-holder +		 +		:raise IOError: if a lock was already present or a lock file could not be written""" +		if self._has_lock(): +			return  +		lock_file = self._lock_file_path() +		if os.path.isfile(lock_file): +			raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) +			 +		try: +			fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) +			os.close(fd) +		except OSError,e: +			raise IOError(str(e)) +		 +		self._owns_lock = True +		 +	def _obtain_lock(self): +		"""The default implementation will raise if a lock cannot be obtained. +		Subclasses may override this method to provide a different implementation""" +		return self._obtain_lock_or_raise() +		 +	def _release_lock(self): +		"""Release our lock if we have one""" +		if not self._has_lock(): +			return +			 +		# if someone removed our file beforhand, lets just flag this issue +		# instead of failing, to make it more usable. +		lfp = self._lock_file_path() +		try: +			# on bloody windows, the file needs write permissions to be removable. +			# Why ...  +			if os.name == 'nt': +				os.chmod(lfp, 0777) +			# END handle win32 +			os.remove(lfp) +		except OSError: +			pass +		self._owns_lock = False + + +class BlockingLockFile(LockFile): +	"""The lock file will block until a lock could be obtained, or fail after  +	a specified timeout. +	 +	:note: If the directory containing the lock was removed, an exception will  +		be raised during the blocking period, preventing hangs as the lock  +		can never be obtained.""" +	__slots__ = ("_check_interval", "_max_block_time") +	def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint): +		"""Configure the instance +		 +		:parm check_interval_s: +			Period of time to sleep until the lock is checked the next time. +			By default, it waits a nearly unlimited time +		 +		:parm max_block_time_s: Maximum amount of seconds we may lock""" +		super(BlockingLockFile, self).__init__(file_path) +		self._check_interval = check_interval_s +		self._max_block_time = max_block_time_s +		 +	def _obtain_lock(self): +		"""This method blocks until it obtained the lock, or raises IOError if  +		it ran out of time or if the parent directory was not available anymore. +		If this method returns, you are guranteed to own the lock""" +		starttime = time.time() +		maxtime = starttime + float(self._max_block_time) +		while True: +			try: +				super(BlockingLockFile, self)._obtain_lock() +			except IOError: +				# synity check: if the directory leading to the lockfile is not +				# readable anymore, raise an execption +				curtime = time.time() +				if not os.path.isdir(os.path.dirname(self._lock_file_path())): +					msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime) +					raise IOError(msg) +				# END handle missing directory +				 +				if curtime >= maxtime: +					msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path()) +					raise IOError(msg) +				# END abort if we wait too long +				time.sleep(self._check_interval) +			else: +				break +		# END endless loop +	 + +class IterableList(list): +	""" +	List of iterable objects allowing to query an object by id or by named index:: +	  +	 heads = repo.heads +	 heads.master +	 heads['master'] +	 heads[0] +	  +	It requires an id_attribute name to be set which will be queried from its  +	contained items to have a means for comparison. +	 +	A prefix can be specified which is to be used in case the id returned by the  +	items always contains a prefix that does not matter to the user, so it  +	can be left out.""" +	__slots__ = ('_id_attr', '_prefix') +	 +	def __new__(cls, id_attr, prefix=''): +		return super(IterableList,cls).__new__(cls) +		 +	def __init__(self, id_attr, prefix=''): +		self._id_attr = id_attr +		self._prefix = prefix +		if not isinstance(id_attr, basestring): +			raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization") +		# END help debugging ! +		 +	def __getattr__(self, attr): +		attr = self._prefix + attr +		for item in self: +			if getattr(item, self._id_attr) == attr: +				return item +		# END for each item +		return list.__getattribute__(self, attr) +		 +	def __getitem__(self, index): +		if isinstance(index, int): +			return list.__getitem__(self,index) +		 +		try: +			return getattr(self, index) +		except AttributeError: +			raise IndexError( "No item found with id %r" % (self._prefix + index) ) +			 + +class Iterable(object): +	"""Defines an interface for iterable items which is to assure a uniform  +	way to retrieve and iterate items within the git repository""" +	__slots__ = tuple() +	_id_attribute_ = "attribute that most suitably identifies your instance" +	 +	@classmethod +	def list_items(cls, repo, *args, **kwargs): +		""" +		Find all items of this type - subclasses can specify args and kwargs differently. +		If no args are given, subclasses are obliged to return all items if no additional  +		arguments arg given. +		 +		:note: Favor the iter_items method as it will +		 +		:return:list(Item,...) list of item instances""" +		out_list = IterableList( cls._id_attribute_ ) +		out_list.extend(cls.iter_items(repo, *args, **kwargs)) +		return out_list +		 +		 +	@classmethod +	def iter_items(cls, repo, *args, **kwargs): +		"""For more information about the arguments, see list_items +		:return:  iterator yielding Items""" +		raise NotImplementedError("To be implemented by Subclass") +		 +#} END classes | 
