diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:44:36 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2011-05-05 19:44:36 +0200 |
commit | e03093dd392b92f51b7d7cf66d7b1949b9f843e6 (patch) | |
tree | dbd2b85d8b201cb5f2848b3aaa11cffbc96031c2 /gitdb/util.py | |
parent | 6463c10db377573e695bc504a9451bdb6cbf61f5 (diff) | |
download | gitdbmerger.tar.gz |
Removed plenty of code which went into git-python. This is just for completeness, gitdb doesn't need to be worked on anymoregitdbmerger
Diffstat (limited to 'gitdb/util.py')
-rw-r--r-- | gitdb/util.py | 741 |
1 files changed, 0 insertions, 741 deletions
diff --git a/gitdb/util.py b/gitdb/util.py index 75f41aa..3dc30d8 100644 --- a/gitdb/util.py +++ b/gitdb/util.py @@ -1,744 +1,3 @@ # Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors # -# This module is part of GitDB and is released under -# the New BSD License: http://www.opensource.org/licenses/bsd-license.php -import platform -import binascii -import os -import mmap -import sys -import errno -import re -from cStringIO import StringIO - -# in py 2.4, StringIO is only StringI, without write support. -# Hence we must use the python implementation for this -if sys.version_info[1] < 5: - from StringIO import StringIO -# END handle python 2.4 - -try: - import async.mod.zlib as zlib -except ImportError: - import zlib -# END try async zlib - -from async import ThreadPool - -try: - import hashlib -except ImportError: - import sha - -try: - from struct import unpack_from -except ImportError: - from struct import unpack, calcsize - __calcsize_cache = dict() - def unpack_from(fmt, data, offset=0): - try: - size = __calcsize_cache[fmt] - except KeyError: - size = calcsize(fmt) - __calcsize_cache[fmt] = size - # END exception handling - return unpack(fmt, data[offset : offset + size]) - # END own unpack_from implementation - - -#{ Globals - -# A pool distributing tasks, initially with zero threads, hence everything -# will be handled in the main thread -pool = ThreadPool(0) - -#} END globals - - -#{ Aliases - -hex_to_bin = binascii.a2b_hex -bin_to_hex = binascii.b2a_hex - -# errors -ENOENT = errno.ENOENT - -# os shortcuts -exists = os.path.exists -mkdir = os.mkdir -chmod = os.chmod -isdir = os.path.isdir -isfile = os.path.isfile -rename = os.rename -remove = os.remove -dirname = os.path.dirname -basename = os.path.basename -normpath = os.path.normpath -expandvars = os.path.expandvars -expanduser = os.path.expanduser -abspath = os.path.abspath -join = os.path.join -read = os.read -write = os.write -close = os.close -fsync = os.fsync - -# constants -NULL_HEX_SHA = "0"*40 -NULL_BIN_SHA = "\0"*20 - -#} END Aliases - -#{ compatibility stuff ... - -class _RandomAccessStringIO(object): - """Wrapper to provide required functionality in case memory maps cannot or may - not be used. This is only really required in python 2.4""" - __slots__ = '_sio' - - def __init__(self, buf=''): - self._sio = StringIO(buf) - - def __getattr__(self, attr): - return getattr(self._sio, attr) - - def __len__(self): - return len(self.getvalue()) - - def __getitem__(self, i): - return self.getvalue()[i] - - def __getslice__(self, start, end): - return self.getvalue()[start:end] - -#} END compatibility stuff ... - -#{ Routines - -def get_user_id(): - """:return: string identifying the currently active system user as name@node - :note: user can be set with the 'USER' environment variable, usually set on windows""" - ukn = 'UNKNOWN' - username = os.environ.get('USER', os.environ.get('USERNAME', ukn)) - if username == ukn and hasattr(os, 'getlogin'): - username = os.getlogin() - # END get username from login - return "%s@%s" % (username, platform.node()) - -def is_git_dir(d): - """ This is taken from the git setup.c:is_git_directory - function.""" - if isdir(d) and \ - isdir(join(d, 'objects')) and \ - isdir(join(d, 'refs')): - headref = join(d, 'HEAD') - return isfile(headref) or \ - (os.path.islink(headref) and - os.readlink(headref).startswith('refs')) - return False - - -def stream_copy(source, destination, chunk_size=512*1024): - """Copy all data from the source stream into the destination stream in chunks - of size chunk_size - - :return: amount of bytes written""" - br = 0 - while True: - chunk = source.read(chunk_size) - destination.write(chunk) - br += len(chunk) - if len(chunk) < chunk_size: - break - # END reading output stream - return br - -def make_sha(source=''): - """A python2.4 workaround for the sha/hashlib module fiasco - :note: From the dulwich project """ - try: - return hashlib.sha1(source) - except NameError: - sha1 = sha.sha(source) - return sha1 - -def allocate_memory(size): - """:return: a file-protocol accessible memory block of the given size""" - if size == 0: - return _RandomAccessStringIO('') - # END handle empty chunks gracefully - - try: - return mmap.mmap(-1, size) # read-write by default - except EnvironmentError: - # setup real memory instead - # this of course may fail if the amount of memory is not available in - # one chunk - would only be the case in python 2.4, being more likely on - # 32 bit systems. - return _RandomAccessStringIO("\0"*size) - # END handle memory allocation - - -def file_contents_ro(fd, stream=False, allow_mmap=True): - """:return: read-only contents of the file represented by the file descriptor fd - :param fd: file descriptor opened for reading - :param stream: if False, random access is provided, otherwise the stream interface - is provided. - :param allow_mmap: if True, its allowed to map the contents into memory, which - allows large files to be handled and accessed efficiently. The file-descriptor - will change its position if this is False""" - try: - if allow_mmap: - # supports stream and random access - try: - return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) - except EnvironmentError: - # python 2.4 issue, 0 wants to be the actual size - return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ) - # END handle python 2.4 - except OSError: - pass - # END exception handling - - # read manully - contents = os.read(fd, os.fstat(fd).st_size) - if stream: - return _RandomAccessStringIO(contents) - return contents - -def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0): - """Get the file contents at filepath as fast as possible - :return: random access compatible memory of the given filepath - :param stream: see ``file_contents_ro`` - :param allow_mmap: see ``file_contents_ro`` - :param flags: additional flags to pass to os.open - :raise OSError: If the file could not be opened - :note: for now we don't try to use O_NOATIME directly as the right value needs to be - shared per database in fact. It only makes a real difference for loose object - databases anyway, and they use it with the help of the ``flags`` parameter""" - fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags) - try: - return file_contents_ro(fd, stream, allow_mmap) - finally: - close(fd) - # END assure file is closed - -def to_hex_sha(sha): - """:return: hexified version of sha""" - if len(sha) == 40: - return sha - return bin_to_hex(sha) - -def to_bin_sha(sha): - if len(sha) == 20: - return sha - return hex_to_bin(sha) - -def join_path(a, *p): - """Join path tokens together similar to os.path.join, but always use - '/' instead of possibly '\' on windows.""" - path = a - for b in p: - if b.startswith('/'): - path += b[1:] - elif path == '' or path.endswith('/'): - path += b - else: - path += '/' + b - return path - -def to_native_path_windows(path): - return path.replace('/','\\') - -def to_native_path_linux(path): - return path.replace('\\','/') - - -if sys.platform.startswith('win'): - to_native_path = to_native_path_windows -else: - # no need for any work on linux - def to_native_path_linux(path): - return path - to_native_path = to_native_path_linux - -def join_path_native(a, *p): - """ - As join path, but makes sure an OS native path is returned. This is only - needed to play it safe on my dear windows and to assure nice paths that only - use '\'""" - return to_native_path(join_path(a, *p)) - -def assure_directory_exists(path, is_file=False): - """Assure that the directory pointed to by path exists. - - :param is_file: If True, path is assumed to be a file and handled correctly. - Otherwise it must be a directory - :return: True if the directory was created, False if it already existed""" - if is_file: - path = os.path.dirname(path) - #END handle file - if not os.path.isdir(path): - os.makedirs(path) - return True - return False - - -#} END routines - - -#{ Utilities - -class LazyMixin(object): - """ - Base class providing an interface to lazily retrieve attribute values upon - first access. If slots are used, memory will only be reserved once the attribute - is actually accessed and retrieved the first time. All future accesses will - return the cached value as stored in the Instance's dict or slot. - """ - - __slots__ = tuple() - - def __getattr__(self, attr): - """ - Whenever an attribute is requested that we do not know, we allow it - to be created and set. Next time the same attribute is reqeusted, it is simply - returned from our dict/slots. """ - self._set_cache_(attr) - # will raise in case the cache was not created - return object.__getattribute__(self, attr) - - def _set_cache_(self, attr): - """ - This method should be overridden in the derived class. - It should check whether the attribute named by attr can be created - and cached. Do nothing if you do not know the attribute or call your subclass - - The derived class may create as many additional attributes as it deems - necessary in case a git command returns more information than represented - in the single attribute.""" - pass - - -class LockedFD(object): - """ - This class facilitates a safe read and write operation to a file on disk. - If we write to 'file', we obtain a lock file at 'file.lock' and write to - that instead. If we succeed, the lock file will be renamed to overwrite - the original file. - - When reading, we obtain a lock file, but to prevent other writers from - succeeding while we are reading the file. - - This type handles error correctly in that it will assure a consistent state - on destruction. - - :note: with this setup, parallel reading is not possible""" - __slots__ = ("_filepath", '_fd', '_write') - - def __init__(self, filepath): - """Initialize an instance with the givne filepath""" - self._filepath = filepath - self._fd = None - self._write = None # if True, we write a file - - def __del__(self): - # will do nothing if the file descriptor is already closed - if self._fd is not None: - self.rollback() - - def _lockfilepath(self): - return "%s.lock" % self._filepath - - def open(self, write=False, stream=False): - """ - Open the file descriptor for reading or writing, both in binary mode. - - :param write: if True, the file descriptor will be opened for writing. Other - wise it will be opened read-only. - :param stream: if True, the file descriptor will be wrapped into a simple stream - object which supports only reading or writing - :return: fd to read from or write to. It is still maintained by this instance - and must not be closed directly - :raise IOError: if the lock could not be retrieved - :raise OSError: If the actual file could not be opened for reading - :note: must only be called once""" - if self._write is not None: - raise AssertionError("Called %s multiple times" % self.open) - - self._write = write - - # try to open the lock file - binary = getattr(os, 'O_BINARY', 0) - lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary - try: - fd = os.open(self._lockfilepath(), lockmode, 0600) - if not write: - os.close(fd) - else: - self._fd = fd - # END handle file descriptor - except OSError: - raise IOError("Lock at %r could not be obtained" % self._lockfilepath()) - # END handle lock retrieval - - # open actual file if required - if self._fd is None: - # we could specify exlusive here, as we obtained the lock anyway - try: - self._fd = os.open(self._filepath, os.O_RDONLY | binary) - except: - # assure we release our lockfile - os.remove(self._lockfilepath()) - raise - # END handle lockfile - # END open descriptor for reading - - if stream: - # need delayed import - from stream import FDStream - return FDStream(self._fd) - else: - return self._fd - # END handle stream - - def commit(self): - """When done writing, call this function to commit your changes into the - actual file. - The file descriptor will be closed, and the lockfile handled. - :note: can be called multiple times""" - self._end_writing(successful=True) - - def rollback(self): - """Abort your operation without any changes. The file descriptor will be - closed, and the lock released. - :note: can be called multiple times""" - self._end_writing(successful=False) - - def _end_writing(self, successful=True): - """Handle the lock according to the write mode """ - if self._write is None: - raise AssertionError("Cannot end operation if it wasn't started yet") - - if self._fd is None: - return - - os.close(self._fd) - self._fd = None - - lockfile = self._lockfilepath() - if self._write and successful: - # on windows, rename does not silently overwrite the existing one - if sys.platform == "win32": - if isfile(self._filepath): - os.remove(self._filepath) - # END remove if exists - # END win32 special handling - os.rename(lockfile, self._filepath) - - # assure others can at least read the file - the tmpfile left it at rw-- - # We may also write that file, on windows that boils down to a remove- - # protection as well - chmod(self._filepath, 0644) - else: - # just delete the file so far, we failed - os.remove(lockfile) - # END successful handling - - -class LockFile(object): - """Provides methods to obtain, check for, and release a file based lock which - should be used to handle concurrent access to the same file. - - As we are a utility class to be derived from, we only use protected methods. - - Locks will automatically be released on destruction""" - __slots__ = ("_file_path", "_owns_lock") - - def __init__(self, file_path): - self._file_path = file_path - self._owns_lock = False - - def __del__(self): - self._release_lock() - - def _lock_file_path(self): - """:return: Path to lockfile""" - return "%s.lock" % (self._file_path) - - def _has_lock(self): - """:return: True if we have a lock and if the lockfile still exists - :raise AssertionError: if our lock-file does not exist""" - if not self._owns_lock: - return False - - return True - - def _obtain_lock_or_raise(self): - """Create a lock file as flag for other instances, mark our instance as lock-holder - - :raise IOError: if a lock was already present or a lock file could not be written""" - if self._has_lock(): - return - lock_file = self._lock_file_path() - if os.path.isfile(lock_file): - raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file)) - - try: - fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0) - os.close(fd) - except OSError,e: - raise IOError(str(e)) - - self._owns_lock = True - - def _obtain_lock(self): - """The default implementation will raise if a lock cannot be obtained. - Subclasses may override this method to provide a different implementation""" - return self._obtain_lock_or_raise() - - def _release_lock(self): - """Release our lock if we have one""" - if not self._has_lock(): - return - - # if someone removed our file beforhand, lets just flag this issue - # instead of failing, to make it more usable. - lfp = self._lock_file_path() - try: - # on bloody windows, the file needs write permissions to be removable. - # Why ... - if os.name == 'nt': - os.chmod(lfp, 0777) - # END handle win32 - os.remove(lfp) - except OSError: - pass - self._owns_lock = False - - -class BlockingLockFile(LockFile): - """The lock file will block until a lock could be obtained, or fail after - a specified timeout. - - :note: If the directory containing the lock was removed, an exception will - be raised during the blocking period, preventing hangs as the lock - can never be obtained.""" - __slots__ = ("_check_interval", "_max_block_time") - def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint): - """Configure the instance - - :parm check_interval_s: - Period of time to sleep until the lock is checked the next time. - By default, it waits a nearly unlimited time - - :parm max_block_time_s: Maximum amount of seconds we may lock""" - super(BlockingLockFile, self).__init__(file_path) - self._check_interval = check_interval_s - self._max_block_time = max_block_time_s - - def _obtain_lock(self): - """This method blocks until it obtained the lock, or raises IOError if - it ran out of time or if the parent directory was not available anymore. - If this method returns, you are guranteed to own the lock""" - starttime = time.time() - maxtime = starttime + float(self._max_block_time) - while True: - try: - super(BlockingLockFile, self)._obtain_lock() - except IOError: - # synity check: if the directory leading to the lockfile is not - # readable anymore, raise an execption - curtime = time.time() - if not os.path.isdir(os.path.dirname(self._lock_file_path())): - msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime) - raise IOError(msg) - # END handle missing directory - - if curtime >= maxtime: - msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path()) - raise IOError(msg) - # END abort if we wait too long - time.sleep(self._check_interval) - else: - break - # END endless loop - - -class Actor(object): - """Actors hold information about a person acting on the repository. They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" - # PRECOMPILED REGEX - name_only_regex = re.compile( r'<(.+)>' ) - name_email_regex = re.compile( r'(.*) <(.+?)>' ) - - # ENVIRONMENT VARIABLES - # read when creating new commits - env_author_name = "GIT_AUTHOR_NAME" - env_author_email = "GIT_AUTHOR_EMAIL" - env_committer_name = "GIT_COMMITTER_NAME" - env_committer_email = "GIT_COMMITTER_EMAIL" - - # CONFIGURATION KEYS - conf_name = 'name' - conf_email = 'email' - - __slots__ = ('name', 'email') - - def __init__(self, name, email): - self.name = name - self.email = email - - def __eq__(self, other): - return self.name == other.name and self.email == other.email - - def __ne__(self, other): - return not (self == other) - - def __hash__(self): - return hash((self.name, self.email)) - - def __str__(self): - return self.name - - def __repr__(self): - return '<git.Actor "%s <%s>">' % (self.name, self.email) - - @classmethod - def _from_string(cls, string): - """Create an Actor from a string. - :param string: is the string, which is expected to be in regular git format - - John Doe <jdoe@example.com> - - :return: Actor """ - m = cls.name_email_regex.search(string) - if m: - name, email = m.groups() - return cls(name, email) - else: - m = cls.name_only_regex.search(string) - if m: - return cls(m.group(1), None) - else: - # assume best and use the whole string as name - return cls(string, None) - # END special case name - # END handle name/email matching - - @classmethod - def _main_actor(cls, env_name, env_email, config_reader=None): - actor = cls('', '') - default_email = get_user_id() - default_name = default_email.split('@')[0] - - for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name), - ('email', env_email, cls.conf_email, default_email)): - try: - setattr(actor, attr, os.environ[evar]) - except KeyError: - if config_reader is not None: - setattr(actor, attr, config_reader.get_value('user', cvar, default)) - #END config-reader handling - if not getattr(actor, attr): - setattr(actor, attr, default) - #END handle name - #END for each item to retrieve - return actor - - - @classmethod - def committer(cls, config_reader=None): - """ - :return: Actor instance corresponding to the configured committer. It behaves - similar to the git implementation, such that the environment will override - configuration values of config_reader. If no value is set at all, it will be - generated - :param config_reader: ConfigReader to use to retrieve the values from in case - they are not set in the environment""" - return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) - - @classmethod - def author(cls, config_reader=None): - """Same as committer(), but defines the main author. It may be specified in the environment, - but defaults to the committer""" - return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) - - -class Iterable(object): - """Defines an interface for iterable items which is to assure a uniform - way to retrieve and iterate items within the git repository""" - __slots__ = tuple() - _id_attribute_ = "attribute that most suitably identifies your instance" - - @classmethod - def list_items(cls, repo, *args, **kwargs): - """ - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. - - :note: Favor the iter_items method as it will - - :return:list(Item,...) list of item instances""" - out_list = IterableList( cls._id_attribute_ ) - out_list.extend(cls.iter_items(repo, *args, **kwargs)) - return out_list - - - @classmethod - def iter_items(cls, repo, *args, **kwargs): - """For more information about the arguments, see list_items - :return: iterator yielding Items""" - raise NotImplementedError("To be implemented by Subclass") - - -class IterableList(list): - """ - List of iterable objects allowing to query an object by id or by named index:: - - heads = repo.heads - heads.master - heads['master'] - heads[0] - - It requires an id_attribute name to be set which will be queried from its - contained items to have a means for comparison. - - A prefix can be specified which is to be used in case the id returned by the - items always contains a prefix that does not matter to the user, so it - can be left out.""" - __slots__ = ('_id_attr', '_prefix') - - def __new__(cls, id_attr, prefix=''): - return super(IterableList,cls).__new__(cls) - - def __init__(self, id_attr, prefix=''): - self._id_attr = id_attr - self._prefix = prefix - if not isinstance(id_attr, basestring): - raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization") - # END help debugging ! - - def __getattr__(self, attr): - attr = self._prefix + attr - for item in self: - if getattr(item, self._id_attr) == attr: - return item - # END for each item - return list.__getattribute__(self, attr) - - def __getitem__(self, index): - if isinstance(index, int): - return list.__getitem__(self,index) - - try: - return getattr(self, index) - except AttributeError: - raise IndexError( "No item found with id %r" % (self._prefix + index) ) - - - -#} END utilities |