summaryrefslogtreecommitdiff
path: root/gitdb/util.py
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2011-05-05 19:44:36 +0200
committerSebastian Thiel <byronimo@gmail.com>2011-05-05 19:44:36 +0200
commite03093dd392b92f51b7d7cf66d7b1949b9f843e6 (patch)
treedbd2b85d8b201cb5f2848b3aaa11cffbc96031c2 /gitdb/util.py
parent6463c10db377573e695bc504a9451bdb6cbf61f5 (diff)
downloadgitdbmerger.tar.gz
Removed plenty of code which went into git-python. This is just for completeness, gitdb doesn't need to be worked on anymoregitdbmerger
Diffstat (limited to 'gitdb/util.py')
-rw-r--r--gitdb/util.py741
1 files changed, 0 insertions, 741 deletions
diff --git a/gitdb/util.py b/gitdb/util.py
index 75f41aa..3dc30d8 100644
--- a/gitdb/util.py
+++ b/gitdb/util.py
@@ -1,744 +1,3 @@
# Copyright (C) 2010, 2011 Sebastian Thiel (byronimo@gmail.com) and contributors
#
-# This module is part of GitDB and is released under
-# the New BSD License: http://www.opensource.org/licenses/bsd-license.php
-import platform
-import binascii
-import os
-import mmap
-import sys
-import errno
-import re
-from cStringIO import StringIO
-
-# in py 2.4, StringIO is only StringI, without write support.
-# Hence we must use the python implementation for this
-if sys.version_info[1] < 5:
- from StringIO import StringIO
-# END handle python 2.4
-
-try:
- import async.mod.zlib as zlib
-except ImportError:
- import zlib
-# END try async zlib
-
-from async import ThreadPool
-
-try:
- import hashlib
-except ImportError:
- import sha
-
-try:
- from struct import unpack_from
-except ImportError:
- from struct import unpack, calcsize
- __calcsize_cache = dict()
- def unpack_from(fmt, data, offset=0):
- try:
- size = __calcsize_cache[fmt]
- except KeyError:
- size = calcsize(fmt)
- __calcsize_cache[fmt] = size
- # END exception handling
- return unpack(fmt, data[offset : offset + size])
- # END own unpack_from implementation
-
-
-#{ Globals
-
-# A pool distributing tasks, initially with zero threads, hence everything
-# will be handled in the main thread
-pool = ThreadPool(0)
-
-#} END globals
-
-
-#{ Aliases
-
-hex_to_bin = binascii.a2b_hex
-bin_to_hex = binascii.b2a_hex
-
-# errors
-ENOENT = errno.ENOENT
-
-# os shortcuts
-exists = os.path.exists
-mkdir = os.mkdir
-chmod = os.chmod
-isdir = os.path.isdir
-isfile = os.path.isfile
-rename = os.rename
-remove = os.remove
-dirname = os.path.dirname
-basename = os.path.basename
-normpath = os.path.normpath
-expandvars = os.path.expandvars
-expanduser = os.path.expanduser
-abspath = os.path.abspath
-join = os.path.join
-read = os.read
-write = os.write
-close = os.close
-fsync = os.fsync
-
-# constants
-NULL_HEX_SHA = "0"*40
-NULL_BIN_SHA = "\0"*20
-
-#} END Aliases
-
-#{ compatibility stuff ...
-
-class _RandomAccessStringIO(object):
- """Wrapper to provide required functionality in case memory maps cannot or may
- not be used. This is only really required in python 2.4"""
- __slots__ = '_sio'
-
- def __init__(self, buf=''):
- self._sio = StringIO(buf)
-
- def __getattr__(self, attr):
- return getattr(self._sio, attr)
-
- def __len__(self):
- return len(self.getvalue())
-
- def __getitem__(self, i):
- return self.getvalue()[i]
-
- def __getslice__(self, start, end):
- return self.getvalue()[start:end]
-
-#} END compatibility stuff ...
-
-#{ Routines
-
-def get_user_id():
- """:return: string identifying the currently active system user as name@node
- :note: user can be set with the 'USER' environment variable, usually set on windows"""
- ukn = 'UNKNOWN'
- username = os.environ.get('USER', os.environ.get('USERNAME', ukn))
- if username == ukn and hasattr(os, 'getlogin'):
- username = os.getlogin()
- # END get username from login
- return "%s@%s" % (username, platform.node())
-
-def is_git_dir(d):
- """ This is taken from the git setup.c:is_git_directory
- function."""
- if isdir(d) and \
- isdir(join(d, 'objects')) and \
- isdir(join(d, 'refs')):
- headref = join(d, 'HEAD')
- return isfile(headref) or \
- (os.path.islink(headref) and
- os.readlink(headref).startswith('refs'))
- return False
-
-
-def stream_copy(source, destination, chunk_size=512*1024):
- """Copy all data from the source stream into the destination stream in chunks
- of size chunk_size
-
- :return: amount of bytes written"""
- br = 0
- while True:
- chunk = source.read(chunk_size)
- destination.write(chunk)
- br += len(chunk)
- if len(chunk) < chunk_size:
- break
- # END reading output stream
- return br
-
-def make_sha(source=''):
- """A python2.4 workaround for the sha/hashlib module fiasco
- :note: From the dulwich project """
- try:
- return hashlib.sha1(source)
- except NameError:
- sha1 = sha.sha(source)
- return sha1
-
-def allocate_memory(size):
- """:return: a file-protocol accessible memory block of the given size"""
- if size == 0:
- return _RandomAccessStringIO('')
- # END handle empty chunks gracefully
-
- try:
- return mmap.mmap(-1, size) # read-write by default
- except EnvironmentError:
- # setup real memory instead
- # this of course may fail if the amount of memory is not available in
- # one chunk - would only be the case in python 2.4, being more likely on
- # 32 bit systems.
- return _RandomAccessStringIO("\0"*size)
- # END handle memory allocation
-
-
-def file_contents_ro(fd, stream=False, allow_mmap=True):
- """:return: read-only contents of the file represented by the file descriptor fd
- :param fd: file descriptor opened for reading
- :param stream: if False, random access is provided, otherwise the stream interface
- is provided.
- :param allow_mmap: if True, its allowed to map the contents into memory, which
- allows large files to be handled and accessed efficiently. The file-descriptor
- will change its position if this is False"""
- try:
- if allow_mmap:
- # supports stream and random access
- try:
- return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
- except EnvironmentError:
- # python 2.4 issue, 0 wants to be the actual size
- return mmap.mmap(fd, os.fstat(fd).st_size, access=mmap.ACCESS_READ)
- # END handle python 2.4
- except OSError:
- pass
- # END exception handling
-
- # read manully
- contents = os.read(fd, os.fstat(fd).st_size)
- if stream:
- return _RandomAccessStringIO(contents)
- return contents
-
-def file_contents_ro_filepath(filepath, stream=False, allow_mmap=True, flags=0):
- """Get the file contents at filepath as fast as possible
- :return: random access compatible memory of the given filepath
- :param stream: see ``file_contents_ro``
- :param allow_mmap: see ``file_contents_ro``
- :param flags: additional flags to pass to os.open
- :raise OSError: If the file could not be opened
- :note: for now we don't try to use O_NOATIME directly as the right value needs to be
- shared per database in fact. It only makes a real difference for loose object
- databases anyway, and they use it with the help of the ``flags`` parameter"""
- fd = os.open(filepath, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags)
- try:
- return file_contents_ro(fd, stream, allow_mmap)
- finally:
- close(fd)
- # END assure file is closed
-
-def to_hex_sha(sha):
- """:return: hexified version of sha"""
- if len(sha) == 40:
- return sha
- return bin_to_hex(sha)
-
-def to_bin_sha(sha):
- if len(sha) == 20:
- return sha
- return hex_to_bin(sha)
-
-def join_path(a, *p):
- """Join path tokens together similar to os.path.join, but always use
- '/' instead of possibly '\' on windows."""
- path = a
- for b in p:
- if b.startswith('/'):
- path += b[1:]
- elif path == '' or path.endswith('/'):
- path += b
- else:
- path += '/' + b
- return path
-
-def to_native_path_windows(path):
- return path.replace('/','\\')
-
-def to_native_path_linux(path):
- return path.replace('\\','/')
-
-
-if sys.platform.startswith('win'):
- to_native_path = to_native_path_windows
-else:
- # no need for any work on linux
- def to_native_path_linux(path):
- return path
- to_native_path = to_native_path_linux
-
-def join_path_native(a, *p):
- """
- As join path, but makes sure an OS native path is returned. This is only
- needed to play it safe on my dear windows and to assure nice paths that only
- use '\'"""
- return to_native_path(join_path(a, *p))
-
-def assure_directory_exists(path, is_file=False):
- """Assure that the directory pointed to by path exists.
-
- :param is_file: If True, path is assumed to be a file and handled correctly.
- Otherwise it must be a directory
- :return: True if the directory was created, False if it already existed"""
- if is_file:
- path = os.path.dirname(path)
- #END handle file
- if not os.path.isdir(path):
- os.makedirs(path)
- return True
- return False
-
-
-#} END routines
-
-
-#{ Utilities
-
-class LazyMixin(object):
- """
- Base class providing an interface to lazily retrieve attribute values upon
- first access. If slots are used, memory will only be reserved once the attribute
- is actually accessed and retrieved the first time. All future accesses will
- return the cached value as stored in the Instance's dict or slot.
- """
-
- __slots__ = tuple()
-
- def __getattr__(self, attr):
- """
- Whenever an attribute is requested that we do not know, we allow it
- to be created and set. Next time the same attribute is reqeusted, it is simply
- returned from our dict/slots. """
- self._set_cache_(attr)
- # will raise in case the cache was not created
- return object.__getattribute__(self, attr)
-
- def _set_cache_(self, attr):
- """
- This method should be overridden in the derived class.
- It should check whether the attribute named by attr can be created
- and cached. Do nothing if you do not know the attribute or call your subclass
-
- The derived class may create as many additional attributes as it deems
- necessary in case a git command returns more information than represented
- in the single attribute."""
- pass
-
-
-class LockedFD(object):
- """
- This class facilitates a safe read and write operation to a file on disk.
- If we write to 'file', we obtain a lock file at 'file.lock' and write to
- that instead. If we succeed, the lock file will be renamed to overwrite
- the original file.
-
- When reading, we obtain a lock file, but to prevent other writers from
- succeeding while we are reading the file.
-
- This type handles error correctly in that it will assure a consistent state
- on destruction.
-
- :note: with this setup, parallel reading is not possible"""
- __slots__ = ("_filepath", '_fd', '_write')
-
- def __init__(self, filepath):
- """Initialize an instance with the givne filepath"""
- self._filepath = filepath
- self._fd = None
- self._write = None # if True, we write a file
-
- def __del__(self):
- # will do nothing if the file descriptor is already closed
- if self._fd is not None:
- self.rollback()
-
- def _lockfilepath(self):
- return "%s.lock" % self._filepath
-
- def open(self, write=False, stream=False):
- """
- Open the file descriptor for reading or writing, both in binary mode.
-
- :param write: if True, the file descriptor will be opened for writing. Other
- wise it will be opened read-only.
- :param stream: if True, the file descriptor will be wrapped into a simple stream
- object which supports only reading or writing
- :return: fd to read from or write to. It is still maintained by this instance
- and must not be closed directly
- :raise IOError: if the lock could not be retrieved
- :raise OSError: If the actual file could not be opened for reading
- :note: must only be called once"""
- if self._write is not None:
- raise AssertionError("Called %s multiple times" % self.open)
-
- self._write = write
-
- # try to open the lock file
- binary = getattr(os, 'O_BINARY', 0)
- lockmode = os.O_WRONLY | os.O_CREAT | os.O_EXCL | binary
- try:
- fd = os.open(self._lockfilepath(), lockmode, 0600)
- if not write:
- os.close(fd)
- else:
- self._fd = fd
- # END handle file descriptor
- except OSError:
- raise IOError("Lock at %r could not be obtained" % self._lockfilepath())
- # END handle lock retrieval
-
- # open actual file if required
- if self._fd is None:
- # we could specify exlusive here, as we obtained the lock anyway
- try:
- self._fd = os.open(self._filepath, os.O_RDONLY | binary)
- except:
- # assure we release our lockfile
- os.remove(self._lockfilepath())
- raise
- # END handle lockfile
- # END open descriptor for reading
-
- if stream:
- # need delayed import
- from stream import FDStream
- return FDStream(self._fd)
- else:
- return self._fd
- # END handle stream
-
- def commit(self):
- """When done writing, call this function to commit your changes into the
- actual file.
- The file descriptor will be closed, and the lockfile handled.
- :note: can be called multiple times"""
- self._end_writing(successful=True)
-
- def rollback(self):
- """Abort your operation without any changes. The file descriptor will be
- closed, and the lock released.
- :note: can be called multiple times"""
- self._end_writing(successful=False)
-
- def _end_writing(self, successful=True):
- """Handle the lock according to the write mode """
- if self._write is None:
- raise AssertionError("Cannot end operation if it wasn't started yet")
-
- if self._fd is None:
- return
-
- os.close(self._fd)
- self._fd = None
-
- lockfile = self._lockfilepath()
- if self._write and successful:
- # on windows, rename does not silently overwrite the existing one
- if sys.platform == "win32":
- if isfile(self._filepath):
- os.remove(self._filepath)
- # END remove if exists
- # END win32 special handling
- os.rename(lockfile, self._filepath)
-
- # assure others can at least read the file - the tmpfile left it at rw--
- # We may also write that file, on windows that boils down to a remove-
- # protection as well
- chmod(self._filepath, 0644)
- else:
- # just delete the file so far, we failed
- os.remove(lockfile)
- # END successful handling
-
-
-class LockFile(object):
- """Provides methods to obtain, check for, and release a file based lock which
- should be used to handle concurrent access to the same file.
-
- As we are a utility class to be derived from, we only use protected methods.
-
- Locks will automatically be released on destruction"""
- __slots__ = ("_file_path", "_owns_lock")
-
- def __init__(self, file_path):
- self._file_path = file_path
- self._owns_lock = False
-
- def __del__(self):
- self._release_lock()
-
- def _lock_file_path(self):
- """:return: Path to lockfile"""
- return "%s.lock" % (self._file_path)
-
- def _has_lock(self):
- """:return: True if we have a lock and if the lockfile still exists
- :raise AssertionError: if our lock-file does not exist"""
- if not self._owns_lock:
- return False
-
- return True
-
- def _obtain_lock_or_raise(self):
- """Create a lock file as flag for other instances, mark our instance as lock-holder
-
- :raise IOError: if a lock was already present or a lock file could not be written"""
- if self._has_lock():
- return
- lock_file = self._lock_file_path()
- if os.path.isfile(lock_file):
- raise IOError("Lock for file %r did already exist, delete %r in case the lock is illegal" % (self._file_path, lock_file))
-
- try:
- fd = os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0)
- os.close(fd)
- except OSError,e:
- raise IOError(str(e))
-
- self._owns_lock = True
-
- def _obtain_lock(self):
- """The default implementation will raise if a lock cannot be obtained.
- Subclasses may override this method to provide a different implementation"""
- return self._obtain_lock_or_raise()
-
- def _release_lock(self):
- """Release our lock if we have one"""
- if not self._has_lock():
- return
-
- # if someone removed our file beforhand, lets just flag this issue
- # instead of failing, to make it more usable.
- lfp = self._lock_file_path()
- try:
- # on bloody windows, the file needs write permissions to be removable.
- # Why ...
- if os.name == 'nt':
- os.chmod(lfp, 0777)
- # END handle win32
- os.remove(lfp)
- except OSError:
- pass
- self._owns_lock = False
-
-
-class BlockingLockFile(LockFile):
- """The lock file will block until a lock could be obtained, or fail after
- a specified timeout.
-
- :note: If the directory containing the lock was removed, an exception will
- be raised during the blocking period, preventing hangs as the lock
- can never be obtained."""
- __slots__ = ("_check_interval", "_max_block_time")
- def __init__(self, file_path, check_interval_s=0.3, max_block_time_s=sys.maxint):
- """Configure the instance
-
- :parm check_interval_s:
- Period of time to sleep until the lock is checked the next time.
- By default, it waits a nearly unlimited time
-
- :parm max_block_time_s: Maximum amount of seconds we may lock"""
- super(BlockingLockFile, self).__init__(file_path)
- self._check_interval = check_interval_s
- self._max_block_time = max_block_time_s
-
- def _obtain_lock(self):
- """This method blocks until it obtained the lock, or raises IOError if
- it ran out of time or if the parent directory was not available anymore.
- If this method returns, you are guranteed to own the lock"""
- starttime = time.time()
- maxtime = starttime + float(self._max_block_time)
- while True:
- try:
- super(BlockingLockFile, self)._obtain_lock()
- except IOError:
- # synity check: if the directory leading to the lockfile is not
- # readable anymore, raise an execption
- curtime = time.time()
- if not os.path.isdir(os.path.dirname(self._lock_file_path())):
- msg = "Directory containing the lockfile %r was not readable anymore after waiting %g seconds" % (self._lock_file_path(), curtime - starttime)
- raise IOError(msg)
- # END handle missing directory
-
- if curtime >= maxtime:
- msg = "Waited %g seconds for lock at %r" % ( maxtime - starttime, self._lock_file_path())
- raise IOError(msg)
- # END abort if we wait too long
- time.sleep(self._check_interval)
- else:
- break
- # END endless loop
-
-
-class Actor(object):
- """Actors hold information about a person acting on the repository. They
- can be committers and authors or anything with a name and an email as
- mentioned in the git log entries."""
- # PRECOMPILED REGEX
- name_only_regex = re.compile( r'<(.+)>' )
- name_email_regex = re.compile( r'(.*) <(.+?)>' )
-
- # ENVIRONMENT VARIABLES
- # read when creating new commits
- env_author_name = "GIT_AUTHOR_NAME"
- env_author_email = "GIT_AUTHOR_EMAIL"
- env_committer_name = "GIT_COMMITTER_NAME"
- env_committer_email = "GIT_COMMITTER_EMAIL"
-
- # CONFIGURATION KEYS
- conf_name = 'name'
- conf_email = 'email'
-
- __slots__ = ('name', 'email')
-
- def __init__(self, name, email):
- self.name = name
- self.email = email
-
- def __eq__(self, other):
- return self.name == other.name and self.email == other.email
-
- def __ne__(self, other):
- return not (self == other)
-
- def __hash__(self):
- return hash((self.name, self.email))
-
- def __str__(self):
- return self.name
-
- def __repr__(self):
- return '<git.Actor "%s <%s>">' % (self.name, self.email)
-
- @classmethod
- def _from_string(cls, string):
- """Create an Actor from a string.
- :param string: is the string, which is expected to be in regular git format
-
- John Doe <jdoe@example.com>
-
- :return: Actor """
- m = cls.name_email_regex.search(string)
- if m:
- name, email = m.groups()
- return cls(name, email)
- else:
- m = cls.name_only_regex.search(string)
- if m:
- return cls(m.group(1), None)
- else:
- # assume best and use the whole string as name
- return cls(string, None)
- # END special case name
- # END handle name/email matching
-
- @classmethod
- def _main_actor(cls, env_name, env_email, config_reader=None):
- actor = cls('', '')
- default_email = get_user_id()
- default_name = default_email.split('@')[0]
-
- for attr, evar, cvar, default in (('name', env_name, cls.conf_name, default_name),
- ('email', env_email, cls.conf_email, default_email)):
- try:
- setattr(actor, attr, os.environ[evar])
- except KeyError:
- if config_reader is not None:
- setattr(actor, attr, config_reader.get_value('user', cvar, default))
- #END config-reader handling
- if not getattr(actor, attr):
- setattr(actor, attr, default)
- #END handle name
- #END for each item to retrieve
- return actor
-
-
- @classmethod
- def committer(cls, config_reader=None):
- """
- :return: Actor instance corresponding to the configured committer. It behaves
- similar to the git implementation, such that the environment will override
- configuration values of config_reader. If no value is set at all, it will be
- generated
- :param config_reader: ConfigReader to use to retrieve the values from in case
- they are not set in the environment"""
- return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader)
-
- @classmethod
- def author(cls, config_reader=None):
- """Same as committer(), but defines the main author. It may be specified in the environment,
- but defaults to the committer"""
- return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader)
-
-
-class Iterable(object):
- """Defines an interface for iterable items which is to assure a uniform
- way to retrieve and iterate items within the git repository"""
- __slots__ = tuple()
- _id_attribute_ = "attribute that most suitably identifies your instance"
-
- @classmethod
- def list_items(cls, repo, *args, **kwargs):
- """
- Find all items of this type - subclasses can specify args and kwargs differently.
- If no args are given, subclasses are obliged to return all items if no additional
- arguments arg given.
-
- :note: Favor the iter_items method as it will
-
- :return:list(Item,...) list of item instances"""
- out_list = IterableList( cls._id_attribute_ )
- out_list.extend(cls.iter_items(repo, *args, **kwargs))
- return out_list
-
-
- @classmethod
- def iter_items(cls, repo, *args, **kwargs):
- """For more information about the arguments, see list_items
- :return: iterator yielding Items"""
- raise NotImplementedError("To be implemented by Subclass")
-
-
-class IterableList(list):
- """
- List of iterable objects allowing to query an object by id or by named index::
-
- heads = repo.heads
- heads.master
- heads['master']
- heads[0]
-
- It requires an id_attribute name to be set which will be queried from its
- contained items to have a means for comparison.
-
- A prefix can be specified which is to be used in case the id returned by the
- items always contains a prefix that does not matter to the user, so it
- can be left out."""
- __slots__ = ('_id_attr', '_prefix')
-
- def __new__(cls, id_attr, prefix=''):
- return super(IterableList,cls).__new__(cls)
-
- def __init__(self, id_attr, prefix=''):
- self._id_attr = id_attr
- self._prefix = prefix
- if not isinstance(id_attr, basestring):
- raise ValueError("First parameter must be a string identifying the name-property. Extend the list after initialization")
- # END help debugging !
-
- def __getattr__(self, attr):
- attr = self._prefix + attr
- for item in self:
- if getattr(item, self._id_attr) == attr:
- return item
- # END for each item
- return list.__getattribute__(self, attr)
-
- def __getitem__(self, index):
- if isinstance(index, int):
- return list.__getitem__(self,index)
-
- try:
- return getattr(self, index)
- except AttributeError:
- raise IndexError( "No item found with id %r" % (self._prefix + index) )
-
-
-
-#} END utilities