diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-04 17:30:31 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-04 17:30:31 +0200 |
commit | 6fbb69306c0e14bacb8dcb92a89af27d3d5d631f (patch) | |
tree | 4a6e0c14b412315c13cc4ac6466f4888644195a3 /lib/git/odb/db.py | |
parent | 25dca42bac17d511b7e2ebdd9d1d679e7626db5f (diff) | |
parent | e746f96bcc29238b79118123028ca170adc4ff0f (diff) | |
download | gitpython-6fbb69306c0e14bacb8dcb92a89af27d3d5d631f.tar.gz |
Merge branch 'odb'
Conflicts:
lib/git/cmd.py
Diffstat (limited to 'lib/git/odb/db.py')
-rw-r--r-- | lib/git/odb/db.py | 337 |
1 file changed, 337 insertions, 0 deletions
"""Contains implementations of database retrieving objects"""
from git.utils import IndexFileSHA1Writer
from git.errors import (
    InvalidDBRoot,
    BadObject,
    BadObjectType
    )

from stream import (
    DecompressMemMapReader,
    FDCompressedSha1Writer,
    Sha1Writer,
    OStream,
    OInfo
    )

from utils import (
    ENOENT,
    to_hex_sha,
    exists,
    hex_to_bin,
    isdir,
    mkdir,
    rename,
    dirname,
    join
    )

from fun import (
    chunk_size,
    loose_object_header_info,
    write_object
    )

import tempfile
import mmap
import os


__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB',
           'CompoundDB', 'ReferenceDB', 'GitObjectDB')


class ObjectDBR(object):
    """Defines an interface for object database lookup.
    Objects are identified either by hex-sha (40 bytes) or
    by sha (20 bytes)"""

    def __contains__(self, sha):
        # BUGFIX: previously returned ``self.has_obj`` - an attribute that
        # does not exist on this interface - instead of actually invoking the
        # query, so ``sha in db`` could never work. Delegate to has_object().
        return self.has_object(sha)

    #{ Query Interface
    def has_object(self, sha):
        """
        :return: True if the object identified by the given 40 byte hexsha or 20 bytes
            binary sha is contained in the database
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def info(self, sha):
        """ :return: OInfo instance
        :param sha: 40 bytes hexsha or 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def info_async(self, input_channel):
        """Retrieve information of a multitude of objects asynchronously
        :param input_channel: Channel yielding the sha's of the objects of interest
        :return: Channel yielding OInfo|InvalidOInfo, in any order"""
        raise NotImplementedError("To be implemented in subclass")

    def stream(self, sha):
        """:return: OStream instance
        :param sha: 40 bytes hexsha or 20 bytes binary sha
        :raise BadObject:"""
        raise NotImplementedError("To be implemented in subclass")

    def stream_async(self, input_channel):
        """Retrieve the OStream of multiple objects
        :param input_channel: see ``info``
        :param max_threads: see ``ObjectDBW.store``
        :return: Channel yielding OStream|InvalidOStream instances in any order"""
        raise NotImplementedError("To be implemented in subclass")

    #} END query interface


class ObjectDBW(object):
    """Defines an interface to create objects in the database"""

    def __init__(self, *args, **kwargs):
        self._ostream = None

    #{ Edit Interface
    def set_ostream(self, stream):
        """Adjusts the stream to which all data should be sent when storing new objects
        :param stream: if not None, the stream to use, if None the default stream
            will be used.
        :return: previously installed stream, or None if there was no override
        :raise TypeError: if the stream doesn't have the supported functionality"""
        cstream = self._ostream
        self._ostream = stream
        return cstream

    def ostream(self):
        """:return: overridden output stream this instance will write to, or None
            if it will write to the default stream"""
        return self._ostream

    def store(self, istream):
        """Create a new object in the database
        :return: the input istream object with its sha set to its corresponding value
        :param istream: IStream compatible instance. If its sha is already set
            to a value, the object will just be stored in the our database format,
            in which case the input stream is expected to be in object format ( header + contents ).
        :raise IOError: if data could not be written"""
        raise NotImplementedError("To be implemented in subclass")

    def store_async(self, input_channel):
        """Create multiple new objects in the database asynchronously. The method will
        return right away, returning an output channel which receives the results as
        they are computed.

        :return: Channel yielding your IStream which served as input, in any order.
            The IStreams sha will be set to the sha it received during the process,
            or its error attribute will be set to the exception informing about the error.
        :param input_channel: Channel yielding IStream instance.
            As the same instances will be used in the output channel, you can create a map
            between the id(istream) -> istream
        :note:As some ODB implementations implement this operation as atomic, they might
            abort the whole operation if one item could not be processed. Hence check how
            many items have actually been produced."""
        raise NotImplementedError("To be implemented in subclass")

    #} END edit interface


class FileDBBase(object):
    """Provides basic facilities to retrieve files of interest, including
    caching facilities to help mapping hexsha's to objects"""

    def __init__(self, root_path):
        """Initialize this instance to look for its files at the given root path
        All subsequent operations will be relative to this path
        :raise InvalidDBRoot:
        :note: The base will perform basic checking for accessibility, but the subclass
            is required to verify that the root_path contains the database structure it needs"""
        super(FileDBBase, self).__init__()
        if not os.path.isdir(root_path):
            raise InvalidDBRoot(root_path)
        self._root_path = root_path

    #{ Interface
    def root_path(self):
        """:return: path at which this db operates"""
        return self._root_path

    def db_path(self, rela_path):
        """
        :return: the given relative path relative to our database root, allowing
            to potentially access datafiles"""
        return join(self._root_path, rela_path)
    #} END interface


class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
    """A database which operates on loose object files"""

    # CONFIGURATION
    # chunks in which data will be copied between streams
    stream_chunk_size = chunk_size

    def __init__(self, root_path):
        super(LooseObjectDB, self).__init__(root_path)
        self._hexsha_to_file = dict()
        # Additional Flags - might be set to 0 after the first failure
        # Depending on the root, this might work for some mounts, for others not, which
        # is why it is per instance
        self._fd_open_flags = getattr(os, 'O_NOATIME', 0)

    #{ Interface
    def object_path(self, hexsha):
        """
        :return: path at which the object with the given hexsha would be stored,
            relative to the database root"""
        return join(hexsha[:2], hexsha[2:])

    def readable_db_object_path(self, hexsha):
        """
        :return: readable object path to the object identified by hexsha
        :raise BadObject: If the object file does not exist"""
        try:
            return self._hexsha_to_file[hexsha]
        except KeyError:
            pass
        # END ignore cache misses

        # try filesystem
        path = self.db_path(self.object_path(hexsha))
        if exists(path):
            self._hexsha_to_file[hexsha] = path
            return path
        # END handle cache
        raise BadObject(hexsha)

    #} END interface

    def _map_loose_object(self, sha):
        """
        :return: memory map of that file to allow random read access
        :raise BadObject: if object could not be located"""
        db_path = self.db_path(self.object_path(to_hex_sha(sha)))
        try:
            fd = os.open(db_path, os.O_RDONLY | self._fd_open_flags)
        except OSError as e:
            # NOTE: 'except E as e' replaces legacy 'except E,e' - same
            # behavior, but also valid on modern interpreters (2.6+).
            if e.errno != ENOENT:
                # try again without noatime
                try:
                    fd = os.open(db_path, os.O_RDONLY)
                except OSError:
                    raise BadObject(to_hex_sha(sha))
                # didn't work because of our flag, don't try it again
                self._fd_open_flags = 0
            else:
                raise BadObject(to_hex_sha(sha))
            # END handle error
        # END exception handling
        try:
            return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
        finally:
            os.close(fd)
        # END assure file is closed

    def set_ostream(self, stream):
        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
        if stream is not None and not isinstance(stream, Sha1Writer):
            raise TypeError("Output stream must support the %s interface" % Sha1Writer.__name__)
        return super(LooseObjectDB, self).set_ostream(stream)

    def info(self, sha):
        m = self._map_loose_object(sha)
        try:
            type, size = loose_object_header_info(m)
            return OInfo(sha, type, size)
        finally:
            m.close()
        # END assure release of system resources

    def stream(self, sha):
        m = self._map_loose_object(sha)
        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion=True)
        return OStream(sha, type, size, stream)

    def has_object(self, sha):
        try:
            self.readable_db_object_path(to_hex_sha(sha))
            return True
        except BadObject:
            return False
        # END check existance

    def store(self, istream):
        """note: The sha we produce will be hex by nature"""
        assert istream.sha is None, "Direct istream writing not yet implemented"
        tmp_path = None
        writer = self.ostream()
        if writer is None:
            # open a tmp file to write the data to
            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
            writer = FDCompressedSha1Writer(fd)
        # END handle custom writer

        try:
            try:
                write_object(istream.type, istream.size, istream.read, writer.write,
                             chunk_size=self.stream_chunk_size)
            except:
                if tmp_path:
                    os.remove(tmp_path)
                raise
            # END assure tmpfile removal on error
        finally:
            if tmp_path:
                writer.close()
        # END assure target stream is closed

        sha = writer.sha(as_hex=True)

        if tmp_path:
            obj_path = self.db_path(self.object_path(sha))
            obj_dir = dirname(obj_path)
            if not isdir(obj_dir):
                mkdir(obj_dir)
            # END handle destination directory
            rename(tmp_path, obj_path)
        # END handle dry_run

        istream.sha = sha
        return istream


class PackedDB(FileDBBase, ObjectDBR):
    """A database operating on a set of object packs"""


class CompoundDB(ObjectDBR):
    """A database which delegates calls to sub-databases"""


class ReferenceDB(CompoundDB):
    """A database consisting of database referred to in a file"""


#class GitObjectDB(CompoundDB, ObjectDBW):
class GitObjectDB(LooseObjectDB):
    """A database representing the default git object store, which includes loose
    objects, pack files and an alternates file

    It will create objects only in the loose object database.
    :note: for now, we use the git command to do all the lookup, just until he
        have packs and the other implementations
    """
    def __init__(self, root_path, git):
        """Initialize this instance with the root and a git command"""
        super(GitObjectDB, self).__init__(root_path)
        self._git = git

    def info(self, sha):
        t = self._git.get_object_header(sha)
        return OInfo(t[0], t[1], t[2])

    def stream(self, sha):
        """For now, all lookup is done by git itself"""
        t = self._git.stream_object_data(sha)
        return OStream(t[0], t[1], t[2], t[3])