diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-03 16:35:35 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-03 16:35:35 +0200 |
commit | 38d59fc8ccccae8882fa48671377bf40a27915a7 (patch) | |
tree | 84a3671b709f9fcff39819805a4902546e4d6d66 /lib/git/odb/db.py | |
parent | 6f8ce8901e21587cd2320562df412e05b5ab1731 (diff) | |
download | gitpython-38d59fc8ccccae8882fa48671377bf40a27915a7.tar.gz |
odb: implemented loose object streaming, which is impossible to do efficiently considering that it copies string buffers all the time
Diffstat (limited to 'lib/git/odb/db.py')
-rw-r--r-- | lib/git/odb/db.py | 114 |
1 files changed, 63 insertions, 51 deletions
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py index 1248a3f4..5c50a512 100644 --- a/lib/git/odb/db.py +++ b/lib/git/odb/db.py @@ -1,17 +1,18 @@ """Contains implementations of database retrieveing objects""" -import os +from git.utils import IndexFileSHA1Writer from git.errors import ( InvalidDBRoot, - BadObject + BadObject, + BadObjectType ) -from git.utils import IndexFileSHA1Writer from utils import ( - getsize, + DecompressMemMapReader, + FDCompressedSha1Writer, + ENOENT, to_hex_sha, exists, hex_to_bin, - FDCompressedSha1Writer, isdir, mkdir, rename, @@ -19,8 +20,15 @@ from utils import ( join ) +from fun import ( + chunk_size, + loose_object_header_info, + write_object + ) + import tempfile import mmap +import os class iObjectDBR(object): @@ -36,7 +44,8 @@ class iObjectDBR(object): def has_object(self, sha): """ :return: True if the object identified by the given 40 byte hexsha or 20 bytes - binary sha is contained in the database""" + binary sha is contained in the database + :raise BadObject:""" raise NotImplementedError("To be implemented in subclass") def object(self, sha): @@ -44,14 +53,16 @@ class iObjectDBR(object): :return: tuple(type_string, size_in_bytes, stream) a tuple with object information including its type, its size as well as a stream from which its contents can be read - :param sha: 40 bytes hexsha or 20 bytes binary sha """ + :param sha: 40 bytes hexsha or 20 bytes binary sha + :raise BadObject:""" raise NotImplementedError("To be implemented in subclass") def object_info(self, sha): """ :return: tuple(type_string, size_in_bytes) tuple with the object's type string as well as its size in bytes - :param sha: 40 bytes hexsha or 20 bytes binary sha""" + :param sha: 40 bytes hexsha or 20 bytes binary sha + :raise BadObject:""" raise NotImplementedError("To be implemented in subclass") #} END query interface @@ -70,7 +81,8 @@ class iObjectDBW(object): :param stream: stream providing the data :param dry_run: if True, the object database will not actually be changed :param sha_as_hex: if True, the returned sha identifying the object will be - hex encoded, not binary""" + hex encoded, not binary + :raise IOError: if data could not be written""" raise NotImplementedError("To be implemented in subclass") def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0): @@ -82,7 +94,8 @@ class iObjectDBW(object): :param dry_run: see ``to_obj`` :param sha_as_hex: see ``to_obj`` :param max_threads: if < 1, any number of threads may be started while processing - the request, otherwise the given number of threads will be started.""" + the request, otherwise the given number of threads will be started. + :raise IOError: if data could not be written""" # a trivial implementation, ignoring the threads for now # TODO: add configuration to the class to determine whether we may # actually use multiple threads, default False of course. If the add @@ -130,15 +143,19 @@ class FileDBBase(object): class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW): """A database which operates on loose object files""" - __slots__ = ('_hexsha_to_file', ) - + __slots__ = ('_hexsha_to_file', '_fd_open_flags') # CONFIGURATION # chunks in which data will be copied between streams - stream_chunk_size = 1000*1000 + stream_chunk_size = chunk_size + def __init__(self, root_path): super(LooseObjectDB, self).__init__(root_path) self._hexsha_to_file = dict() + # Additional Flags - might be set to 0 after the first failure + # Depending on the root, this might work for some mounts, for others not, which + # is why it is per instance + self._fd_open_flags = os.O_NOATIME #{ Interface def object_path(self, hexsha): @@ -167,36 +184,46 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW): #} END interface - def _object_header_info(self, mmap): - """:return: tuple(type_string, uncompressed_size_in_bytes - :param mmap: newly mapped memory map at position 0. It will be - seeked to the actual start of the object contents, which can be used - to initialize a zlib decompress object.""" - raise NotImplementedError("todo") - - def _map_object(self, sha): + def _map_loose_object(self, sha): """ - :return: tuple(file, mmap) tuple with an opened file for reading, and - a memory map of that file""" - db_path = self.readable_db_object_path(to_hex_sha(sha)) - f = open(db_path, 'rb') - m = mmap.mmap(f.fileno(), getsize(db_path), access=mmap.ACCESS_READ) - return (f, m) + :return: memory map of that file to allow random read access + :raise BadObject: if object could not be located""" + db_path = self.db_path(self.object_path(to_hex_sha(sha))) + try: + fd = os.open(db_path, os.O_RDONLY|self._fd_open_flags) + except OSError,e: + if e.errno != ENOENT: + # try again without noatime + try: + fd = os.open(db_path, os.O_RDONLY) + except OSError: + raise BadObject(to_hex_sha(sha)) + # didn't work because of our flag, don't try it again + self._fd_open_flags = 0 + else: + raise BadObject(to_hex_sha(sha)) + # END handle error + # END exception handling + try: + return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) + finally: + os.close(fd) + # END assure file is closed def object_info(self, sha): - f, m = self._map_object(sha) + m = self._map_loose_object(sha) try: - type, size = self._object_header_info(m) + return loose_object_header_info(m) finally: - f.close() m.close() # END assure release of system resources def object(self, sha): - f, m = self._map_object(sha) - type, size = self._object_header_info(m) - # TODO: init a dynamic decompress stream from our memory map + m = self._map_loose_object(sha) + reader = DecompressMemMapReader(m, close_on_deletion = True) + type, size = reader.initialize() + return type, size, reader def has_object(self, sha): try: @@ -210,25 +237,10 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW): # open a tmp file to write the data to fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) writer = FDCompressedSha1Writer(fd) - - # WRITE HEADER: type SP size NULL - writer.write("%s %i%s" % (type, size, chr(0))) - - # WRITE ALL DATA - chunksize = self.stream_chunk_size + try: - try: - while True: - data_len = writer.write(stream.read(chunksize)) - if data_len < chunksize: - # WRITE FOOTER - writer.write('\n') - break - # END check for stream end - # END duplicate data - finally: - writer.close() - # END assure file was closed + write_object(type, size, stream, writer, + close_target_stream=True, chunk_size=self.stream_chunk_size) except: os.remove(tmp_path) raise |