diff options
author | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 22:40:52 +0200 |
---|---|---|
committer | Sebastian Thiel <byronimo@gmail.com> | 2010-06-02 22:40:52 +0200 |
commit | 8b86f9b399a8f5af792a04025fdeefc02883f3e5 (patch) | |
tree | f10c0bc2e8a95676e1062d2b86676eb0a5294ce1 /lib/git/odb/db.py | |
parent | 282018b79cc8df078381097cb3aeb29ff56e83c6 (diff) | |
download | gitpython-8b86f9b399a8f5af792a04025fdeefc02883f3e5.tar.gz |
initial version of loose object writing and simple cached object lookup appears to be working
Diffstat (limited to 'lib/git/odb/db.py')
-rw-r--r-- | lib/git/odb/db.py | 148 |
1 files changed, 124 insertions, 24 deletions
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py index fd1b640a..204da9ad 100644 --- a/lib/git/odb/db.py +++ b/lib/git/odb/db.py @@ -1,6 +1,21 @@ """Contains implementations of database retrieveing objects""" import os from git.errors import InvalidDBRoot +from git.utils import IndexFileSHA1Writer + +from utils import ( + to_hex_sha, + exists, + hex_to_bin, + FDCompressedSha1Writer, + isdir, + mkdir, + rename, + dirname, + join + ) + +import tempfile class iObjectDBR(object): @@ -9,29 +24,29 @@ class iObjectDBR(object): by sha (20 bytes)""" __slots__ = tuple() + def __contains__(self, sha): + return self.has_obj + #{ Query Interface - def has_obj_hex(self, hexsha): - """:return: True if the object identified by the given 40 byte hexsha is - contained in the database""" - raise NotImplementedError("To be implemented in subclass") - - def has_obj_bin(self, sha): - """:return: as ``has_obj_hex``, but takes a 20 byte binary sha""" - raise NotImplementedError("To be implemented in subclass") - - def obj_hex(self, hexsha): - """:return: tuple(type_string, size_in_bytes, stream) a tuple with object - information including its type, its size as well as a stream from which its - contents can be read""" + def has_object(self, sha): + """ + :return: True if the object identified by the given 40 byte hexsha or 20 bytes + binary sha is contained in the database""" raise NotImplementedError("To be implemented in subclass") - def obj_bin(self, sha): - """:return: as in ``obj_hex``, but takes a binary sha""" + def object(self, sha): + """ + :return: tuple(type_string, size_in_bytes, stream) a tuple with object + information including its type, its size as well as a stream from which its + contents can be read + :param sha: 40 bytes hexsha or 20 bytes binary sha """ raise NotImplementedError("To be implemented in subclass") - def obj_info_hex(self, hexsha): - """:return: tuple(type_string, size_in_bytes) tuple with the object's type - string as well as its size in bytes""" + def object_info(self, sha): + """ + :return: tuple(type_string, size_in_bytes) tuple with the object's type + string as well as its size in bytes + :param sha: 40 bytes hexsha or 20 bytes binary sha""" raise NotImplementedError("To be implemented in subclass") #} END query interface @@ -42,7 +57,7 @@ class iObjectDBW(object): #{ Edit Interface - def to_obj(self, type, size, stream, dry_run=False, sha_as_hex=True): + def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True): """Create a new object in the database :return: the sha identifying the object in the database :param type: type string identifying the object @@ -53,7 +68,7 @@ class iObjectDBW(object): hex encoded, not binary""" raise NotImplementedError("To be implemented in subclass") - def to_objs(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0): + def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0): """Create multiple new objects in the database :return: sequence of shas identifying the created objects in the order in which they where given. @@ -68,7 +83,7 @@ class iObjectDBW(object): # actually use multiple threads, default False of course. If the add shas = list() for args in iter_info: - shas.append(self.to_obj(*args, dry_run=dry_run, sha_as_hex=sha_as_hex)) + shas.append(self.to_object(*args, dry_run=dry_run, sha_as_hex=sha_as_hex)) return shas #} END edit interface @@ -95,18 +110,103 @@ class FileDBBase(object): """:return: path at which this db operates""" return self._root_path + def db_path(self, rela_path): + """ + :return: the given relative path relative to our database root, allowing + to pontentially access datafiles""" + return join(self._root_path, rela_path) #} END interface #{ Utiltities - def _root_rela_path(self, rela_path): - """:return: the given relative path relative to our database root""" - return os.path.join(self._root_path, rela_path) + #} END utilities class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW): """A database which operates on loose object files""" + __slots__ = ('_hexsha_to_file', ) + + # CONFIGURATION + # chunks in which data will be copied between streams + stream_chunk_size = 1000*1000 + + def __init__(self, root_path): + super(LooseObjectDB, self).__init__(root_path) + self._hexsha_to_file = dict() + + #{ Interface + def hexsha_to_object_path(self, hexsha): + """ + :return: path at which the object with the given hexsha would be stored, + relative to the database root""" + return join(hexsha[:2], hexsha[2:]) + + #} END interface + + def has_object(self, sha): + sha = to_hex_sha(sha) + # try cache + if sha in self._hexsha_to_file: + return True + + # try filesystem + path = self.db_path(self.hexsha_to_object_path(sha)) + if exists(path): + self._hexsha_to_file[sha] = path + return True + # END handle cache + return False + + def to_object(self, type, size, stream, dry_run=False, sha_as_hex=True): + # open a tmp file to write the data to + fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path) + writer = FDCompressedSha1Writer(fd) + + # WRITE HEADER: type SP size NULL + writer.write("%s %i%s" % (type, size, chr(0))) + + # WRITE ALL DATA + chunksize = self.stream_chunk_size + try: + try: + while True: + data_len = writer.write(stream.read(chunksize)) + if data_len < chunksize: + # WRITE FOOTER + writer.write('\n') + break + # END check for stream end + # END duplicate data + finally: + writer.close() + # END assure file was closed + except: + os.remove(tmp_path) + raise + # END assure tmpfile removal on error + + + # in dry-run mode, we delete the file afterwards + sha = writer.sha(as_hex=True) + + if dry_run: + os.remove(tmp_path) + else: + # rename the file into place + obj_path = self.db_path(self.hexsha_to_object_path(sha)) + obj_dir = dirname(obj_path) + if not isdir(obj_dir): + mkdir(obj_dir) + # END handle destination directory + rename(tmp_path, obj_path) + # END handle dry_run + + if not sha_as_hex: + sha = hex_to_bin(sha) + # END handle sha format + + return sha class PackedDB(FileDBBase, iObjectDBR): |