diff options
-rw-r--r-- | lib/git/errors.py | 15 | ||||
-rw-r--r-- | lib/git/odb/__init__.py | 2 | ||||
-rw-r--r-- | lib/git/odb/db.py | 129 | ||||
-rw-r--r-- | test/git/performance/test_odb.py | 61 | ||||
-rw-r--r-- | test/git/test_odb.py | 12 |
5 files changed, 210 insertions, 9 deletions
diff --git a/lib/git/errors.py b/lib/git/errors.py index f66fb528..ecb1c35b 100644 --- a/lib/git/errors.py +++ b/lib/git/errors.py @@ -8,19 +8,16 @@ Module containing all exceptions thrown througout the git package, """ class InvalidGitRepositoryError(Exception): - """ - Thrown if the given repository appears to have an invalid format. - """ + """ Thrown if the given repository appears to have an invalid format. """ + +class InvalidDBRoot(Exception): + """Thrown if an object database cannot be initialized at the given path""" class NoSuchPathError(OSError): - """ - Thrown if a path could not be access by the system. - """ + """ Thrown if a path could not be access by the system. """ class GitCommandError(Exception): - """ - Thrown if execution of the git command fails with non-zero status code. - """ + """ Thrown if execution of the git command fails with non-zero status code. """ def __init__(self, command, status, stderr=None): self.stderr = stderr self.status = status diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py new file mode 100644 index 00000000..17000244 --- /dev/null +++ b/lib/git/odb/__init__.py @@ -0,0 +1,2 @@ +"""Initialize the object database module""" + diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py new file mode 100644 index 00000000..fd1b640a --- /dev/null +++ b/lib/git/odb/db.py @@ -0,0 +1,129 @@ +"""Contains implementations of database retrieveing objects""" +import os +from git.errors import InvalidDBRoot + + +class iObjectDBR(object): + """Defines an interface for object database lookup. + Objects are identified either by hex-sha (40 bytes) or + by sha (20 bytes)""" + __slots__ = tuple() + + #{ Query Interface + def has_obj_hex(self, hexsha): + """:return: True if the object identified by the given 40 byte hexsha is + contained in the database""" + raise NotImplementedError("To be implemented in subclass") + + def has_obj_bin(self, sha): + """:return: as ``has_obj_hex``, but takes a 20 byte binary sha""" + raise NotImplementedError("To be implemented in subclass") + + def obj_hex(self, hexsha): + """:return: tuple(type_string, size_in_bytes, stream) a tuple with object + information including its type, its size as well as a stream from which its + contents can be read""" + raise NotImplementedError("To be implemented in subclass") + + def obj_bin(self, sha): + """:return: as in ``obj_hex``, but takes a binary sha""" + raise NotImplementedError("To be implemented in subclass") + + def obj_info_hex(self, hexsha): + """:return: tuple(type_string, size_in_bytes) tuple with the object's type + string as well as its size in bytes""" + raise NotImplementedError("To be implemented in subclass") + + #} END query interface + +class iObjectDBW(object): + """Defines an interface to create objects in the database""" + __slots__ = tuple() + + #{ Edit Interface + + def to_obj(self, type, size, stream, dry_run=False, sha_as_hex=True): + """Create a new object in the database + :return: the sha identifying the object in the database + :param type: type string identifying the object + :param size: size of the data to read from stream + :param stream: stream providing the data + :param dry_run: if True, the object database will not actually be changed + :param sha_as_hex: if True, the returned sha identifying the object will be + hex encoded, not binary""" + raise NotImplementedError("To be implemented in subclass") + + def to_objs(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0): + """Create multiple new objects in the database + :return: sequence of shas identifying the created objects in the order in which + they where given. + :param iter_info: iterable yielding tuples containing the type_string + size_in_bytes and the steam with the content data. + :param dry_run: see ``to_obj`` + :param sha_as_hex: see ``to_obj`` + :param max_threads: if < 1, any number of threads may be started while processing + the request, otherwise the given number of threads will be started.""" + # a trivial implementation, ignoring the threads for now + # TODO: add configuration to the class to determine whether we may + # actually use multiple threads, default False of course. If the add + shas = list() + for args in iter_info: + shas.append(self.to_obj(*args, dry_run=dry_run, sha_as_hex=sha_as_hex)) + return shas + + #} END edit interface + + +class FileDBBase(object): + """Provides basic facilities to retrieve files of interest, including + caching facilities to help mapping hexsha's to objects""" + __slots__ = ('_root_path', ) + + def __init__(self, root_path): + """Initialize this instance to look for its files at the given root path + All subsequent operations will be relative to this path + :raise InvalidDBRoot: + :note: The base will perform basic checking for accessability, but the subclass + is required to verify that the root_path contains the database structure it needs""" + if not os.path.isdir(root_path): + raise InvalidDBRoot(root_path) + self._root_path = root_path + + + #{ Interface + def root_path(self): + """:return: path at which this db operates""" + return self._root_path + + #} END interface + + #{ Utiltities + def _root_rela_path(self, rela_path): + """:return: the given relative path relative to our database root""" + return os.path.join(self._root_path, rela_path) + + #} END utilities + + +class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW): + """A database which operates on loose object files""" + + +class PackedDB(FileDBBase, iObjectDBR): + """A database operating on a set of object packs""" + + +class CompoundDB(iObjectDBR): + """A database which delegates calls to sub-databases""" + + +class ReferenceDB(CompoundDB): + """A database consisting of database referred to in a file""" + + +class GitObjectDB(CompoundDB, iObjectDBW): + """A database representing the default git object store, which includes loose + objects, pack files and an alternates file + + It will create objects only in the loose object database.""" + diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py new file mode 100644 index 00000000..0ad2ce33 --- /dev/null +++ b/test/git/performance/test_odb.py @@ -0,0 +1,61 @@ +"""Performance tests for object store""" + +from time import time +import sys +import stat + +from lib import ( + TestBigRepoReadOnly + ) + + +class TestObjDBPerformance(TestBigRepoReadOnly): + + def test_random_access(self): + + # GET COMMITS + # TODO: use the actual db for this + st = time() + root_commit = self.gitrepo.commit(self.head_sha_2k) + commits = list(root_commit.traverse()) + nc = len(commits) + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed) + + + # GET TREES + # walk all trees of all commits + st = time() + blobs_per_commit = list() + nt = 0 + for commit in commits: + tree = commit.tree + blobs = list() + for item in tree.traverse(): + nt += 1 + if item.type == 'blob': + blobs.append(item) + # direct access for speed + # END while trees are there for walking + blobs_per_commit.append(blobs) + # END for each commit + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed) + + # GET BLOBS + st = time() + nb = 0 + too_many = 15000 + for blob_list in blobs_per_commit: + for blob in blob_list: + blob.data + # END for each blobsha + nb += len(blob_list) + if nb > too_many: + break + # END for each bloblist + elapsed = time() - st + + print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed) diff --git a/test/git/test_odb.py b/test/git/test_odb.py new file mode 100644 index 00000000..6f92a5c1 --- /dev/null +++ b/test/git/test_odb.py @@ -0,0 +1,12 @@ +"""Test for object db""" + +from test.testlib import * +from git.odb.db import * + + +class TestDB(TestBase): + """Test the different db class implementations""" + + def test_loose_db(self): + self.fail("todo") + |