summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/git/errors.py 15
-rw-r--r-- lib/git/odb/__init__.py 2
-rw-r--r-- lib/git/odb/db.py 129
-rw-r--r-- test/git/performance/test_odb.py 61
-rw-r--r-- test/git/test_odb.py 12
5 files changed, 210 insertions, 9 deletions
diff --git a/lib/git/errors.py b/lib/git/errors.py
index f66fb528..ecb1c35b 100644
--- a/lib/git/errors.py
+++ b/lib/git/errors.py
@@ -8,19 +8,16 @@ Module containing all exceptions thrown througout the git package,
"""
class InvalidGitRepositoryError(Exception):
- """
- Thrown if the given repository appears to have an invalid format.
- """
+ """ Thrown if the given repository appears to have an invalid format. """
+
+class InvalidDBRoot(Exception):
+ """Thrown if an object database cannot be initialized at the given path"""
class NoSuchPathError(OSError):
- """
- Thrown if a path could not be access by the system.
- """
+ """ Thrown if a path could not be accessed by the system. """
class GitCommandError(Exception):
- """
- Thrown if execution of the git command fails with non-zero status code.
- """
+ """ Thrown if execution of the git command fails with non-zero status code. """
def __init__(self, command, status, stderr=None):
self.stderr = stderr
self.status = status
diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py
new file mode 100644
index 00000000..17000244
--- /dev/null
+++ b/lib/git/odb/__init__.py
@@ -0,0 +1,2 @@
+"""Initialize the object database module"""
+
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
new file mode 100644
index 00000000..fd1b640a
--- /dev/null
+++ b/lib/git/odb/db.py
@@ -0,0 +1,129 @@
+"""Contains implementations of databases retrieving objects"""
+import os
+from git.errors import InvalidDBRoot
+
+
+class iObjectDBR(object):
+ """Defines an interface for object database lookup.
+ Objects are identified either by hex-sha (40 bytes) or
+ by sha (20 bytes)"""
+ __slots__ = tuple()
+
+ #{ Query Interface
+ def has_obj_hex(self, hexsha):
+ """:return: True if the object identified by the given 40 byte hexsha is
+ contained in the database"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def has_obj_bin(self, sha):
+ """:return: as ``has_obj_hex``, but takes a 20 byte binary sha"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def obj_hex(self, hexsha):
+ """:return: tuple(type_string, size_in_bytes, stream) a tuple with object
+ information including its type, its size as well as a stream from which its
+ contents can be read"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def obj_bin(self, sha):
+ """:return: as in ``obj_hex``, but takes a binary sha"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def obj_info_hex(self, hexsha):
+ """:return: tuple(type_string, size_in_bytes) tuple with the object's type
+ string as well as its size in bytes"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ #} END query interface
+
+class iObjectDBW(object):
+ """Defines an interface to create objects in the database"""
+ __slots__ = tuple()
+
+ #{ Edit Interface
+
+ def to_obj(self, type, size, stream, dry_run=False, sha_as_hex=True):
+ """Create a new object in the database
+ :return: the sha identifying the object in the database
+ :param type: type string identifying the object
+ :param size: size of the data to read from stream
+ :param stream: stream providing the data
+ :param dry_run: if True, the object database will not actually be changed
+ :param sha_as_hex: if True, the returned sha identifying the object will be
+ hex encoded, not binary"""
+ raise NotImplementedError("To be implemented in subclass")
+
+ def to_objs(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
+ """Create multiple new objects in the database
+ :return: sequence of shas identifying the created objects in the order in which
+ they were given.
+ :param iter_info: iterable yielding tuples containing the type_string,
+ size_in_bytes and the stream with the content data.
+ :param dry_run: see ``to_obj``
+ :param sha_as_hex: see ``to_obj``
+ :param max_threads: if < 1, any number of threads may be started while processing
+ the request, otherwise the given number of threads will be started."""
+ # a trivial implementation, ignoring the threads for now
+ # TODO: add configuration to the class to determine whether we may
+ # actually use multiple threads, default False of course. If the add
+ shas = list()
+ for args in iter_info:
+ shas.append(self.to_obj(*args, dry_run=dry_run, sha_as_hex=sha_as_hex))
+ return shas
+
+ #} END edit interface
+
+
+class FileDBBase(object):
+ """Provides basic facilities to retrieve files of interest, including
+ caching facilities to help map hexshas to objects"""
+ __slots__ = ('_root_path', )
+
+ def __init__(self, root_path):
+ """Initialize this instance to look for its files at the given root path
+ All subsequent operations will be relative to this path
+ :raise InvalidDBRoot:
+ :note: The base will perform basic checking for accessibility, but the subclass
+ is required to verify that the root_path contains the database structure it needs"""
+ if not os.path.isdir(root_path):
+ raise InvalidDBRoot(root_path)
+ self._root_path = root_path
+
+
+ #{ Interface
+ def root_path(self):
+ """:return: path at which this db operates"""
+ return self._root_path
+
+ #} END interface
+
+ #{ Utilities
+ def _root_rela_path(self, rela_path):
+ """:return: the given relative path relative to our database root"""
+ return os.path.join(self._root_path, rela_path)
+
+ #} END utilities
+
+
+class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
+ """A database which operates on loose object files"""
+
+
+class PackedDB(FileDBBase, iObjectDBR):
+ """A database operating on a set of object packs"""
+
+
+class CompoundDB(iObjectDBR):
+ """A database which delegates calls to sub-databases"""
+
+
+class ReferenceDB(CompoundDB):
+ """A database consisting of databases referred to in a file"""
+
+
+class GitObjectDB(CompoundDB, iObjectDBW):
+ """A database representing the default git object store, which includes loose
+ objects, pack files and an alternates file
+
+ It will create objects only in the loose object database."""
+
diff --git a/test/git/performance/test_odb.py b/test/git/performance/test_odb.py
new file mode 100644
index 00000000..0ad2ce33
--- /dev/null
+++ b/test/git/performance/test_odb.py
@@ -0,0 +1,61 @@
+"""Performance tests for object store"""
+
+from time import time
+import sys
+import stat
+
+from lib import (
+ TestBigRepoReadOnly
+ )
+
+
+class TestObjDBPerformance(TestBigRepoReadOnly):
+
+ def test_random_access(self):
+
+ # GET COMMITS
+ # TODO: use the actual db for this
+ st = time()
+ root_commit = self.gitrepo.commit(self.head_sha_2k)
+ commits = list(root_commit.traverse())
+ nc = len(commits)
+ elapsed = time() - st
+
+ print >> sys.stderr, "Retrieved %i commits from ObjectStore in %g s ( %f commits / s )" % (nc, elapsed, nc / elapsed)
+
+
+ # GET TREES
+ # walk all trees of all commits
+ st = time()
+ blobs_per_commit = list()
+ nt = 0
+ for commit in commits:
+ tree = commit.tree
+ blobs = list()
+ for item in tree.traverse():
+ nt += 1
+ if item.type == 'blob':
+ blobs.append(item)
+ # direct access for speed
+ # END while trees are there for walking
+ blobs_per_commit.append(blobs)
+ # END for each commit
+ elapsed = time() - st
+
+ print >> sys.stderr, "Retrieved %i objects from %i commits in %g s ( %f objects / s )" % (nt, len(commits), elapsed, nt / elapsed)
+
+ # GET BLOBS
+ st = time()
+ nb = 0
+ too_many = 15000
+ for blob_list in blobs_per_commit:
+ for blob in blob_list:
+ blob.data
+ # END for each blobsha
+ nb += len(blob_list)
+ if nb > too_many:
+ break
+ # END for each bloblist
+ elapsed = time() - st
+
+ print >> sys.stderr, "Retrieved %i blob and their data in %g s ( %f blobs / s )" % (nb, elapsed, nb / elapsed)
diff --git a/test/git/test_odb.py b/test/git/test_odb.py
new file mode 100644
index 00000000..6f92a5c1
--- /dev/null
+++ b/test/git/test_odb.py
@@ -0,0 +1,12 @@
+"""Test for object db"""
+
+from test.testlib import *
+from git.odb.db import *
+
+
+class TestDB(TestBase):
+ """Test the different db class implementations"""
+
+ def test_loose_db(self):
+ self.fail("todo")
+