Diffstat (limited to 'git/db/py/pack.py')
-rw-r--r--  git/db/py/pack.py  378
1 file changed, 189 insertions(+), 189 deletions(-)
diff --git a/git/db/py/pack.py b/git/db/py/pack.py
index 75b75468..0d4c533a 100644
--- a/git/db/py/pack.py
+++ b/git/db/py/pack.py
@@ -5,17 +5,17 @@
"""Module containing a database to deal with packs"""
from git.db import CachingDB
from base import (
- PureRootPathDB,
- PureObjectDBR
- )
+ PureRootPathDB,
+ PureObjectDBR
+ )
from git.util import LazyMixin
from git.exc import (
- BadObject,
- UnsupportedOperation,
- AmbiguousObjectName
- )
+ BadObject,
+ UnsupportedOperation,
+ AmbiguousObjectName
+ )
from git.pack import PackEntity
@@ -28,185 +28,185 @@ __all__ = ('PurePackedODB', )
class PurePackedODB(PureRootPathDB, PureObjectDBR, CachingDB, LazyMixin):
- """A database operating on a set of object packs"""
-
- # the type to use when instantiating a pack entity
- PackEntityCls = PackEntity
-
- # re-sort the priority list every N queries
- # Higher values mean less sorting overhead; performance tests show
- # no measurable effect yet, but in theory there should be one
- _sort_interval = 500
-
- def __init__(self, root_path):
- super(PurePackedODB, self).__init__(root_path)
- # list of lists with three items:
- # * hits - number of times the pack was hit with a request
- # * entity - Pack entity instance
- # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
- # self._entities = list() # lazily loaded list, see _set_cache_()
- self._hit_count = 0 # number of hits
- self._st_mtime = 0 # last modification date of our root path
-
- def _set_cache_(self, attr):
- if attr == '_entities':
- self._entities = list()
- self.update_cache(force=True)
- # END handle entities initialization
-
- def _sort_entities(self):
- self._entities.sort(key=lambda l: l[0], reverse=True)
-
- def _pack_info(self, sha):
- """:return: tuple(entity, index) for an item at the given sha
- :param sha: 20 or 40 byte sha
- :raise BadObject:
- :note: This method is not thread-safe, but may be hit in multi-threaded
- operation. The worst that can happen is a counter that was not
- incremented, or the list being in the wrong order. So we save the
- time for locking here; let's see how that goes
- # presort ?
- if self._hit_count % self._sort_interval == 0:
- self._sort_entities()
- # END update sorting
-
- for item in self._entities:
- index = item[2](sha)
- if index is not None:
- item[0] += 1 # one hit for you
- self._hit_count += 1 # general hit count
- return (item[1], index)
- # END index found in pack
- # END for each item
-
- # no hit, see whether we have to update packs
- # NOTE: as packs don't change very often, we save this call here
- # and leave it to the super-caller to trigger the update
- raise BadObject(sha)
-
- #{ Object DB Read
-
- def has_object(self, sha):
- try:
- self._pack_info(sha)
- return True
- except BadObject:
- return False
- # END exception handling
-
- def info(self, sha):
- entity, index = self._pack_info(sha)
- return entity.info_at_index(index)
-
- def stream(self, sha):
- entity, index = self._pack_info(sha)
- return entity.stream_at_index(index)
-
- def sha_iter(self):
- for entity in self.entities():
- index = entity.index()
- sha_by_index = index.sha
- for i in xrange(index.size()):
- yield sha_by_index(i)
- # END for each index
- # END for each entity
-
- def size(self):
- sizes = [item[1].index().size() for item in self._entities]
- return sum(sizes)
-
- #} END object db read
-
- #{ object db write
-
- def store(self, istream):
- """Storing individual objects is not feasible as a pack is designed to
- hold multiple objects. Writing or rewriting packs for single objects is
- inefficient"""
- raise UnsupportedOperation()
-
- def store_async(self, reader):
- # TODO: add PureObjectDBRW before implementing this
- raise NotImplementedError()
-
- #} END object db write
-
-
- #{ Interface
-
- def update_cache(self, force=False):
- """
- Update our cache with the packs that actually exist on disk. Add new ones,
- and remove deleted ones. We keep the unchanged ones.
-
- :param force: If True, the cache will be updated even though the directory
- does not appear to have changed according to its modification timestamp.
- :return: True if the packs have been updated so there is new information,
- False if there was no change to the pack database"""
- stat = os.stat(self.root_path())
- if not force and stat.st_mtime <= self._st_mtime:
- return False
- # END abort early on no change
- self._st_mtime = stat.st_mtime
-
- # packs are supposed to be prefixed with pack- by git convention
- # get all pack files, figure out what changed
- pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
- our_pack_files = set(item[1].pack().path() for item in self._entities)
-
- # new packs
- for pack_file in (pack_files - our_pack_files):
- # init the hit-counter/priority with the size, a good measure for hit-
- # probability. It's implemented so that only 12 bytes will be read
- entity = self.PackEntityCls(pack_file)
- self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
- # END for each new packfile
-
- # removed packs
- for pack_file in (our_pack_files - pack_files):
- del_index = -1
- for i, item in enumerate(self._entities):
- if item[1].pack().path() == pack_file:
- del_index = i
- break
- # END found index
- # END for each entity
- assert del_index != -1
- del(self._entities[del_index])
- # END for each removed pack
-
- # reinitialize priorities
- self._sort_entities()
- return True
-
- def entities(self):
- """:return: list of pack entities operated upon by this database"""
- return [ item[1] for item in self._entities ]
-
- def partial_to_complete_sha(self, partial_binsha, canonical_length):
- :return: 20 byte sha as inferred from the given partial binary sha
- :param partial_binsha: binary sha with less than 20 bytes
- :param canonical_length: length of the corresponding canonical (hex) representation.
- It is required as binary shas cannot convey whether the original hex sha
- had an odd or even number of characters
- :raise AmbiguousObjectName:
- :raise BadObject: """
- candidate = None
- for item in self._entities:
- item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
- if item_index is not None:
- sha = item[1].index().sha(item_index)
- if candidate and candidate != sha:
- raise AmbiguousObjectName(partial_binsha)
- candidate = sha
- # END handle full sha could be found
- # END for each entity
-
- if candidate:
- return candidate
-
- # still not found ?
- raise BadObject(partial_binsha)
-
- #} END interface
+ """A database operating on a set of object packs"""
+
+ # the type to use when instantiating a pack entity
+ PackEntityCls = PackEntity
+
+ # re-sort the priority list every N queries
+ # Higher values mean less sorting overhead; performance tests show
+ # no measurable effect yet, but in theory there should be one
+ _sort_interval = 500
+
+ def __init__(self, root_path):
+ super(PurePackedODB, self).__init__(root_path)
+ # list of lists with three items:
+ # * hits - number of times the pack was hit with a request
+ # * entity - Pack entity instance
+ # * sha_to_index - PackIndexFile.sha_to_index method for direct cache query
+ # self._entities = list() # lazily loaded list, see _set_cache_()
+ self._hit_count = 0 # number of hits
+ self._st_mtime = 0 # last modification date of our root path
+
+ def _set_cache_(self, attr):
+ if attr == '_entities':
+ self._entities = list()
+ self.update_cache(force=True)
+ # END handle entities initialization
+
+ def _sort_entities(self):
+ self._entities.sort(key=lambda l: l[0], reverse=True)
+
+ def _pack_info(self, sha):
+ """:return: tuple(entity, index) for an item at the given sha
+ :param sha: 20 or 40 byte sha
+ :raise BadObject:
+ :note: This method is not thread-safe, but may be hit in multi-threaded
+ operation. The worst that can happen is a counter that was not
+ incremented, or the list being in the wrong order. So we save the
+ time for locking here; let's see how that goes
+ # presort ?
+ if self._hit_count % self._sort_interval == 0:
+ self._sort_entities()
+ # END update sorting
+
+ for item in self._entities:
+ index = item[2](sha)
+ if index is not None:
+ item[0] += 1 # one hit for you
+ self._hit_count += 1 # general hit count
+ return (item[1], index)
+ # END index found in pack
+ # END for each item
+
+ # no hit, see whether we have to update packs
+ # NOTE: as packs don't change very often, we save this call here
+ # and leave it to the super-caller to trigger the update
+ raise BadObject(sha)
+
+ #{ Object DB Read
+
+ def has_object(self, sha):
+ try:
+ self._pack_info(sha)
+ return True
+ except BadObject:
+ return False
+ # END exception handling
+
+ def info(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.info_at_index(index)
+
+ def stream(self, sha):
+ entity, index = self._pack_info(sha)
+ return entity.stream_at_index(index)
+
+ def sha_iter(self):
+ for entity in self.entities():
+ index = entity.index()
+ sha_by_index = index.sha
+ for i in xrange(index.size()):
+ yield sha_by_index(i)
+ # END for each index
+ # END for each entity
+
+ def size(self):
+ sizes = [item[1].index().size() for item in self._entities]
+ return sum(sizes)
+
+ #} END object db read
+
+ #{ object db write
+
+ def store(self, istream):
+ """Storing individual objects is not feasible as a pack is designed to
+ hold multiple objects. Writing or rewriting packs for single objects is
+ inefficient"""
+ raise UnsupportedOperation()
+
+ def store_async(self, reader):
+ # TODO: add PureObjectDBRW before implementing this
+ raise NotImplementedError()
+
+ #} END object db write
+
+
+ #{ Interface
+
+ def update_cache(self, force=False):
+ """
+ Update our cache with the packs that actually exist on disk. Add new ones,
+ and remove deleted ones. We keep the unchanged ones.
+
+ :param force: If True, the cache will be updated even though the directory
+ does not appear to have changed according to its modification timestamp.
+ :return: True if the packs have been updated so there is new information,
+ False if there was no change to the pack database"""
+ stat = os.stat(self.root_path())
+ if not force and stat.st_mtime <= self._st_mtime:
+ return False
+ # END abort early on no change
+ self._st_mtime = stat.st_mtime
+
+ # packs are supposed to be prefixed with pack- by git convention
+ # get all pack files, figure out what changed
+ pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
+ our_pack_files = set(item[1].pack().path() for item in self._entities)
+
+ # new packs
+ for pack_file in (pack_files - our_pack_files):
+ # init the hit-counter/priority with the size, a good measure for hit-
+ # probability. It's implemented so that only 12 bytes will be read
+ entity = self.PackEntityCls(pack_file)
+ self._entities.append([entity.pack().size(), entity, entity.index().sha_to_index])
+ # END for each new packfile
+
+ # removed packs
+ for pack_file in (our_pack_files - pack_files):
+ del_index = -1
+ for i, item in enumerate(self._entities):
+ if item[1].pack().path() == pack_file:
+ del_index = i
+ break
+ # END found index
+ # END for each entity
+ assert del_index != -1
+ del(self._entities[del_index])
+ # END for each removed pack
+
+ # reinitialize priorities
+ self._sort_entities()
+ return True
+
+ def entities(self):
+ """:return: list of pack entities operated upon by this database"""
+ return [ item[1] for item in self._entities ]
+
+ def partial_to_complete_sha(self, partial_binsha, canonical_length):
+ :return: 20 byte sha as inferred from the given partial binary sha
+ :param partial_binsha: binary sha with less than 20 bytes
+ :param canonical_length: length of the corresponding canonical (hex) representation.
+ It is required as binary shas cannot convey whether the original hex sha
+ had an odd or even number of characters
+ :raise AmbiguousObjectName:
+ :raise BadObject: """
+ candidate = None
+ for item in self._entities:
+ item_index = item[1].index().partial_sha_to_index(partial_binsha, canonical_length)
+ if item_index is not None:
+ sha = item[1].index().sha(item_index)
+ if candidate and candidate != sha:
+ raise AmbiguousObjectName(partial_binsha)
+ candidate = sha
+ # END handle full sha could be found
+ # END for each entity
+
+ if candidate:
+ return candidate
+
+ # still not found ?
+ raise BadObject(partial_binsha)
+
+ #} END interface
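
Usage note: a minimal sketch of driving the database shown above, assuming a repository at the hypothetical path below (pack_dir and the path are illustrative; the class is read-only, so store() raises UnsupportedOperation):

import os
from git.db.py.pack import PurePackedODB

# hypothetical repository location; adjust to your layout
pack_dir = os.path.join("/path/to/repo", ".git", "objects", "pack")

odb = PurePackedODB(pack_dir)
odb.update_cache(force=True)       # scan pack-*.pack files on disk
print odb.size()                   # total number of packed objects

for binsha in odb.sha_iter():      # yields 20-byte binary shas
    info = odb.info(binsha)        # object information at that sha
    break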
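The _entities attribute is created lazily: LazyMixin calls _set_cache_() the first time the attribute is accessed. A rough sketch of that protocol, assuming git.util.LazyMixin follows the usual gitdb pattern (not a verbatim copy of it):

class LazyMixinSketch(object):
    # simplified stand-in for git.util.LazyMixin
    def __getattr__(self, attr):
        # only reached when 'attr' is not set yet; the subclass hook
        # creates and assigns the attribute, then we return it
        self._set_cache_(attr)
        return object.__getattribute__(self, attr)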
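update_cache() skips the directory scan entirely when the pack directory's st_mtime has not advanced since the last run. The same early-out in isolation, as an illustrative helper that is not part of the module:

import os

class MtimeGuard(object):
    # remembers the last seen modification time of a directory
    def __init__(self, path):
        self.path = path
        self._st_mtime = 0

    def changed(self, force=False):
        st = os.stat(self.path)
        if not force and st.st_mtime <= self._st_mtime:
            return False           # directory unchanged since last scan
        self._st_mtime = st.st_mtime
        return True                # caller should rescan pack files now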
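partial_to_complete_sha() takes canonical_length because packing a hex prefix into bytes loses whether its length was odd or even. A hypothetical helper showing how both arguments could be derived from a hex prefix:

import binascii

def hex_prefix_to_args(hex_prefix):
    # "abc" -> ("\xab\xc0", 3) and "abc0" -> ("\xab\xc0", 4): the binary
    # form alone cannot tell these two prefixes apart
    canonical_length = len(hex_prefix)
    if canonical_length % 2:
        hex_prefix = hex_prefix + "0"
    return binascii.unhexlify(hex_prefix), canonical_length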