author    Sebastian Thiel <byronimo@gmail.com>    2010-06-03 16:35:35 +0200
committer Sebastian Thiel <byronimo@gmail.com>    2010-06-03 16:35:35 +0200
commit  38d59fc8ccccae8882fa48671377bf40a27915a7 (patch)
tree    84a3671b709f9fcff39819805a4902546e4d6d66 /lib/git/odb/db.py
parent  6f8ce8901e21587cd2320562df412e05b5ab1731 (diff)
odb: implemented loose object streaming, which is impossible to do efficiently as string buffers are copied all the time
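
As a usage sketch (not part of the commit), the new streaming read path could be consumed like this; the repository path and sha are placeholders, and it is assumed that the returned stream yields an empty string once exhausted:

    from git.odb.db import LooseObjectDB

    # hypothetical object database root - point this at a real repository
    db = LooseObjectDB("/path/to/repo/.git/objects")
    hexsha = "38d59fc8ccccae8882fa48671377bf40a27915a7"

    if db.has_object(hexsha):
        # object() now returns a decompressing stream instead of a buffer
        type_string, size, stream = db.object(hexsha)
        while True:
            data = stream.read(db.stream_chunk_size)
            if not data:
                break
            # END detect end of stream
            # ... process data chunk by chunk; the full object is never
            # held in memory at once
        # END read loop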
Diffstat (limited to 'lib/git/odb/db.py')
-rw-r--r--  lib/git/odb/db.py  |  114
1 file changed, 63 insertions(+), 51 deletions(-)
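
One detail worth noting before the diff: the new _map_loose_object opens files with os.O_NOATIME where available and drops the flag for the instance once a mount rejects it. A minimal standalone sketch of that fallback pattern, with a helper name made up for illustration:

    import os
    import errno

    def open_readonly(path, extra_flags=getattr(os, 'O_NOATIME', 0)):
        """Open path read-only, retrying without extra_flags if they are rejected.
        :return: tuple(fd, flags_that_worked)"""
        try:
            return os.open(path, os.O_RDONLY | extra_flags), extra_flags
        except OSError, e:
            if e.errno == errno.ENOENT:
                # the file truly is missing - nothing to retry
                raise
            # END re-raise missing files
            # any other error may be caused by extra_flags - retry without them
            return os.open(path, os.O_RDONLY), 0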
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
index 1248a3f4..5c50a512 100644
--- a/lib/git/odb/db.py
+++ b/lib/git/odb/db.py
@@ -1,17 +1,18 @@
"""Contains implementations of database retrieveing objects"""
-import os
+from git.utils import IndexFileSHA1Writer
from git.errors import (
InvalidDBRoot,
- BadObject
+ BadObject,
+ BadObjectType
)
-from git.utils import IndexFileSHA1Writer
from utils import (
- getsize,
+ DecompressMemMapReader,
+ FDCompressedSha1Writer,
+ ENOENT,
to_hex_sha,
exists,
hex_to_bin,
- FDCompressedSha1Writer,
isdir,
mkdir,
rename,
@@ -19,8 +20,15 @@ from utils import (
join
)
+from fun import (
+ chunk_size,
+ loose_object_header_info,
+ write_object
+ )
+
import tempfile
import mmap
+import os
class iObjectDBR(object):
@@ -36,7 +44,8 @@ class iObjectDBR(object):
def has_object(self, sha):
"""
:return: True if the object identified by the given 40 byte hexsha or 20 bytes
- binary sha is contained in the database"""
+ binary sha is contained in the database
+ :raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
def object(self, sha):
@@ -44,14 +53,16 @@ class iObjectDBR(object):
:return: tuple(type_string, size_in_bytes, stream) a tuple with object
information including its type, its size as well as a stream from which its
contents can be read
- :param sha: 40 bytes hexsha or 20 bytes binary sha """
+ :param sha: 40 bytes hexsha or 20 bytes binary sha
+ :raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
def object_info(self, sha):
"""
:return: tuple(type_string, size_in_bytes) tuple with the object's type
string as well as its size in bytes
- :param sha: 40 bytes hexsha or 20 bytes binary sha"""
+ :param sha: 40 bytes hexsha or 20 bytes binary sha
+ :raise BadObject:"""
raise NotImplementedError("To be implemented in subclass")
#} END query interface
@@ -70,7 +81,8 @@ class iObjectDBW(object):
:param stream: stream providing the data
:param dry_run: if True, the object database will not actually be changed
:param sha_as_hex: if True, the returned sha identifying the object will be
- hex encoded, not binary"""
+ hex encoded, not binary
+ :raise IOError: if data could not be written"""
raise NotImplementedError("To be implemented in subclass")
def to_objects(self, iter_info, dry_run=False, sha_as_hex=True, max_threads=0):
@@ -82,7 +94,8 @@ class iObjectDBW(object):
:param dry_run: see ``to_obj``
:param sha_as_hex: see ``to_obj``
:param max_threads: if < 1, any number of threads may be started while processing
- the request, otherwise the given number of threads will be started."""
+ the request, otherwise the given number of threads will be started.
+ :raise IOError: if data could not be written"""
# a trivial implementation, ignoring the threads for now
# TODO: add configuration to the class to determine whether we may
# actually use multiple threads, default False of course. If the add
@@ -130,15 +143,19 @@ class FileDBBase(object):
class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
"""A database which operates on loose object files"""
- __slots__ = ('_hexsha_to_file', )
-
+ __slots__ = ('_hexsha_to_file', '_fd_open_flags')
# CONFIGURATION
# chunks in which data will be copied between streams
- stream_chunk_size = 1000*1000
+ stream_chunk_size = chunk_size
+
def __init__(self, root_path):
super(LooseObjectDB, self).__init__(root_path)
self._hexsha_to_file = dict()
+ # Additional Flags - might be set to 0 after the first failure
+ # Depending on the root, this might work for some mounts, for others not, which
+ # is why it is per instance
+ self._fd_open_flags = os.O_NOATIME
#{ Interface
def object_path(self, hexsha):
@@ -167,36 +184,46 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
#} END interface
- def _object_header_info(self, mmap):
- """:return: tuple(type_string, uncompressed_size_in_bytes
- :param mmap: newly mapped memory map at position 0. It will be
- seeked to the actual start of the object contents, which can be used
- to initialize a zlib decompress object."""
- raise NotImplementedError("todo")
-
- def _map_object(self, sha):
+ def _map_loose_object(self, sha):
"""
- :return: tuple(file, mmap) tuple with an opened file for reading, and
- a memory map of that file"""
- db_path = self.readable_db_object_path(to_hex_sha(sha))
- f = open(db_path, 'rb')
- m = mmap.mmap(f.fileno(), getsize(db_path), access=mmap.ACCESS_READ)
- return (f, m)
+ :return: memory map of that file to allow random read access
+ :raise BadObject: if object could not be located"""
+ db_path = self.db_path(self.object_path(to_hex_sha(sha)))
+ try:
+ fd = os.open(db_path, os.O_RDONLY|self._fd_open_flags)
+ except OSError,e:
+ if e.errno != ENOENT:
+ # try again without noatime
+ try:
+ fd = os.open(db_path, os.O_RDONLY)
+ except OSError:
+ raise BadObject(to_hex_sha(sha))
+ # didn't work because of our flag, don't try it again
+ self._fd_open_flags = 0
+ else:
+ raise BadObject(to_hex_sha(sha))
+ # END handle error
+ # END exception handling
+ try:
+ return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ finally:
+ os.close(fd)
+ # END assure file is closed
def object_info(self, sha):
- f, m = self._map_object(sha)
+ m = self._map_loose_object(sha)
try:
- type, size = self._object_header_info(m)
+ return loose_object_header_info(m)
finally:
- f.close()
m.close()
# END assure release of system resources
def object(self, sha):
- f, m = self._map_object(sha)
- type, size = self._object_header_info(m)
- # TODO: init a dynamic decompress stream from our memory map
+ m = self._map_loose_object(sha)
+ reader = DecompressMemMapReader(m, close_on_deletion = True)
+ type, size = reader.initialize()
+ return type, size, reader
def has_object(self, sha):
try:
@@ -210,25 +237,10 @@ class LooseObjectDB(FileDBBase, iObjectDBR, iObjectDBW):
# open a tmp file to write the data to
fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
writer = FDCompressedSha1Writer(fd)
-
- # WRITE HEADER: type SP size NULL
- writer.write("%s %i%s" % (type, size, chr(0)))
-
- # WRITE ALL DATA
- chunksize = self.stream_chunk_size
+
try:
- try:
- while True:
- data_len = writer.write(stream.read(chunksize))
- if data_len < chunksize:
- # WRITE FOOTER
- writer.write('\n')
- break
- # END check for stream end
- # END duplicate data
- finally:
- writer.close()
- # END assure file was closed
+ write_object(type, size, stream, writer,
+ close_target_stream=True, chunk_size=self.stream_chunk_size)
except:
os.remove(tmp_path)
raise
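
For context, write_object is imported from fun and is not part of this diff. A hedged sketch of what it presumably does, based on the inline code it replaces: write the loose-object header 'type SP size NUL', then copy the stream in chunks (whether it still appends the trailing newline the old code wrote is not visible here):

    chunk_size = 1000 * 1000        # mirrors the previous stream_chunk_size default

    def write_object(type, size, source_stream, target_stream,
                     close_target_stream=True, chunk_size=chunk_size):
        """Sketch only - the real helper lives in lib/git/odb/fun.py"""
        try:
            # loose object header: "<type> <size>\0"
            target_stream.write("%s %i\0" % (type, size))
            # copy the payload in bounded chunks so large objects never
            # need to fit into memory at once
            while True:
                data = source_stream.read(chunk_size)
                target_stream.write(data)
                if len(data) < chunk_size:
                    break
                # END detect end of stream
            # END copy loop
        finally:
            if close_target_stream:
                target_stream.close()
            # END assure target stream is closed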