Diffstat (limited to 'lib/git')
-rw-r--r--   lib/git/__init__.py         |    3
-rw-r--r--   lib/git/objects/base.py     |   17
-rw-r--r--   lib/git/objects/commit.py   |    4
-rw-r--r--   lib/git/odb/__init__.py     |    4
-rw-r--r--   lib/git/odb/db.py           |  116
-rw-r--r--   lib/git/odb/fun.py          |   46
-rw-r--r--   lib/git/odb/stream.py       |  168
-rw-r--r--   lib/git/odb/utils.py        |    2
-rw-r--r--   lib/git/repo.py             |    2
9 files changed, 211 insertions, 151 deletions
diff --git a/lib/git/__init__.py b/lib/git/__init__.py
index aac539eb..2f17c55b 100644
--- a/lib/git/__init__.py
+++ b/lib/git/__init__.py
@@ -22,5 +22,8 @@ from git.remote import *
 from git.index import *
 from git.utils import LockFile, BlockingLockFile
 
+# odb is NOT imported intentionally - if you really want it, you should get it
+# yourself as its part of the core
+
 __all__ = [ name for name, obj in locals().items()
             if not (name.startswith('_') or inspect.ismodule(obj)) ]
diff --git a/lib/git/objects/base.py b/lib/git/objects/base.py
index 76384888..5a3a15a7 100644
--- a/lib/git/objects/base.py
+++ b/lib/git/objects/base.py
@@ -76,12 +76,14 @@ class Object(LazyMixin):
         Retrieve object information
         """
         if attr == "size":
-            typename, self.size = self.repo.odb.info(self.sha)
-            assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+            oinfo = self.repo.odb.info(self.sha)
+            self.size = oinfo.size
+            assert oinfo.type == self.type, _assertion_msg_format % (self.sha, oinfo.type, self.type)
         elif attr == "data":
-            typename, self.size, stream = self.repo.odb.stream(self.sha)
-            self.data = stream.read()    # once we have an own odb, we can delay reading
-            assert typename == self.type, _assertion_msg_format % (self.sha, typename, self.type)
+            ostream = self.repo.odb.stream(self.sha)
+            self.size = ostream.size
+            self.data = ostream.read()
+            assert ostream.type == self.type, _assertion_msg_format % (self.sha, ostream.type, self.type)
         else:
             super(Object,self)._set_cache_(attr)
 
@@ -124,14 +126,13 @@ class Object(LazyMixin):
     def data_stream(self):
         """:return: File Object compatible stream to the uncompressed raw data of the object
         :note: returned streams must be read in order"""
-        type, size, stream = self.repo.odb.stream(self.sha)
-        return stream
+        return self.repo.odb.stream(self.sha)
 
     def stream_data(self, ostream):
         """Writes our data directly to the given output stream
         :param ostream: File object compatible stream object.
         :return: self"""
-        type, size, istream = self.repo.odb.stream(self.sha)
+        istream = self.repo.odb.stream(self.sha)
         stream_copy(istream, ostream)
         return self
diff --git a/lib/git/objects/commit.py b/lib/git/objects/commit.py
index dbc0cf27..9a3c2c95 100644
--- a/lib/git/objects/commit.py
+++ b/lib/git/objects/commit.py
@@ -9,6 +9,7 @@ import git.diff as diff
 import git.stats as stats
 from git.actor import Actor
 from tree import Tree
+from git.odb import IStream
 from cStringIO import StringIO
 import base
 import utils
@@ -346,7 +347,8 @@ class Commit(base.Object, Iterable, diff.Diffable, utils.Traversable, utils.Seri
         streamlen = stream.tell()
         stream.seek(0)
 
-        new_commit.sha = repo.odb.store(cls.type, streamlen, stream, sha_as_hex=True)
+        istream = repo.odb.store(IStream(cls.type, streamlen, stream))
+        new_commit.sha = istream.sha
 
         if head:
             try:
diff --git a/lib/git/odb/__init__.py b/lib/git/odb/__init__.py
index 17000244..5789d7eb 100644
--- a/lib/git/odb/__init__.py
+++ b/lib/git/odb/__init__.py
@@ -1,2 +1,6 @@
 """Initialize the object database module"""
 
+# default imports
+from db import *
+from stream import *
+
diff --git a/lib/git/odb/db.py b/lib/git/odb/db.py
index 7ae8f446..a8de28ec 100644
--- a/lib/git/odb/db.py
+++ b/lib/git/odb/db.py
@@ -8,7 +8,10 @@ from git.errors import (
     )
 from stream import (
         DecompressMemMapReader,
-        FDCompressedSha1Writer
+        FDCompressedSha1Writer,
+        Sha1Writer,
+        OStream,
+        OInfo
     )
 
 from utils import (
@@ -34,11 +37,13 @@ import mmap
 import os
 
 
+__all__ = ('ObjectDBR', 'ObjectDBW', 'FileDBBase', 'LooseObjectDB', 'PackedDB',
+            'CompoundDB', 'ReferenceDB', 'GitObjectDB' )
+
 class ObjectDBR(object):
     """Defines an interface for object database lookup.
     Objects are identified either by hex-sha (40 bytes) or by sha (20 bytes)"""
-    __slots__ = tuple()
 
     def __contains__(self, sha):
         return self.has_obj
@@ -52,7 +57,7 @@ class ObjectDBR(object):
         raise NotImplementedError("To be implemented in subclass")
 
     def info(self, sha):
-        """ :return: ODB_Info instance
+        """ :return: OInfo instance
         :param sha: 40 bytes hexsha or 20 bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
@@ -60,27 +65,26 @@
     def info_async(self, input_channel):
         """Retrieve information of a multitude of objects asynchronously
         :param input_channel: Channel yielding the sha's of the objects of interest
-        :return: Channel yielding ODB_Info|InvalidODB_Info, in any order"""
+        :return: Channel yielding OInfo|InvalidOInfo, in any order"""
         raise NotImplementedError("To be implemented in subclass")
 
     def stream(self, sha):
-        """:return: ODB_OStream instance
+        """:return: OStream instance
         :param sha: 40 bytes hexsha or 20 bytes binary sha
         :raise BadObject:"""
         raise NotImplementedError("To be implemented in subclass")
 
     def stream_async(self, input_channel):
-        """Retrieve the ODB_OStream of multiple objects
+        """Retrieve the OStream of multiple objects
         :param input_channel: see ``info``
         :param max_threads: see ``ObjectDBW.store``
-        :return: Channel yielding ODB_OStream|InvalidODB_OStream instances in any order"""
+        :return: Channel yielding OStream|InvalidOStream instances in any order"""
         raise NotImplementedError("To be implemented in subclass")
 
     #} END query interface
 
 
 class ObjectDBW(object):
     """Defines an interface to create objects in the database"""
-    __slots__ = "_ostream"
 
     def __init__(self, *args, **kwargs):
         self._ostream = None
@@ -99,12 +103,12 @@
     def ostream(self):
         """:return: overridden output stream this instance will write to, or None
         if it will write to the default stream"""
-        return self._ostream
+        return self._ostream
 
     def store(self, istream):
         """Create a new object in the database
         :return: the input istream object with its sha set to its corresponding value
-        :param istream: ODB_IStream compatible instance. If its sha is already set
+        :param istream: IStream compatible instance. If its sha is already set
             to a value, the object will just be stored in the our database format,
             in which case the input stream is expected to be in object format ( header + contents ).
         :raise IOError: if data could not be written"""
@@ -115,22 +119,16 @@
         return right away, returning an output channel which receives the results as
         they are computed.
 
-        :return: Channel yielding your ODB_IStream which served as input, in any order.
+        :return: Channel yielding your IStream which served as input, in any order.
             The IStreams sha will be set to the sha it received during the process,
             or its error attribute will be set to the exception informing about the error.
-        :param input_channel: Channel yielding ODB_IStream instance.
+        :param input_channel: Channel yielding IStream instance.
             As the same instances will be used in the output channel, you can create a map
             between the id(istream) -> istream
         :note:As some ODB implementations implement this operation as atomic, they might
             abort the whole operation if one item could not be processed. Hence check how
             many items have actually been produced."""
-        # a trivial implementation, ignoring the threads for now
-        # TODO: add configuration to the class to determine whether we may
-        # actually use multiple threads, default False of course. If the add
-        shas = list()
-        for args in iter_info:
-            shas.append(self.store(dry_run=dry_run, sha_as_hex=sha_as_hex, *args))
-        return shas
+        raise NotImplementedError("To be implemented in subclass")
 
     #} END edit interface
 
@@ -138,7 +136,6 @@ class ObjectDBW(object):
 class FileDBBase(object):
     """Provides basic facilities to retrieve files of interest, including
     caching facilities to help mapping hexsha's to objects"""
-    __slots__ = ('_root_path', )
 
     def __init__(self, root_path):
         """Initialize this instance to look for its files at the given root path
@@ -164,15 +161,11 @@ class FileDBBase(object):
         return join(self._root_path, rela_path)
     #} END interface
 
-    #{ Utiltities
-
-
-    #} END utilities
 
 
 class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
     """A database which operates on loose object files"""
-    __slots__ = ('_hexsha_to_file', '_fd_open_flags')
+    # CONFIGURATION
     # chunks in which data will be copied between streams
     stream_chunk_size = chunk_size
 
@@ -238,21 +231,26 @@ class LooseObjectDB(FileDBBase, ObjectDBR, ObjectDBW):
         finally:
             os.close(fd)
         # END assure file is closed
+
+    def set_ostream(self, stream):
+        """:raise TypeError: if the stream does not support the Sha1Writer interface"""
+        if stream is not None and not isinstance(stream, Sha1Writer):
+            raise TypeError("Output stream musst support the %s interface" % Sha1Writer.__name__)
+        return super(LooseObjectDB, self).set_ostream(stream)
 
     def info(self, sha):
         m = self._map_loose_object(sha)
         try:
-            return loose_object_header_info(m)
+            type, size = loose_object_header_info(m)
+            return OInfo(sha, type, size)
         finally:
             m.close()
         # END assure release of system resources
 
-    def object(self, sha):
+    def stream(self, sha):
         m = self._map_loose_object(sha)
-        reader = DecompressMemMapReader(m, close_on_deletion = True)
-        type, size = reader.initialize()
-
-        return type, size, reader
+        type, size, stream = DecompressMemMapReader.new(m, close_on_deletion = True)
+        return OStream(sha, type, size, stream)
 
     def has_object(self, sha):
         try:
@@ -263,27 +261,33 @@
         # END check existance
 
     def store(self, istream):
-        # open a tmp file to write the data to
-        # todo: implement ostream properly
-        fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
-        writer = FDCompressedSha1Writer(fd)
+        """note: The sha we produce will be hex by nature"""
+        assert istream.sha is None, "Direct istream writing not yet implemented"
+        tmp_path = None
+        writer = self.ostream()
+        if writer is None:
+            # open a tmp file to write the data to
+            fd, tmp_path = tempfile.mkstemp(prefix='obj', dir=self._root_path)
+            writer = FDCompressedSha1Writer(fd)
+        # END handle custom writer
 
         try:
-            write_object(type, size, stream, writer,
-                            close_target_stream=True, chunk_size=self.stream_chunk_size)
-        except:
-            os.remove(tmp_path)
-            raise
-        # END assure tmpfile removal on error
-
+            try:
+                write_object(istream.type, istream.size, istream.read, writer.write,
+                                chunk_size=self.stream_chunk_size)
+            except:
+                if tmp_path:
+                    os.remove(tmp_path)
+                raise
+            # END assure tmpfile removal on error
+        finally:
+            if tmp_path:
+                writer.close()
+        # END assure target stream is closed
 
-        # in dry-run mode, we delete the file afterwards
         sha = writer.sha(as_hex=True)
 
-        if dry_run:
-            os.remove(tmp_path)
-        else:
-            # rename the file into place
+        if tmp_path:
             obj_path = self.db_path(self.object_path(sha))
             obj_dir = dirname(obj_path)
             if not isdir(obj_dir):
@@ -292,11 +296,8 @@
             rename(tmp_path, obj_path)
         # END handle dry_run
 
-        if not sha_as_hex:
-            sha = hex_to_bin(sha)
-        # END handle sha format
-
-        return sha
+        istream.sha = sha
+        return istream
 
 
 class PackedDB(FileDBBase, ObjectDBR):
@@ -320,18 +321,17 @@ class GitObjectDB(LooseObjectDB):
     :note: for now, we use the git command to do all the lookup, just until he
     have packs and the other implementations
     """
-    __slots__ = ('_git', )
 
     def __init__(self, root_path, git):
         """Initialize this instance with the root and a git command"""
         super(GitObjectDB, self).__init__(root_path)
         self._git = git
 
     def info(self, sha):
-        discard, type, size = self._git.get_object_header(sha)
-        return type, size
+        t = self._git.get_object_header(sha)
+        return OInfo(t[0], t[1], t[2])
 
-    def object(self, sha):
+    def stream(self, sha):
         """For now, all lookup is done by git itself"""
-        discard, type, size, stream = self._git.stream_object_data(sha)
-        return type, size, stream
+        t = self._git.stream_object_data(sha)
+        return OStream(t[0], t[1], t[2], t[3])
diff --git a/lib/git/odb/fun.py b/lib/git/odb/fun.py
index ee7144dd..870a6f02 100644
--- a/lib/git/odb/fun.py
+++ b/lib/git/odb/fun.py
@@ -21,6 +21,8 @@ type_id_to_type_map = {
 # used when dealing with larger streams
 chunk_size = 1000*1000
 
+__all__ = ('is_loose_object', 'loose_object_header_info', 'object_header_info',
+            'write_object' )
 
 #{ Routines
 
@@ -73,42 +75,34 @@
         raise BadObjectType(type_id)
     # END handle exceptions
 
-def write_object(type, size, source_stream, target_stream, close_target_stream=True,
-                    chunk_size=chunk_size):
+def write_object(type, size, read, write, chunk_size=chunk_size):
     """Write the object as identified by type, size and source_stream into the
     target_stream
 
     :param type: type string of the object
     :param size: amount of bytes to write from source_stream
-    :param source_stream: stream as file-like object providing at least size bytes
-    :param target_stream: stream as file-like object to receive the data
+    :param read: read method of a stream providing the content data
+    :param write: write method of the output stream
     :param close_target_stream: if True, the target stream will be closed when
         the routine exits, even if an error is thrown
-    :param chunk_size: size of chunks to read from source. Larger values can be beneficial
-        for io performance, but cost more memory as well
    :return: The actual amount of bytes written to stream,
        which includes the header and a trailing newline"""
    tbw = 0                                # total num bytes written
    dbw = 0                                # num data bytes written
-    try:
-        # WRITE HEADER: type SP size NULL
-        tbw += target_stream.write("%s %i\0" % (type, size))
-
-        # WRITE ALL DATA UP TO SIZE
-        while True:
-            cs = min(chunk_size, size-dbw)
-            data_len = target_stream.write(source_stream.read(cs))
-            dbw += data_len
-            if data_len < cs or dbw == size:
-                tbw += dbw
-                break
-            # END check for stream end
-        # END duplicate data
-        return tbw
-    finally:
-        if close_target_stream:
-            target_stream.close()
-        # END handle stream closing
-    # END assure file was closed
+
+    # WRITE HEADER: type SP size NULL
+    tbw += write("%s %i\0" % (type, size))
+
+    # WRITE ALL DATA UP TO SIZE
+    while True:
+        cs = min(chunk_size, size-dbw)
+        data_len = write(read(cs))
+        dbw += data_len
+        if data_len < cs or dbw == size:
+            tbw += dbw
+            break
+        # END check for stream end
+    # END duplicate data
+    return tbw
 
 
 #} END routines
diff --git a/lib/git/odb/stream.py b/lib/git/odb/stream.py
index 325c1444..d1181382 100644
--- a/lib/git/odb/stream.py
+++ b/lib/git/odb/stream.py
@@ -5,10 +5,13 @@ import errno
 
 from utils import (
         to_hex_sha,
-        to_bin_sha
+        to_bin_sha,
+        write,
+        close
     )
 
-__all__ = ('FDCompressedSha1Writer', 'DecompressMemMapReader')
+__all__ = ('OInfo', 'OStream', 'IStream', 'InvalidOInfo', 'InvalidOStream',
+            'DecompressMemMapReader', 'FDCompressedSha1Writer')
 
 
 # ZLIB configuration
@@ -18,7 +21,7 @@ Z_BEST_SPEED = 1
 
 #{ ODB Bases
 
-class ODB_Info(tuple):
+class OInfo(tuple):
     """Carries information about an object in an ODB, provdiing information
     about the sha of the object, the type_string as well as the uncompressed size
     in bytes.
 
@@ -35,8 +38,8 @@
     def __new__(cls, sha, type, size):
         return tuple.__new__(cls, (sha, type, size))
 
-    def __init__(self, sha, type, size):
-        pass
+    def __init__(self, *args):
+        tuple.__init__(self)
 
     #{ Interface
     @property
@@ -53,38 +56,52 @@
     #} END interface
 
 
-class ODB_OStream(ODB_Info):
+class OStream(OInfo):
     """Base for object streams retrieved from the database, providing additional
     information about the stream.
     Generally, ODB streams are read-only as objects are immutable"""
     __slots__ = tuple()
 
-    def __new__(cls, sha, type, size, *args, **kwargs):
+    def __new__(cls, sha, type, size, stream, *args, **kwargs):
         """Helps with the initialization of subclasses"""
-        return tuple.__new__(cls, (sha, type, size))
+        return tuple.__new__(cls, (sha, type, size, stream))
+
+
+    def __init__(self, *args, **kwargs):
+        tuple.__init__(self)
 
     #{ Interface
     def is_compressed(self):
-        """:return: True if reads of this stream yield zlib compressed data.
+        """:return: True if reads of this stream yield zlib compressed data. Default False
         :note: this does not imply anything about the actual internal storage.
             Hence the data could be uncompressed, but read compressed, or vice versa"""
-        raise NotImplementedError("To be implemented by subclass")
+        raise False
+
+    #} END interface
+
+    #{ Stream Reader Interface
+
+    def read(self, size=-1):
+        return self[3].read(size)
+
+    #} END stream reader interface
 
 
-class ODB_IStream(list):
+class IStream(list):
     """Represents an input content stream to be fed into the ODB. It is mutable to allow
     the ODB to record information about the operations outcome right in this instance.
-    It provides interfaces for the ODB_OStream and a StreamReader to allow the instance
+    It provides interfaces for the OStream and a StreamReader to allow the instance
     to blend in without prior conversion.
 
     The only method your content stream must support is 'read'"""
     __slots__ = tuple()
 
     def __new__(cls, type, size, stream, sha=None, compressed=False):
-        list.__new__(cls, (sha, type, size, stream, compressed, None))
+        return list.__new__(cls, (sha, type, size, stream, compressed, None))
 
-    def __init__(cls, type, size, stream, sha=None, compressed=None):
-        pass
+    def __init__(self, type, size, stream, sha=None, compressed=None):
+        list.__init__(self, (sha, type, size, stream, compressed, None))
 
     #{ Interface
 
@@ -127,25 +144,42 @@
 
     sha = property(_sha, _set_sha)
 
-    @property
-    def type(self):
+
+    def _type(self):
         return self[1]
+
+    def _set_type(self, type):
+        self[1] = type
 
-    @property
-    def size(self):
+    type = property(_type, _set_type)
+
+    def _size(self):
         return self[2]
+
+    def _set_size(self, size):
+        self[2] = size
+
+    size = property(_size, _set_size)
+
+    def _stream(self):
+        return self[3]
+
+    def _set_stream(self, stream):
+        self[3] = stream
+
+    stream = property(_stream, _set_stream)
 
     #} END odb info interface
 
-    #{ ODB_OStream interface
+    #{ OStream interface
 
     def is_compressed(self):
         return self[4]
 
-    #} END ODB_OStream interface
+    #} END OStream interface
 
 
-class InvalidODB_Info(tuple):
+class InvalidOInfo(tuple):
     """Carries information about a sha identifying an object which is invalid in
     the queried database. The exception attribute provides more information about
     the cause of the issue"""
 
@@ -155,7 +189,7 @@
         return tuple.__new__(cls, (sha, exc))
 
     def __init__(self, sha, exc):
-        pass
+        tuple.__init__(self, (sha, exc))
 
     @property
     def sha(self):
@@ -166,7 +200,8 @@
         """:return: exception instance explaining the failure"""
         return self[1]
 
-class InvalidODB_OStream(InvalidODB_Info):
+
+class InvalidOStream(InvalidOInfo):
     """Carries information about an invalid ODB stream"""
     __slots__ = tuple()
 
@@ -175,7 +210,7 @@
 
 #{ RO Streams
 
-class DecompressMemMapReader(ODB_OStream):
+class DecompressMemMapReader(object):
     """Reads data in chunks from a memory map and decompresses it. The client sees
     only the uncompressed data, respective file-like read calls are handling on-demand
     buffered decompression accordingly
 
@@ -192,17 +227,17 @@
     times we actually allocate. An own zlib implementation would be good here
     to better support streamed reading - it would only need to keep the mmap
     and decompress it into chunks, thats all ...
     """
-    # __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')
+    __slots__ = ('_m', '_zip', '_buf', '_buflen', '_br', '_cws', '_cwe', '_s', '_close')
 
     max_read_size = 512*1024
 
-    def __init__(self, sha, type, size, m, close_on_deletion):
+    def __init__(self, m, close_on_deletion, size):
         """Initialize with mmap for stream reading"""
         self._m = m
         self._zip = zlib.decompressobj()
         self._buf = None                        # buffer of decompressed bytes
         self._buflen = 0                        # length of bytes in buffer
-        self._s = 0                             # size of uncompressed data to read in total
+        self._s = size                          # size of uncompressed data to read in total
         self._br = 0                            # num uncompressed bytes read
         self._cws = 0                           # start byte of compression window
         self._cwe = 0                           # end byte of compression window
 
@@ -213,34 +248,33 @@
             self._m.close()
         # END handle resource freeing
 
-    def initialize(self, size=0):
-        """Initialize this instance for acting as a read-only stream for size bytes.
-        :param size: size in bytes to be decompresed before being depleted.
-            If 0, default object header information is parsed from the data,
-            returning a tuple of (type_string, uncompressed_size)
-            If not 0, the size will be used, and None is returned.
-        :note: must only be called exactly once"""
-        if size:
-            self._s = size
-            return
-        # END handle size
+    @classmethod
+    def new(self, m, close_on_deletion=False):
+        """Create a new DecompressMemMapReader instance for acting as a read-only stream
+        This method parses the object header from m and returns the parsed
+        type and size, as well as the created stream instance.
+        :param m: memory map on which to oparate
+        :param close_on_deletion: if True, the memory map will be closed once we are
+            being deleted"""
+        inst = DecompressMemMapReader(m, close_on_deletion, 0)
 
         # read header
         maxb = 512                # should really be enough, cgit uses 8192 I believe
-        self._s = maxb
-        hdr = self.read(maxb)
+        inst._s = maxb
+        hdr = inst.read(maxb)
         hdrend = hdr.find("\0")
         type, size = hdr[:hdrend].split(" ")
-        self._s = int(size)
+        size = int(size)
+        inst._s = size
 
         # adjust internal state to match actual header length that we ignore
         # The buffer will be depleted first on future reads
-        self._br = 0
+        inst._br = 0
         hdrend += 1                            # count terminating \0
-        self._buf = StringIO(hdr[hdrend:])
-        self._buflen = len(hdr) - hdrend
+        inst._buf = StringIO(hdr[hdrend:])
+        inst._buflen = len(hdr) - hdrend
 
-        return type, size
+        return type, size, inst
 
     def read(self, size=-1):
         if size < 1:
 
@@ -346,7 +380,35 @@
 
 #{ W Streams
 
-class FDCompressedSha1Writer(object):
+class Sha1Writer(object):
+    """Simple stream writer which produces a sha whenever you like as it degests
+    everything it is supposed to write"""
+
+    def __init__(self):
+        self.sha1 = make_sha("")
+
+    #{ Stream Interface
+
+    def write(self, data):
+        """:raise IOError: If not all bytes could be written
+        :return: lenght of incoming data"""
+        self.sha1.update(data)
+        return len(data)
+
+    # END stream interface
+
+    #{ Interface
+
+    def sha(self, as_hex = False):
+        """:return: sha so far
+        :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
+        if as_hex:
+            return self.sha1.hexdigest()
+        return self.sha1.digest()
+
+    #} END interface
+
+
+class FDCompressedSha1Writer(Sha1Writer):
     """Digests data written to it, making the sha available, then compress the
     data and write it to the file descriptor
     :note: operates on raw file descriptors
 
@@ -357,10 +419,12 @@
     exc = IOError("Failed to write all bytes to filedescriptor")
 
     def __init__(self, fd):
+        super(FDCompressedSha1Writer, self).__init__()
         self.fd = fd
-        self.sha1 = make_sha("")
         self.zip = zlib.compressobj(Z_BEST_SPEED)
 
+    #{ Stream Interface
+
     def write(self, data):
         """:raise IOError: If not all bytes could be written
         :return: lenght of incoming data"""
@@ -371,18 +435,12 @@
             raise self.exc
         return len(data)
 
-    def sha(self, as_hex = False):
-        """:return: sha so far
-        :param as_hex: if True, sha will be hex-encoded, binary otherwise"""
-        if as_hex:
-            return self.sha1.hexdigest()
-        return self.sha1.digest()
-
     def close(self):
         remainder = self.zip.flush()
         if write(self.fd, remainder) != len(remainder):
             raise self.exc
         return close(self.fd)
 
+    #} END stream interface
 
 #} END W streams
diff --git a/lib/git/odb/utils.py b/lib/git/odb/utils.py
index 61565ba9..6863e97b 100644
--- a/lib/git/odb/utils.py
+++ b/lib/git/odb/utils.py
@@ -2,8 +2,6 @@ import binascii
 import os
 import errno
 
-__all__ = ('FDSha1Writer', )
-
 #{ Routines
 
 hex_to_bin = binascii.a2b_hex
diff --git a/lib/git/repo.py b/lib/git/repo.py
index 1afb1eb7..78e5f526 100644
--- a/lib/git/repo.py
+++ b/lib/git/repo.py
@@ -13,7 +13,7 @@ from objects import *
 from config import GitConfigParser
 from remote import Remote
 
-from odb.db import GitObjectDB
+from odb import GitObjectDB
 
 import os
 import sys
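To make the new interface concrete, here is a minimal usage sketch of the IStream/OStream API this commit introduces. The database path and content are made up; IStream(type, size, stream), ObjectDBW.store() and ObjectDBR.stream() are as defined in the diff above (Python 2, like the codebase).

from cStringIO import StringIO
from git.odb import LooseObjectDB, IStream

# hypothetical loose-object store; any .git/objects directory would do
ldb = LooseObjectDB("/path/to/repo/.git/objects")

data = "my data"
istream = IStream("blob", len(data), StringIO(data))

# store() writes the object and records the generated hex sha on the istream
ldb.store(istream)
print istream.sha

# stream() yields an OStream: a (sha, type, size, stream) tuple with read()
ostream = ldb.stream(istream.sha)
assert ostream.type == "blob" and ostream.size == len(data)
assert ostream.read() == data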
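For context, the on-disk format write_object and the two writers cooperate on is git's standard loose-object format: the sha1 is computed over a "<type> <size>\0" header followed by the contents, and those same bytes are zlib-compressed on disk (at Z_BEST_SPEED = 1, per FDCompressedSha1Writer). A self-contained sketch of that invariant, using plain hashlib/zlib rather than the module's code:

import hashlib
import zlib

data = "my data"
raw = "%s %i\0%s" % ("blob", len(data), data)    # header + contents

sha = hashlib.sha1(raw).hexdigest()    # what Sha1Writer.sha(as_hex=True) reports
ondisk = zlib.compress(raw, 1)         # Z_BEST_SPEED, as in FDCompressedSha1Writer

# a loose object lives at objects/<sha[:2]>/<sha[2:]>
relpath = "%s/%s" % (sha[:2], sha[2:])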
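Reading reverses the process: DecompressMemMapReader.new() decompresses just enough of the memory map to parse the header, then hands back (type, size, stream). The same parsing done naively on an in-memory object, as an illustration only:

import zlib

compressed = zlib.compress("blob 7\0my data", 1)   # made-up loose object

raw = zlib.decompress(compressed)
hdrend = raw.find("\0")
type, size = raw[:hdrend].split(" ")               # -> 'blob', '7'
assert int(size) == len(raw[hdrend + 1:])          # body follows the NUL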