summaryrefslogtreecommitdiff
path: root/gitdb
diff options
context:
space:
mode:
authorThomas Grainger <tom.grainger@procensus.com>2016-10-04 18:04:24 +0100
committerThomas Grainger <tom.grainger@procensus.com>2016-10-04 18:05:26 +0100
commit62815b5c5a4c39e9ace3d20ec0c593011201dbcf (patch)
treedcae6f6f0d399d50a1bceaffb61fa2bab95009e0 /gitdb
parent847a21ebf0f20f708b1d879f7f8e865ae9d64817 (diff)
downloadgitdb-62815b5c5a4c39e9ace3d20ec0c593011201dbcf.tar.gz
support optional gitdb_speedups
Diffstat (limited to 'gitdb')
-rw-r--r--gitdb/_delta_apply.c1154
-rw-r--r--gitdb/_delta_apply.h6
-rw-r--r--gitdb/_fun.c107
-rw-r--r--gitdb/fun.py2
-rw-r--r--gitdb/pack.py2
-rw-r--r--gitdb/stream.py2
6 files changed, 3 insertions, 1270 deletions
diff --git a/gitdb/_delta_apply.c b/gitdb/_delta_apply.c
deleted file mode 100644
index f4fffdc..0000000
--- a/gitdb/_delta_apply.c
+++ /dev/null
@@ -1,1154 +0,0 @@
-#include <_delta_apply.h>
-#include <stdint.h>
-#include <assert.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-
-
-
-typedef unsigned long long ull;
-typedef unsigned int uint;
-typedef unsigned char uchar;
-typedef unsigned short ushort;
-typedef uchar bool;
-
-// Constants
-const ull gDIV_grow_by = 100;
-
-
-
-// DELTA STREAM ACCESS
-///////////////////////
-inline
-ull msb_size(const uchar** datap, const uchar* top)
-{
- const uchar *data = *datap;
- ull cmd, size = 0;
- uint i = 0;
- do {
- cmd = *data++;
- size |= (cmd & 0x7f) << i;
- i += 7;
- } while (cmd & 0x80 && data < top);
- *datap = data;
- return size;
-}
-
-
-// TOP LEVEL STREAM INFO
-/////////////////////////////
-typedef struct {
- const uchar *tds; // Toplevel delta stream
- const uchar *cstart; // start of the chunks
- Py_ssize_t tdslen; // size of tds in bytes
- Py_ssize_t target_size; // size of the target buffer which can hold all data
- uint num_chunks; // amount of chunks in the delta stream
- PyObject *parent_object;
-} ToplevelStreamInfo;
-
-
-void TSI_init(ToplevelStreamInfo* info)
-{
- info->tds = NULL;
- info->cstart = NULL;
- info->tdslen = 0;
- info->num_chunks = 0;
- info->target_size = 0;
- info->parent_object = 0;
-}
-
-void TSI_destroy(ToplevelStreamInfo* info)
-{
-#ifdef DEBUG
- fprintf(stderr, "TSI_destroy: %p\n", info);
-#endif
-
- if (info->parent_object){
- Py_DECREF(info->parent_object);
- info->parent_object = NULL;
- } else if (info->tds){
- PyMem_Free((void*)info->tds);
- }
- info->tds = NULL;
- info->cstart = NULL;
- info->tdslen = 0;
- info->num_chunks = 0;
-}
-
-inline
-const uchar* TSI_end(ToplevelStreamInfo* info)
-{
- return info->tds + info->tdslen;
-}
-
-inline
-const uchar* TSI_first(ToplevelStreamInfo* info)
-{
- return info->cstart;
-}
-
-// set the stream, and initialize it
-// initialize our set stream to point to the first chunk
-// Fill in the header information, which is the base and target size
-inline
-void TSI_set_stream(ToplevelStreamInfo* info, const uchar* stream)
-{
- info->tds = stream;
- info->cstart = stream;
-
- assert(info->tds && info->tdslen);
-
- // init stream
- const uchar* tdsend = TSI_end(info);
- msb_size(&info->cstart, tdsend); // base size
- info->target_size = msb_size(&info->cstart, tdsend);
-}
-
-
-
-// duplicate the data currently owned by the parent object drop its refcount
-// return 1 on success
-bool TSI_copy_stream_from_object(ToplevelStreamInfo* info)
-{
- assert(info->parent_object);
-
- uchar* ptmp = PyMem_Malloc(info->tdslen);
- if (!ptmp){
- return 0;
- }
- uint ofs = (uint)(info->cstart - info->tds);
- memcpy((void*)ptmp, info->tds, info->tdslen);
-
- info->tds = ptmp;
- info->cstart = ptmp + ofs;
-
- Py_DECREF(info->parent_object);
- info->parent_object = 0;
-
- return 1;
-}
-
-// Transfer ownership of the given stream into our instance. The amount of chunks
-// remains the same, and needs to be set by the caller
-void TSI_replace_stream(ToplevelStreamInfo* info, const uchar* stream, uint streamlen)
-{
- assert(info->parent_object == 0);
-
- uint ofs = (uint)(info->cstart - info->tds);
- if (info->tds){
- PyMem_Free((void*)info->tds);
- }
- info->tds = stream;
- info->cstart = info->tds + ofs;
- info->tdslen = streamlen;
-
-}
-
-// DELTA CHUNK
-////////////////
-// Internal Delta Chunk Objects
-// They are just used to keep information parsed from a stream
-// The data pointer is always shared
-typedef struct {
- ull to;
- uint ts;
- uint so;
- const uchar* data;
-} DeltaChunk;
-
-// forward declarations
-const uchar* next_delta_info(const uchar*, DeltaChunk*);
-
-inline
-void DC_init(DeltaChunk* dc, ull to, ull ts, ull so, const uchar* data)
-{
- dc->to = to;
- dc->ts = ts;
- dc->so = so;
- dc->data = NULL;
-}
-
-
-inline
-ull DC_rbound(const DeltaChunk* dc)
-{
- return dc->to + dc->ts;
-}
-
-inline
-void DC_print(const DeltaChunk* dc, const char* prefix)
-{
- fprintf(stderr, "%s-dc: to = %i, ts = %i, so = %i, data = %p\n", prefix, (int)dc->to, dc->ts, dc->so, dc->data);
-}
-
-// Apply
-inline
-void DC_apply(const DeltaChunk* dc, const uchar* base, PyObject* writer, PyObject* tmpargs)
-{
- PyObject* buffer = 0;
- if (dc->data){
- buffer = PyBuffer_FromMemory((void*)dc->data, dc->ts);
- } else {
- buffer = PyBuffer_FromMemory((void*)(base + dc->so), dc->ts);
- }
-
- if (PyTuple_SetItem(tmpargs, 0, buffer)){
- assert(0);
- }
-
-
- // tuple steals reference, and will take care about the deallocation
- PyObject_Call(writer, tmpargs, NULL);
-
-}
-
-// Encode the information in the given delta chunk and write the byte-stream
-// into the given output stream
-// It will be copied into the given bounds, the given size must be the final size
-// and work with the given relative offset - hence the bounds are assumed to be
-// correct and to fit within the unaltered dc
-inline
-void DC_encode_to(const DeltaChunk* dc, uchar** pout, uint ofs, uint size)
-{
- uchar* out = *pout;
- if (dc->data){
- *out++ = (uchar)size;
- memcpy(out, dc->data+ofs, size);
- out += size;
- } else {
- uchar i = 0x80;
- uchar* op = out++;
- uint moff = dc->so+ofs;
-
- if (moff & 0x000000ff)
- *out++ = moff >> 0, i |= 0x01;
- if (moff & 0x0000ff00)
- *out++ = moff >> 8, i |= 0x02;
- if (moff & 0x00ff0000)
- *out++ = moff >> 16, i |= 0x04;
- if (moff & 0xff000000)
- *out++ = moff >> 24, i |= 0x08;
-
- if (size & 0x00ff)
- *out++ = size >> 0, i |= 0x10;
- if (size & 0xff00)
- *out++ = size >> 8, i |= 0x20;
-
- *op = i;
- }
-
- *pout = out;
-}
-
-// Return: amount of bytes one would need to encode dc
-inline
-ushort DC_count_encode_bytes(const DeltaChunk* dc)
-{
- if (dc->data){
- return 1 + dc->ts; // cmd byte + actual data bytes
- } else {
- ushort c = 1; // cmd byte
- uint ts = dc->ts;
- ull so = dc->so;
-
- // offset
- c += (so & 0x000000FF) > 0;
- c += (so & 0x0000FF00) > 0;
- c += (so & 0x00FF0000) > 0;
- c += (so & 0xFF000000) > 0;
-
- // size - max size is 0x10000, its encoded with 0 size bits
- c += (ts & 0x000000FF) > 0;
- c += (ts & 0x0000FF00) > 0;
-
- return c;
- }
-}
-
-
-
-// DELTA INFO
-/////////////
-typedef struct {
- uint dso; // delta stream offset, relative to the very start of the stream
- uint to; // target offset (cache)
-} DeltaInfo;
-
-
-// DELTA INFO VECTOR
-//////////////////////
-
-typedef struct {
- DeltaInfo *mem; // Memory for delta infos
- uint di_last_size; // size of the last element - we can't compute it using the next bound
- const uchar *dstream; // borrowed ointer to delta stream we index
- Py_ssize_t size; // Amount of DeltaInfos
- Py_ssize_t reserved_size; // Reserved amount of DeltaInfos
-} DeltaInfoVector;
-
-
-
-// Reserve enough memory to hold the given amount of delta chunks
-// Return 1 on success
-// NOTE: added a minimum allocation to assure reallocation is not done
-// just for a single additional entry. DIVs change often, and reallocs are expensive
-inline
-int DIV_reserve_memory(DeltaInfoVector* vec, uint num_dc)
-{
- if (num_dc <= vec->reserved_size){
- return 1;
- }
-
-#ifdef DEBUG
- bool was_null = vec->mem == NULL;
-#endif
-
- if (vec->mem == NULL){
- vec->mem = PyMem_Malloc(num_dc * sizeof(DeltaInfo));
- } else {
- vec->mem = PyMem_Realloc(vec->mem, num_dc * sizeof(DeltaInfo));
- }
-
- if (vec->mem == NULL){
- Py_FatalError("Could not allocate memory for append operation");
- }
-
- vec->reserved_size = num_dc;
-
-#ifdef DEBUG
- const char* format = "Allocated %i bytes at %p, to hold up to %i chunks\n";
- if (!was_null)
- format = "Re-allocated %i bytes at %p, to hold up to %i chunks\n";
- fprintf(stderr, format, (int)(vec->reserved_size * sizeof(DeltaInfo)), vec->mem, (int)vec->reserved_size);
-#endif
-
- return vec->mem != NULL;
-}
-
-/*
-Grow the delta chunk list by the given amount of bytes.
-This may trigger a realloc, but will do nothing if the reserved size is already
-large enough.
-Return 1 on success, 0 on failure
-*/
-inline
-int DIV_grow_by(DeltaInfoVector* vec, uint num_dc)
-{
- return DIV_reserve_memory(vec, vec->reserved_size + num_dc);
-}
-
-int DIV_init(DeltaInfoVector* vec, ull initial_size)
-{
- vec->mem = NULL;
- vec->dstream = NULL;
- vec->size = 0;
- vec->reserved_size = 0;
- vec->di_last_size = 0;
-
- return DIV_grow_by(vec, initial_size);
-}
-
-inline
-Py_ssize_t DIV_len(const DeltaInfoVector* vec)
-{
- return vec->size;
-}
-
-inline
-uint DIV_lbound(const DeltaInfoVector* vec)
-{
- assert(vec->size && vec->mem);
- return vec->mem->to;
-}
-
-// Return item at index
-inline
-DeltaInfo* DIV_get(const DeltaInfoVector* vec, Py_ssize_t i)
-{
- assert(i < vec->size && vec->mem);
- return &vec->mem[i];
-}
-
-// Return last item
-inline
-DeltaInfo* DIV_last(const DeltaInfoVector* vec)
-{
- return DIV_get(vec, vec->size-1);
-}
-
-inline
-int DIV_empty(const DeltaInfoVector* vec)
-{
- return vec->size == 0;
-}
-
-// Return end pointer of the vector
-inline
-const DeltaInfo* DIV_end(const DeltaInfoVector* vec)
-{
- assert(!DIV_empty(vec));
- return vec->mem + vec->size;
-}
-
-// return first item in vector
-inline
-DeltaInfo* DIV_first(const DeltaInfoVector* vec)
-{
- assert(!DIV_empty(vec));
- return vec->mem;
-}
-
-// return rbound offset in bytes. We use information contained in the
-// vec to do that
-inline
-uint DIV_info_rbound(const DeltaInfoVector* vec, const DeltaInfo* di)
-{
- if (DIV_last(vec) == di){
- return di->to + vec->di_last_size;
- } else {
- return (di+1)->to;
- }
-}
-
-// return size of the given delta info item
-inline
-uint DIV_info_size2(const DeltaInfoVector* vec, const DeltaInfo* di, const DeltaInfo* const veclast)
-{
- if (veclast == di){
- return vec->di_last_size;
- } else {
- return (di+1)->to - di->to;
- }
-}
-
-// return size of the given delta info item
-inline
-uint DIV_info_size(const DeltaInfoVector* vec, const DeltaInfo* di)
-{
- return DIV_info_size2(vec, di, DIV_last(vec));
-}
-
-void DIV_destroy(DeltaInfoVector* vec)
-{
- if (vec->mem){
-#ifdef DEBUG
- fprintf(stderr, "DIV_destroy: %p\n", (void*)vec->mem);
-#endif
- PyMem_Free(vec->mem);
- vec->size = 0;
- vec->reserved_size = 0;
- vec->mem = 0;
- }
-}
-
-// Reset this vector so that its existing memory can be filled again.
-// Memory will be kept, but not cleaned up
-inline
-void DIV_forget_members(DeltaInfoVector* vec)
-{
- vec->size = 0;
-}
-
-// Reset the vector so that its size will be zero
-// It will keep its memory though, and hence can be filled again
-inline
-void DIV_reset(DeltaInfoVector* vec)
-{
- if (vec->size == 0)
- return;
- vec->size = 0;
-}
-
-
-// Append one chunk to the end of the list, and return a pointer to it
-// It will not have been initialized !
-inline
-DeltaInfo* DIV_append(DeltaInfoVector* vec)
-{
- if (vec->size + 1 > vec->reserved_size){
- DIV_grow_by(vec, gDIV_grow_by);
- }
-
- DeltaInfo* next = vec->mem + vec->size;
- vec->size += 1;
- return next;
-}
-
-// Return delta chunk being closest to the given absolute offset
-inline
-DeltaInfo* DIV_closest_chunk(const DeltaInfoVector* vec, ull ofs)
-{
- assert(vec->mem);
-
- ull lo = 0;
- ull hi = vec->size;
- ull mid;
- DeltaInfo* di;
-
- while (lo < hi)
- {
- mid = (lo + hi) / 2;
- di = vec->mem + mid;
- if (di->to > ofs){
- hi = mid;
- } else if ((DIV_info_rbound(vec, di) > ofs) | (di->to == ofs)) {
- return di;
- } else {
- lo = mid + 1;
- }
- }
-
- return DIV_last(vec);
-}
-
-
-// Return the amount of chunks a slice at the given spot would have, as well as
-// its size in bytes it would have if the possibly partial chunks would be encoded
-// and added to the spot marked by sdc
-uint DIV_count_slice_bytes(const DeltaInfoVector* src, uint ofs, uint size)
-{
- uint num_bytes = 0;
- DeltaInfo* cdi = DIV_closest_chunk(src, ofs);
-
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
-
- // partial overlap
- if (cdi->to != ofs) {
- const ull relofs = ofs - cdi->to;
- const uint cdisize = DIV_info_size(src, cdi);
- const uint max_size = cdisize - relofs < size ? cdisize - relofs : size;
- size -= max_size;
-
- // get the size in bytes the info would have
- next_delta_info(src->dstream + cdi->dso, &dc);
- dc.so += relofs;
- dc.ts = max_size;
- num_bytes += DC_count_encode_bytes(&dc);
-
- cdi += 1;
-
- if (size == 0){
- return num_bytes;
- }
- }
-
- const DeltaInfo* const vecend = DIV_end(src);
- const uchar* nstream;
- for( ;cdi < vecend; ++cdi){
- nstream = next_delta_info(src->dstream + cdi->dso, &dc);
-
- if (dc.ts < size) {
- num_bytes += nstream - (src->dstream + cdi->dso);
- size -= dc.ts;
- } else {
- dc.ts = size;
- num_bytes += DC_count_encode_bytes(&dc);
- size = 0;
- break;
- }
- }
-
- assert(size == 0);
- return num_bytes;
-}
-
-// Write a slice as defined by its absolute offset in bytes and its size into the given
-// destination memory. The individual chunks written will be a byte copy of the source
-// data chunk stream
-// Return: number of chunks in the slice
-uint DIV_copy_slice_to(const DeltaInfoVector* src, uchar** dest, ull tofs, uint size)
-{
- assert(DIV_lbound(src) <= tofs);
- assert((tofs + size) <= DIV_info_rbound(src, DIV_last(src)));
-
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
-
- DeltaInfo* cdi = DIV_closest_chunk(src, tofs);
- uint num_chunks = 0;
-
- // partial overlap
- if (cdi->to != tofs) {
- const uint relofs = tofs - cdi->to;
- next_delta_info(src->dstream + cdi->dso, &dc);
- const uint max_size = dc.ts - relofs < size ? dc.ts - relofs : size;
-
- size -= max_size;
-
- // adjust dc proportions
- DC_encode_to(&dc, dest, relofs, max_size);
-
- num_chunks += 1;
- cdi += 1;
-
- if (size == 0){
- return num_chunks;
- }
- }
-
- const uchar* dstream = src->dstream + cdi->dso;
- const uchar* nstream = dstream;
- for( ; nstream; dstream = nstream)
- {
- num_chunks += 1;
- nstream = next_delta_info(dstream, &dc);
- if (dc.ts < size) {
- memcpy(*dest, dstream, nstream - dstream);
- *dest += nstream - dstream;
- size -= dc.ts;
- } else {
- DC_encode_to(&dc, dest, 0, size);
- size = 0;
- break;
- }
- }
-
- assert(size == 0);
- return num_chunks;
-}
-
-
-// Take slices of div into the corresponding area of the tsi, which is the topmost
-// delta to apply.
-bool DIV_connect_with_base(ToplevelStreamInfo* tsi, DeltaInfoVector* div)
-{
- assert(tsi->num_chunks);
-
-
- uint num_bytes = 0;
- const uchar* data = TSI_first(tsi);
- const uchar* dend = TSI_end(tsi);
-
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
-
-
- // COMPUTE SIZE OF TARGET STREAM
- /////////////////////////////////
- for (;data < dend;)
- {
- data = next_delta_info(data, &dc);
-
- // Data chunks don't need processing
- if (dc.data){
- num_bytes += 1 + dc.ts;
- continue;
- }
-
- num_bytes += DIV_count_slice_bytes(div, dc.so, dc.ts);
- }
- assert(DC_rbound(&dc) == tsi->target_size);
-
-
- // GET NEW DELTA BUFFER
- ////////////////////////
- uchar *const dstream = PyMem_Malloc(num_bytes);
- if (!dstream){
- return 0;
- }
-
-
- data = TSI_first(tsi);
- const uchar *ndata = data;
- dend = TSI_end(tsi);
-
- uint num_chunks = 0;
- uchar* ds = dstream;
- DC_init(&dc, 0, 0, 0, NULL);
-
- // pick slices from the delta and put them into the new stream
- for (; data < dend; data = ndata)
- {
- ndata = next_delta_info(data, &dc);
-
- // Data chunks don't need processing
- if (dc.data){
- // just copy it over
- memcpy((void*)ds, (void*)data, ndata - data);
- ds += ndata - data;
- num_chunks += 1;
- continue;
- }
-
- // Copy Chunks
- num_chunks += DIV_copy_slice_to(div, &ds, dc.so, dc.ts);
- }
- assert(ds - dstream == num_bytes);
- assert(num_chunks >= tsi->num_chunks);
- assert(DC_rbound(&dc) == tsi->target_size);
-
- // finally, replace the streams
- TSI_replace_stream(tsi, dstream, num_bytes);
- tsi->cstart = dstream; // we have NO header !
- assert(tsi->tds == dstream);
- tsi->num_chunks = num_chunks;
-
-
- return 1;
-
-}
-
-// DELTA CHUNK LIST (PYTHON)
-/////////////////////////////
-// Internally, it has nothing to do with a ChunkList anymore though
-typedef struct {
- PyObject_HEAD
- // -----------
- ToplevelStreamInfo istream;
-
-} DeltaChunkList;
-
-
-
-int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
-{
- if(args && PySequence_Size(args) > 0){
- PyErr_SetString(PyExc_ValueError, "Too many arguments");
- return -1;
- }
-
- TSI_init(&self->istream);
- return 0;
-}
-
-
-void DCL_dealloc(DeltaChunkList* self)
-{
- TSI_destroy(&(self->istream));
-}
-
-
-PyObject* DCL_py_rbound(DeltaChunkList* self)
-{
- return PyLong_FromUnsignedLongLong(self->istream.target_size);
-}
-
-// Write using a write function, taking remaining bytes from a base buffer
-
-PyObject* DCL_apply(DeltaChunkList* self, PyObject* args)
-{
- PyObject* pybuf = 0;
- PyObject* writeproc = 0;
- if (!PyArg_ParseTuple(args, "OO", &pybuf, &writeproc)){
- PyErr_BadArgument();
- return NULL;
- }
-
- if (!PyObject_CheckReadBuffer(pybuf)){
- PyErr_SetString(PyExc_ValueError, "First argument must be a buffer-compatible object, like a string, or a memory map");
- return NULL;
- }
-
- if (!PyCallable_Check(writeproc)){
- PyErr_SetString(PyExc_ValueError, "Second argument must be a writer method with signature write(buf)");
- return NULL;
- }
-
- const uchar* base;
- Py_ssize_t baselen;
- PyObject_AsReadBuffer(pybuf, (const void**)&base, &baselen);
-
- PyObject* tmpargs = PyTuple_New(1);
-
- const uchar* data = TSI_first(&self->istream);
- const uchar* const dend = TSI_end(&self->istream);
-
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
-
- while (data < dend){
- data = next_delta_info(data, &dc);
- DC_apply(&dc, base, writeproc, tmpargs);
- }
-
- Py_DECREF(tmpargs);
- Py_RETURN_NONE;
-}
-
-PyMethodDef DCL_methods[] = {
- {"apply", (PyCFunction)DCL_apply, METH_VARARGS, "Apply the given iterable of delta streams" },
- {"rbound", (PyCFunction)DCL_py_rbound, METH_NOARGS, NULL},
- {NULL} /* Sentinel */
-};
-
-PyTypeObject DeltaChunkListType = {
- PyObject_HEAD_INIT(NULL)
- 0, /*ob_size*/
- "DeltaChunkList", /*tp_name*/
- sizeof(DeltaChunkList), /*tp_basicsize*/
- 0, /*tp_itemsize*/
- (destructor)DCL_dealloc, /*tp_dealloc*/
- 0, /*tp_print*/
- 0, /*tp_getattr*/
- 0, /*tp_setattr*/
- 0, /*tp_compare*/
- 0, /*tp_repr*/
- 0, /*tp_as_number*/
- 0, /*tp_as_sequence*/
- 0, /*tp_as_mapping*/
- 0, /*tp_hash */
- 0, /*tp_call*/
- 0, /*tp_str*/
- 0, /*tp_getattro*/
- 0, /*tp_setattro*/
- 0, /*tp_as_buffer*/
- Py_TPFLAGS_DEFAULT, /*tp_flags*/
- "Minimal Delta Chunk List",/* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- DCL_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- (initproc)DCL_init, /* tp_init */
- 0, /* tp_alloc */
- 0, /* tp_new */
-};
-
-
-// Makes a new copy of the DeltaChunkList - you have to do everything yourselve
-// in C ... want C++ !!
-DeltaChunkList* DCL_new_instance(void)
-{
- DeltaChunkList* dcl = (DeltaChunkList*) PyType_GenericNew(&DeltaChunkListType, 0, 0);
- assert(dcl);
-
- DCL_init(dcl, 0, 0);
- return dcl;
-}
-
-// Read the next delta chunk from the given stream and advance it
-// dc will contain the parsed information, its offset must be set by
-// the previous call of next_delta_info, which implies it should remain the
-// same instance between the calls.
-// Return the altered uchar pointer, reassign it to the input data
-inline
-const uchar* next_delta_info(const uchar* data, DeltaChunk* dc)
-{
- const char cmd = *data++;
-
- if (cmd & 0x80)
- {
- uint cp_off = 0, cp_size = 0;
- if (cmd & 0x01) cp_off = *data++;
- if (cmd & 0x02) cp_off |= (*data++ << 8);
- if (cmd & 0x04) cp_off |= (*data++ << 16);
- if (cmd & 0x08) cp_off |= ((unsigned) *data++ << 24);
- if (cmd & 0x10) cp_size = *data++;
- if (cmd & 0x20) cp_size |= (*data++ << 8);
- if (cmd & 0x40) cp_size |= (*data++ << 16); // this should never get hit with current deltas ...
- if (cp_size == 0) cp_size = 0x10000;
-
- dc->to += dc->ts;
- dc->data = NULL;
- dc->so = cp_off;
- dc->ts = cp_size;
-
- } else if (cmd) {
- // Just share the data
- dc->to += dc->ts;
- dc->data = data;
- dc->ts = cmd;
- dc->so = 0;
-
- data += cmd;
- } else {
- PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");
- assert(0);
- return NULL;
- }
-
- return data;
-}
-
-// Return amount of chunks encoded in the given delta stream
-// If read_header is True, then the header msb chunks will be read first.
-// Otherwise, the stream is assumed to be scrubbed one past the header
-uint compute_chunk_count(const uchar* data, const uchar* dend, bool read_header)
-{
- // read header
- if (read_header){
- msb_size(&data, dend);
- msb_size(&data, dend);
- }
-
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
- uint num_chunks = 0;
-
- while (data < dend)
- {
- data = next_delta_info(data, &dc);
- num_chunks += 1;
- }// END handle command opcodes
-
- return num_chunks;
-}
-
-PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
-{
- // obtain iterator
- PyObject* stream_iter = 0;
- if (!PyIter_Check(dstreams)){
- stream_iter = PyObject_GetIter(dstreams);
- if (!stream_iter){
- PyErr_SetString(PyExc_RuntimeError, "Couldn't obtain iterator for streams");
- return NULL;
- }
- } else {
- stream_iter = dstreams;
- }
-
- DeltaInfoVector div;
- ToplevelStreamInfo tdsinfo;
- TSI_init(&tdsinfo);
- DIV_init(&div, 0);
-
-
- // GET TOPLEVEL DELTA STREAM
- int error = 0;
- PyObject* ds = 0;
- unsigned int dsi = 0; // delta stream index we process
- ds = PyIter_Next(stream_iter);
- if (!ds){
- error = 1;
- goto _error;
- }
-
- dsi += 1;
- tdsinfo.parent_object = PyObject_CallMethod(ds, "read", 0);
- if (!PyObject_CheckReadBuffer(tdsinfo.parent_object)){
- Py_DECREF(ds);
- error = 1;
- goto _error;
- }
-
- PyObject_AsReadBuffer(tdsinfo.parent_object, (const void**)&tdsinfo.tds, &tdsinfo.tdslen);
- if (tdsinfo.tdslen > pow(2, 32)){
- // parent object is deallocated by info structure
- Py_DECREF(ds);
- PyErr_SetString(PyExc_RuntimeError, "Cannot handle deltas larger than 4GB");
- tdsinfo.parent_object = 0;
-
- error = 1;
- goto _error;
- }
- Py_DECREF(ds);
-
- // let it officially know, and initialize its internal state
- TSI_set_stream(&tdsinfo, tdsinfo.tds);
-
- // INTEGRATE ANCESTOR DELTA STREAMS
- for (ds = PyIter_Next(stream_iter); ds != NULL; ds = PyIter_Next(stream_iter), ++dsi)
- {
- // Its important to initialize this before the next block which can jump
- // to code who needs this to exist !
- PyObject* db = 0;
-
- // When processing the first delta, we know we will have to alter the tds
- // Hence we copy it and deallocate the parent object
- if (dsi == 1) {
- if (!TSI_copy_stream_from_object(&tdsinfo)){
- PyErr_SetString(PyExc_RuntimeError, "Could not allocate memory to copy toplevel buffer");
- // info structure takes care of the parent_object
- error = 1;
- goto loop_end;
- }
-
- tdsinfo.num_chunks = compute_chunk_count(tdsinfo.cstart, TSI_end(&tdsinfo), 0);
- }
-
- db = PyObject_CallMethod(ds, "read", 0);
- if (!PyObject_CheckReadBuffer(db)){
- error = 1;
- PyErr_SetString(PyExc_RuntimeError, "Returned buffer didn't support the buffer protocol");
- goto loop_end;
- }
-
- // Fill the stream info structure
- const uchar* data;
- Py_ssize_t dlen;
- PyObject_AsReadBuffer(db, (const void**)&data, &dlen);
- const uchar* const dstart = data;
- const uchar* const dend = data + dlen;
- div.dstream = dstart;
-
- if (dlen > pow(2, 32)){
- error = 1;
- PyErr_SetString(PyExc_RuntimeError, "Cannot currently handle deltas larger than 4GB");
- goto loop_end;
- }
-
- // READ HEADER
- msb_size(&data, dend);
- const ull target_size = msb_size(&data, dend);
-
- DIV_reserve_memory(&div, compute_chunk_count(data, dend, 0));
-
- // parse command stream
- DeltaInfo* di = 0; // temporary pointer
- DeltaChunk dc;
- DC_init(&dc, 0, 0, 0, NULL);
-
- assert(data < dend);
- while (data < dend)
- {
- di = DIV_append(&div);
- di->dso = data - dstart;
- if ((data = next_delta_info(data, &dc))){
- di->to = dc.to;
- } else {
- error = 1;
- goto loop_end;
- }
- }// END handle command opcodes
-
- // finalize information
- div.di_last_size = dc.ts;
-
- if (DC_rbound(&dc) != target_size){
- PyErr_SetString(PyExc_RuntimeError, "Failed to parse delta stream");
- error = 1;
- }
-
- #ifdef DEBUG
- fprintf(stderr, "------------ Stream %i --------\n ", (int)dsi);
- fprintf(stderr, "Before Connect: tdsinfo: num_chunks = %i, bytelen = %i KiB, target_size = %i KiB\n", (int)tdsinfo.num_chunks, (int)tdsinfo.tdslen/1000, (int)tdsinfo.target_size/1000);
- fprintf(stderr, "div->num_chunks = %i, div->reserved_size = %i, div->bytelen=%i KiB\n", (int)div.size, (int)div.reserved_size, (int)dlen/1000);
- #endif
-
- if (!DIV_connect_with_base(&tdsinfo, &div)){
- error = 1;
- }
-
- #ifdef DEBUG
- fprintf(stderr, "after connect: tdsinfo->num_chunks = %i, tdsinfo->bytelen = %i KiB\n", (int)tdsinfo.num_chunks, (int)tdsinfo.tdslen/1000);
- #endif
-
- // destroy members, but keep memory
- DIV_reset(&div);
-
-loop_end:
- // perform cleanup
- Py_DECREF(ds);
- Py_DECREF(db);
-
- if (error){
- break;
- }
- }// END for each stream object
-
- if (dsi == 0){
- PyErr_SetString(PyExc_ValueError, "No streams provided");
- }
-
-
-_error:
-
- if (stream_iter != dstreams){
- Py_DECREF(stream_iter);
- }
-
-
- DIV_destroy(&div);
-
- // Return the actual python object - its just a container
- DeltaChunkList* dcl = DCL_new_instance();
- if (!dcl){
- PyErr_SetString(PyExc_RuntimeError, "Couldn't allocate list");
- // Otherwise tdsinfo would be deallocated by the chunk list
- TSI_destroy(&tdsinfo);
- error = 1;
- } else {
- // Plain copy, transfer ownership to dcl
- dcl->istream = tdsinfo;
- }
-
- if (error){
- // Will dealloc tdcv
- Py_XDECREF(dcl);
- return NULL;
- }
-
- return (PyObject*)dcl;
-}
-
-
-// Write using a write function, taking remaining bytes from a base buffer
-// replaces the corresponding method in python
-PyObject* apply_delta(PyObject* self, PyObject* args)
-{
- PyObject* pybbuf = 0;
- PyObject* pydbuf = 0;
- PyObject* pytbuf = 0;
- if (!PyArg_ParseTuple(args, "OOO", &pybbuf, &pydbuf, &pytbuf)){
- PyErr_BadArgument();
- return NULL;
- }
-
- PyObject* objects[] = { pybbuf, pydbuf, pytbuf };
- assert(sizeof(objects) / sizeof(PyObject*) == 3);
-
- uint i;
- for(i = 0; i < 3; i++){
- if (!PyObject_CheckReadBuffer(objects[i])){
- PyErr_SetString(PyExc_ValueError, "Argument must be a buffer-compatible object, like a string, or a memory map");
- return NULL;
- }
- }
-
- Py_ssize_t lbbuf; Py_ssize_t ldbuf; Py_ssize_t ltbuf;
- const uchar* bbuf; const uchar* dbuf;
- uchar* tbuf;
- PyObject_AsReadBuffer(pybbuf, (const void**)(&bbuf), &lbbuf);
- PyObject_AsReadBuffer(pydbuf, (const void**)(&dbuf), &ldbuf);
-
- if (PyObject_AsWriteBuffer(pytbuf, (void**)(&tbuf), &ltbuf)){
- PyErr_SetString(PyExc_ValueError, "Argument 3 must be a writable buffer");
- return NULL;
- }
-
- const uchar* data = dbuf;
- const uchar* dend = dbuf + ldbuf;
-
- while (data < dend)
- {
- const char cmd = *data++;
-
- if (cmd & 0x80)
- {
- unsigned long cp_off = 0, cp_size = 0;
- if (cmd & 0x01) cp_off = *data++;
- if (cmd & 0x02) cp_off |= (*data++ << 8);
- if (cmd & 0x04) cp_off |= (*data++ << 16);
- if (cmd & 0x08) cp_off |= ((unsigned) *data++ << 24);
- if (cmd & 0x10) cp_size = *data++;
- if (cmd & 0x20) cp_size |= (*data++ << 8);
- if (cmd & 0x40) cp_size |= (*data++ << 16);
- if (cp_size == 0) cp_size = 0x10000;
-
- memcpy(tbuf, bbuf + cp_off, cp_size);
- tbuf += cp_size;
-
- } else if (cmd) {
- memcpy(tbuf, data, cmd);
- tbuf += cmd;
- data += cmd;
- } else {
- PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");
- return NULL;
- }
- }// END handle command opcodes
-
- Py_RETURN_NONE;
-}
diff --git a/gitdb/_delta_apply.h b/gitdb/_delta_apply.h
deleted file mode 100644
index 1fcd538..0000000
--- a/gitdb/_delta_apply.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <Python.h>
-
-extern PyObject* connect_deltas(PyObject *self, PyObject *dstreams);
-extern PyObject* apply_delta(PyObject* self, PyObject* args);
-
-extern PyTypeObject DeltaChunkListType;
diff --git a/gitdb/_fun.c b/gitdb/_fun.c
deleted file mode 100644
index 4997038..0000000
--- a/gitdb/_fun.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <Python.h>
-#include "_delta_apply.h"
-
-static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
-{
- const unsigned char *sha;
- const unsigned int sha_len;
-
- // Note: self is only set if we are a c type. We emulate an instance method,
- // hence we have to get the instance as 'first' argument
-
- // get instance and sha
- PyObject* inst = 0;
- if (!PyArg_ParseTuple(args, "Os#", &inst, &sha, &sha_len))
- return NULL;
-
- if (sha_len != 20) {
- PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
- return NULL;
- }
-
- if( !inst){
- PyErr_SetString(PyExc_ValueError, "Cannot be called without self");
- return NULL;
- }
-
- // read lo and hi bounds
- PyObject* fanout_table = PyObject_GetAttrString(inst, "_fanout_table");
- if (!fanout_table){
- PyErr_SetString(PyExc_ValueError, "Couldn't obtain fanout table");
- return NULL;
- }
-
- unsigned int lo = 0, hi = 0;
- if (sha[0]){
- PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)(sha[0]-1));
- lo = PyInt_AS_LONG(item);
- Py_DECREF(item);
- }
- PyObject* item = PySequence_GetItem(fanout_table, (const Py_ssize_t)sha[0]);
- hi = PyInt_AS_LONG(item);
- Py_DECREF(item);
- item = 0;
-
- Py_DECREF(fanout_table);
-
- // get sha query function
- PyObject* get_sha = PyObject_GetAttrString(inst, "sha");
- if (!get_sha){
- PyErr_SetString(PyExc_ValueError, "Couldn't obtain sha method");
- return NULL;
- }
-
- PyObject *sha_str = 0;
- while (lo < hi) {
- const int mid = (lo + hi)/2;
- sha_str = PyObject_CallFunction(get_sha, "i", mid);
- if (!sha_str) {
- return NULL;
- }
-
- // we really trust that string ... for speed
- const int cmp = memcmp(PyString_AS_STRING(sha_str), sha, 20);
- Py_DECREF(sha_str);
- sha_str = 0;
-
- if (cmp < 0){
- lo = mid + 1;
- }
- else if (cmp > 0) {
- hi = mid;
- }
- else {
- Py_DECREF(get_sha);
- return PyInt_FromLong(mid);
- }// END handle comparison
- }// END while lo < hi
-
- // nothing found, cleanup
- Py_DECREF(get_sha);
- Py_RETURN_NONE;
-}
-
-static PyMethodDef py_fun[] = {
- { "PackIndexFile_sha_to_index", (PyCFunction)PackIndexFile_sha_to_index, METH_VARARGS, "TODO" },
- { "connect_deltas", (PyCFunction)connect_deltas, METH_O, "See python implementation" },
- { "apply_delta", (PyCFunction)apply_delta, METH_VARARGS, "See python implementation" },
- { NULL, NULL, 0, NULL }
-};
-
-#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
-#define PyMODINIT_FUNC void
-#endif
-PyMODINIT_FUNC init_perf(void)
-{
- PyObject *m;
-
- if (PyType_Ready(&DeltaChunkListType) < 0)
- return;
-
- m = Py_InitModule3("_perf", py_fun, NULL);
- if (m == NULL)
- return;
-
- Py_INCREF(&DeltaChunkListType);
- PyModule_AddObject(m, "DeltaChunkList", (PyObject *)&DeltaChunkListType);
-}
diff --git a/gitdb/fun.py b/gitdb/fun.py
index ac9d993..8ca38c8 100644
--- a/gitdb/fun.py
+++ b/gitdb/fun.py
@@ -776,6 +776,6 @@ def is_equal_canonical_sha(canonical_length, match, sha1):
try:
- from _perf import connect_deltas
+ from gitdb_speedups._perf import connect_deltas
except ImportError:
pass
diff --git a/gitdb/pack.py b/gitdb/pack.py
index 2447455..20a4515 100644
--- a/gitdb/pack.py
+++ b/gitdb/pack.py
@@ -35,7 +35,7 @@ from gitdb.fun import (
)
try:
- from _perf import PackIndexFile_sha_to_index
+ from gitdb_speedups._perf import PackIndexFile_sha_to_index
except ImportError:
pass
# END try c module
diff --git a/gitdb/stream.py b/gitdb/stream.py
index be95c11..2f4c12d 100644
--- a/gitdb/stream.py
+++ b/gitdb/stream.py
@@ -33,7 +33,7 @@ from gitdb.utils.encoding import force_bytes
has_perf_mod = False
PY26 = sys.version_info[:2] < (2, 7)
try:
- from _perf import apply_delta as c_apply_delta
+ from gitdb_speedups._perf import apply_delta as c_apply_delta
has_perf_mod = True
except ImportError:
pass