diff options
author | Pauli Virtanen <pav@iki.fi> | 2009-12-06 16:20:07 +0000 |
---|---|---|
committer | Pauli Virtanen <pav@iki.fi> | 2009-12-06 16:20:07 +0000 |
commit | 577dbbd844b60958a59e062dae428a34a9c3644f (patch) | |
tree | d16fa96f2191c5a57c48bbc6ad0277bedd3c65f5 | |
parent | b860d14381adc72323c0a8f5f47af39259604d09 (diff) | |
download | numpy-577dbbd844b60958a59e062dae428a34a9c3644f.tar.gz |
BUG: add a work-around for #1312 -- don't define bf_releasebuffer at all
The problem is that PyArg_ParseTuple("s#", ...) does not accept objects
with the new buffer interface, if they have bf_releasebuffer defined.
We now work around this by not defining bf_releasebuffer, and releasing
any info associated with the buffer interface when array or its
descriptor is deallocated.
The shape+strides information for arrays stays static due to the
refcounting lock. The format information for dtypes may change
if field names are changed on-the-fly.
XXX: dtype field name changes are not currently handled.
-rw-r--r-- | doc/Py3K.txt | 41 | ||||
-rw-r--r-- | numpy/core/include/numpy/ndarrayobject.h | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arrayobject.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/buffer.c | 84 | ||||
-rw-r--r-- | numpy/core/src/multiarray/buffer.h | 3 | ||||
-rw-r--r-- | numpy/core/src/multiarray/descriptor.c | 6 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 10 |
7 files changed, 114 insertions, 34 deletions
diff --git a/doc/Py3K.txt b/doc/Py3K.txt index fdb6644e0..bb7e50573 100644 --- a/doc/Py3K.txt +++ b/doc/Py3K.txt @@ -338,6 +338,21 @@ doesn't currently produce format strings, which needs to be fixed. Currently, the format string and some of the memory is cached in the PyArrayObject structure. This is partly needed because of Python bug #7433. +Also some code also stops working when ``bf_releasebuffer`` is +defined. Most importantly, ``PyArg_ParseTuple("s#", ...)`` refuses to +return a buffer if ``bf_releasebuffer`` is present. For this reason, +the buffer interface for arrays is implemented currently *without* +defining ``bf_releasebuffer`` at all. This forces us to go through +some additional contortions. But basically, since the strides and shape +of an array are locked when references to it are held, we can do with +a single allocated ``Py_ssize_t`` shape+strides buffer. + +The buffer format string is currently cached in the ``dtype`` object. +Currently, there's a slight problem as dtypes are not immutable -- +the names of the fields can be changed. Right now, this issue is +just ignored, and the field names in the buffer format string are +not updated. + From the consumer side, the new buffer protocol is mostly backward compatible with the old one, so little needs to be done here to retain basic functionality. However, we *do* want to make use of the new @@ -386,6 +401,32 @@ The Py2/Py3 compatible PyBufferMethods definition looks like:: #endif }; + +.. todo:: + + Is there a cleaner way out of the ``bf_releasebuffer`` issue? It + seems a bit that on Py2.6, the new buffer interface is a bit + troublesome, as apparently Numpy will be the first piece of code + exercising it more fully. + + It seems we should submit patches to Python on this. At least "s#" + implementation on Py3 won't work at all, since the old buffer + interface is no more present. + +.. todo:: + + Find a way around the dtype mutability issue. + + Note that we cannot just realloc the format string when the names + are changed: this would invalidate any existing buffer + interfaces. And since we can't define ``bf_releasebuffer``, we + don't know if there are any buffer interfaces present. + + One solution would be to alloc a "big enough" buffer at the + beginning, and not change it after that. We could also make the + strides etc. in the ``buffer_info`` structure static size. There's + MAXDIMS present after all. + .. todo:: Take a second look at places that used PyBuffer_FromMemory and diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h index 709e26d49..68d3ab9d5 100644 --- a/numpy/core/include/numpy/ndarrayobject.h +++ b/numpy/core/include/numpy/ndarrayobject.h @@ -520,6 +520,8 @@ typedef struct _PyArray_Descr { basic data descriptor */ PyObject *metadata; /* Metadata about this dtype */ + + char *buffer_format; /* Buffer interface format string, or NULL */ } PyArray_Descr; typedef struct _arr_descr { diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 03692c84d..3dbaf78c1 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -163,6 +163,8 @@ PyArray_TypeNumFromName(char *str) static void array_dealloc(PyArrayObject *self) { + _array_dealloc_buffer_info(self); + if (self->weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *)self); } diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index 4da576481..18f829b11 100644 --- a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -76,6 +76,12 @@ array_getcharbuf(PyArrayObject *self, Py_ssize_t segment, constchar **ptrptr) * PEP 3118 buffer protocol *************************************************************************/ + +/* + * Note: because for backward compatibility we cannot define bf_releasebuffer, + * we must go through some additional contortions in bf_getbuffer. + */ + #if PY_VERSION_HEX >= 0x02060000 /* @@ -90,9 +96,9 @@ typedef struct { typedef struct { char *format; + int nd; Py_ssize_t *strides; Py_ssize_t *shape; - int count; } _buffer_data; static int @@ -263,7 +269,7 @@ array_getbuffer(PyObject *obj, Py_buffer *view, int flags) if (self->buffer_info == NULL) { cache = (_buffer_data*)malloc(sizeof(_buffer_data)); - cache->count = 0; + cache->nd = 0; cache->format = NULL; cache->strides = NULL; cache->shape = NULL; @@ -300,32 +306,53 @@ array_getbuffer(PyObject *obj, Py_buffer *view, int flags) view->len = PyArray_NBYTES(self); if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { - if (cache->format == NULL) { +#warning XXX -- assumes descriptors are immutable + if (PyArray_DESCR(self)->buffer_format == NULL) { int offset = 0; _tmp_string fmt = {0,0,0}; if (_buffer_format_string(PyArray_DESCR(self), &fmt, &offset)) { goto fail; } _append_char(&fmt, '\0'); - cache->format = fmt.s; + PyArray_DESCR(self)->buffer_format = fmt.s; } - view->format = cache->format; + view->format = PyArray_DESCR(self)->buffer_format; } else { view->format = NULL; } if ((flags & PyBUF_STRIDED) == PyBUF_STRIDED) { - if (cache->strides == NULL) { - int k; + int shape_changed = 0; + int k; + + if (cache->nd != PyArray_NDIM(self)) { + shape_changed = 1; + } + else { + for (k = 0; k < PyArray_NDIM(self); ++k) { + if (cache->shape[k] != PyArray_DIMS(self)[k] + || cache->strides[k] != PyArray_STRIDES(self)[k]) { + shape_changed = 1; + break; + } + } + } + + if (cache->strides == NULL || shape_changed) { + if (cache->shape != NULL) { + free(cache->shape); + } + cache->nd = PyArray_NDIM(self); cache->shape = (Py_ssize_t*)malloc(sizeof(Py_ssize_t) - * PyArray_NDIM(self) * 2); + * PyArray_NDIM(self) * 2 + 1); cache->strides = cache->shape + PyArray_NDIM(self); for (k = 0; k < PyArray_NDIM(self); ++k) { cache->shape[k] = PyArray_DIMS(self)[k]; cache->strides[k] = PyArray_STRIDES(self)[k]; } } + view->ndim = PyArray_NDIM(self); view->shape = cache->shape; view->strides = cache->strides; @@ -343,47 +370,38 @@ array_getbuffer(PyObject *obj, Py_buffer *view, int flags) view->obj = self; Py_INCREF(self); - cache->count++; - return 0; fail: - if (view->format) { - free(view->format); - } - if (view->shape) { - free(view->shape); - } return -1; } /* - * Releasing buffers + * NOTE: for backward compatibility (esp. with PyArg_ParseTuple("s#", ...)) + * we do *not* define bf_releasebuffer at all. + * + * Instead, any extra data allocated with the buffer is released only in + * array_dealloc. + * + * Ensuring that the buffer stays in place is taken care by refcounting; + * ndarrays do not reallocate if there are references to them, and a buffer + * view holds one reference. */ -static void -array_releasebuffer(PyObject *obj, Py_buffer *view) +NPY_NO_EXPORT void +_array_dealloc_buffer_info(PyArrayObject *self) { _buffer_data *cache; - PyArrayObject *self; - self = (PyArrayObject*)obj; cache = self->buffer_info; if (cache != NULL) { - cache->count--; - - if (cache->count == 0) { - if (cache->format) { - free(cache->format); - } - if (cache->shape) { - free(cache->shape); - } - free(cache); - self->buffer_info = NULL; + if (cache->shape) { + free(cache->shape); } + free(cache); + self->buffer_info = NULL; } } @@ -407,6 +425,6 @@ NPY_NO_EXPORT PyBufferProcs array_as_buffer = { #endif #if PY_VERSION_HEX >= 0x02060000 (getbufferproc)array_getbuffer, - (releasebufferproc)array_releasebuffer, + (releasebufferproc)0, #endif }; diff --git a/numpy/core/src/multiarray/buffer.h b/numpy/core/src/multiarray/buffer.h index 0b2ae2b6d..19e4e8ff3 100644 --- a/numpy/core/src/multiarray/buffer.h +++ b/numpy/core/src/multiarray/buffer.h @@ -7,4 +7,7 @@ extern NPY_NO_EXPORT PyBufferProcs array_as_buffer; NPY_NO_EXPORT PyBufferProcs array_as_buffer; #endif +NPY_NO_EXPORT void +_array_dealloc_buffer_info(PyArrayObject *self); + #endif diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 64a2963ac..84a6614cc 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1449,6 +1449,9 @@ arraydescr_dealloc(PyArray_Descr *self) _pya_free(self->subarray); } Py_XDECREF(self->metadata); + if (self->buffer_format) { + free(self->buffer_format); + } Py_TYPE(self)->tp_free((PyObject *)self); } @@ -1942,6 +1945,9 @@ arraydescr_new(PyTypeObject *NPY_UNUSED(subtype), PyObject *args, PyObject *kwds conv->metadata = PyDict_Copy(ometadata); } } + + conv->buffer_format = NULL; + return (PyObject *)conv; } diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index cf2409265..f047f758d 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1235,7 +1235,15 @@ class TestRegression(TestCase): def func(): x = np.dtype([(('a', 'a'), 'i'), ('b', 'i')]) self.failUnlessRaises(ValueError, func) - + + def test_buffer_hashlib(self): + try: + from hashlib import md5 + except ImportError: + from md5 import new as md5 + + x = np.array([1,2,3]) + assert_equal(md5(x).hexdigest(), '2a1dd1e1e59d0a384c26951e316cd7e6') if __name__ == "__main__": run_module_suite() |