summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorPierre Glaser <pierreglaser@msn.com>2018-10-10 14:45:20 +0200
committerPierre Glaser <pierreglaser@msn.com>2018-10-10 19:54:36 +0200
commit64a855f421d7b50dd29e5e09c69d285eddfb6d1c (patch)
tree631514b2c70b76e59428446130f1d7e7aa127c21 /numpy/core
parent9942b71a6782a850cf59462a3a9ceec39e741880 (diff)
downloadnumpy-64a855f421d7b50dd29e5e09c69d285eddfb6d1c.tar.gz
ENH implement __reduce_ex__ for np.ndarray and pickle protocol 5
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/numeric.py4
-rw-r--r--numpy/core/src/multiarray/methods.c131
-rw-r--r--numpy/core/tests/test_multiarray.py88
3 files changed, 223 insertions, 0 deletions
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 7c9e41299..56ac69424 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -1934,6 +1934,10 @@ def fromfunction(function, shape, **kwargs):
return function(*args, **kwargs)
+def _frombuffer(buf, dtype, shape, order):
+ return frombuffer(buf, dtype=dtype).reshape(shape, order=order)
+
+
def isscalar(num):
"""
Returns True if the type of `num` is a scalar type.
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index cdbd0d6ae..23b0bfd24 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -1619,6 +1619,8 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
Notice because Python does not describe a mechanism to write
raw data to the pickle, this performs a copy to a string first
+ This issue is now addressed in protocol 5, where a buffer is serialized
+ instead of a string.
*/
state = PyTuple_New(5);
@@ -1652,6 +1654,132 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
}
+/*
+ * Implementation of ndarray.__reduce_ex__(protocol).
+ *
+ * args must hold a single integer, the pickle protocol number.
+ *
+ * For protocol 5, an array that is exactly an ndarray, is C- or
+ * Fortran-contiguous, and whose dtype is sized and holds no Python objects
+ * is reduced to
+ *
+ *     (numpy.core.numeric._frombuffer, (PickleBuffer, dtype, shape, order))
+ *
+ * where order is the one-character string "C" or "F". In every other case
+ * with protocol < 5, or with an array that does not meet the conditions
+ * above, the result of self.__reduce__() is returned instead.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
static PyObject *
+array_reduce_ex(PyArrayObject *self, PyObject *args)
+{
+    int protocol;
+    int is_c_contiguous, is_f_contiguous;
+    PyObject *ret = NULL;
+    PyObject *reduce_method = NULL;
+    PyObject *pickle_module = NULL, *pickle_class = NULL;
+    PyObject *buffer_source = NULL, *buffer = NULL;
+    PyObject *numeric_mod = NULL, *from_buffer_func = NULL;
+    PyObject *shape = NULL, *order_str = NULL;
+    PyArray_Descr *descr = NULL;
+    char order;
+
+    if (!PyArg_ParseTuple(args, "i", &protocol)) {
+        return NULL;
+    }
+
+    descr = PyArray_DESCR(self);
+    is_c_contiguous = PyArray_IS_C_CONTIGUOUS(self);
+    is_f_contiguous = PyArray_IS_F_CONTIGUOUS(self);
+
+    /*
+     * self is always an ndarray instance here, so comparing its type with
+     * PyArray_Type is enough to detect a subclass.
+     */
+    if ((protocol < 5) ||
+        (!is_c_contiguous && !is_f_contiguous) ||
+        PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
+        (Py_TYPE(self) != &PyArray_Type) ||
+        PyDataType_ISUNSIZED(descr)) {
+        /*
+         * The PickleBuffer class from version 5 of the pickle protocol
+         * can only be used for arrays backed by a contiguous data buffer.
+         * For all other cases we fall back to the generic array_reduce
+         * method that involves using a temporary bytes allocation. However
+         * we do not call array_reduce directly but instead look up and call
+         * the __reduce__ method to make sure that it is possible to
+         * customize pickling in sub-classes.
+         */
+        reduce_method = PyObject_GetAttrString((PyObject *)self,
+                                               "__reduce__");
+        if (reduce_method == NULL) {
+            return NULL;
+        }
+        ret = PyObject_CallObject(reduce_method, NULL);
+        Py_DECREF(reduce_method);
+        return ret;
+    }
+
+    if (protocol != 5) {
+        /* Only protocols strictly above 5 reach this point. */
+        PyErr_Format(PyExc_ValueError,
+                     "cannot call __reduce_ex__ with protocol > %d",
+                     5);
+        return NULL;
+    }
+
+    /*
+     * If the python version is below 3.8, the pickle module does not provide
+     * built-in support for protocol 5. We try importing the pickle5
+     * backport instead.
+     */
+#if PY_VERSION_HEX >= 0x03080000
+    pickle_module = PyImport_ImportModule("pickle");
+#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
+    pickle_module = PyImport_ImportModule("pickle5");
+    if (pickle_module == NULL) {
+        /* for protocol 5, raise a clear ImportError if pickle5 is not found */
+        PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
+                "requires the pickle5 module for python versions >=3.6 "
+                "and <3.8");
+        return NULL;
+    }
+#else
+    PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
+            "for python versions < 3.6");
+    return NULL;
+#endif
+    if (pickle_module == NULL) {
+        return NULL;
+    }
+
+    pickle_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
+    Py_DECREF(pickle_module);
+    if (pickle_class == NULL) {
+        return NULL;
+    }
+
+    if (!is_c_contiguous && is_f_contiguous) {
+        /*
+         * If the array is Fortran-contiguous and not C-contiguous,
+         * the PickleBuffer instance will hold a view on the transpose
+         * of the initial array, that is C-contiguous.
+         */
+        order = 'F';
+        buffer_source = PyArray_Transpose(self, NULL);
+    }
+    else {
+        order = 'C';
+        buffer_source = (PyObject *)self;
+        Py_INCREF(buffer_source);
+    }
+    if (buffer_source == NULL) {
+        goto done;
+    }
+
+    buffer = PyObject_CallFunctionObjArgs(pickle_class, buffer_source, NULL);
+    if (buffer == NULL) {
+        goto done;
+    }
+
+    numeric_mod = PyImport_ImportModule("numpy.core.numeric");
+    if (numeric_mod == NULL) {
+        goto done;
+    }
+    from_buffer_func = PyObject_GetAttrString(numeric_mod, "_frombuffer");
+    Py_DECREF(numeric_mod);
+    if (from_buffer_func == NULL) {
+        goto done;
+    }
+
+    shape = PyObject_GetAttrString((PyObject *)self, "shape");
+    if (shape == NULL) {
+        goto done;
+    }
+
+    order_str = PyUnicode_FromStringAndSize(&order, (Py_ssize_t)1);
+    if (order_str == NULL) {
+        goto done;
+    }
+
+    /*
+     * "O" takes its own reference to every item, so all the locals
+     * (descr excepted, which is borrowed from self) are released below
+     * whether or not building the result succeeds.
+     */
+    ret = Py_BuildValue("O(OOOO)", from_buffer_func, buffer,
+                        (PyObject *)descr, shape, order_str);
+
+done:
+    Py_XDECREF(pickle_class);
+    Py_XDECREF(buffer_source);
+    Py_XDECREF(buffer);
+    Py_XDECREF(from_buffer_func);
+    Py_XDECREF(shape);
+    Py_XDECREF(order_str);
+    return ret;
+}
+
+static PyObject *
array_setstate(PyArrayObject *self, PyObject *args)
{
PyObject *shape;
@@ -2524,6 +2652,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
{"__reduce__",
(PyCFunction) array_reduce,
METH_VARARGS, NULL},
+ {"__reduce_ex__",
+ (PyCFunction) array_reduce_ex,
+ METH_VARARGS, NULL},
{"__setstate__",
(PyCFunction) array_setstate,
METH_VARARGS, NULL},
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 837aab23e..8cd0f4d92 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -1382,6 +1382,21 @@ class TestZeroSizeFlexible(object):
assert_equal(zs.dtype, zs2.dtype)
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+ reason="requires pickle protocol 5")
+ def test_pickle_with_buffercallback(self):
+ array = np.arange(10)
+ buffers = []
+ bytes_string = pickle.dumps(array, buffer_callback=buffers.append,
+ protocol=5)
+ array_from_buffer = pickle.loads(bytes_string, buffers=buffers)
+ # when using pickle protocol 5 with buffer callbacks,
+ # array_from_buffer is reconstructed from a buffer holding a view
+ # to the initial array's data, so modifying an element in array
+ # should modify it in array_from_buffer too.
+ array[0] = -1
+ assert array_from_buffer[0] == -1, array_from_buffer[0]
+
class TestMethods(object):
def test_compress(self):
@@ -3562,6 +3577,79 @@ class TestPickling(object):
else:
assert pickle.HIGHEST_PROTOCOL < 5
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5,
+ reason=('this tests the error messages when trying to'
+ 'protocol 5 although it is not available'))
+ def test_correct_protocol5_error_message(self):
+ array = np.arange(10)
+ f = io.BytesIO()
+
+ if sys.version_info[:2] in ((3, 6), (3, 7)):
+ # For the specific case of python3.6 and 3.7, raise a clear import
+ # error about the pickle5 backport when trying to use protocol=5
+ # without the pickle5 package
+ with pytest.raises(ImportError):
+ array.__reduce_ex__(5)
+
+ elif sys.version_info[:2] < (3, 6):
+ # when calling __reduce_ex__ explicitly with protocol=5 on python
+ # raise a ValueError saying that protocol 5 is not available for
+ # this python version
+ with pytest.raises(ValueError):
+ array.__reduce_ex__(5)
+
+ def test_record_array_with_object_dtype(self):
+ # Pickle round-trip of two structured arrays, one with an object field
+ # and one without; only the dtypes are compared after unpickling.
+ my_object = object()
+
+ arr_with_object = np.array(
+ [(my_object, 1, 2.0)],
+ dtype=[('a', object), ('b', int), ('c', float)])
+ arr_without_object = np.array(
+ [('xxx', 1, 2.0)],
+ dtype=[('a', str), ('b', int), ('c', float)])
+
+ # Protocols 2 through pickle.HIGHEST_PROTOCOL are exercised.
+ for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+ depickled_arr_with_object = pickle.loads(
+ pickle.dumps(arr_with_object, protocol=proto))
+ depickled_arr_without_object = pickle.loads(
+ pickle.dumps(arr_without_object, protocol=proto))
+
+ # NOTE(review): indentation is lost in this view; these checks are
+ # presumably inside the loop so each protocol is verified - confirm.
+ assert_equal(arr_with_object.dtype,
+ depickled_arr_with_object.dtype)
+ assert_equal(arr_without_object.dtype,
+ depickled_arr_without_object.dtype)
+
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+ reason="requires pickle protocol 5")
+ def test_f_contiguous_array(self):
+ f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F')
+ buffers = []
+
+ # When using pickle protocol 5, Fortran-contiguous arrays can be
+ # serialized using out-of-band buffers
+ bytes_string = pickle.dumps(f_contiguous_array, protocol=5,
+ buffer_callback=buffers.append)
+
+ assert len(buffers) > 0
+
+ depickled_f_contiguous_array = pickle.loads(bytes_string,
+ buffers=buffers)
+
+ assert_equal(f_contiguous_array, depickled_f_contiguous_array)
+
+ def test_non_contiguous_array(self):
+ # Pickle round-trip of a sliced array that is neither C- nor
+ # Fortran-contiguous, as the two flag assertions below verify.
+ non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2]
+ assert not non_contiguous_array.flags.c_contiguous
+ assert not non_contiguous_array.flags.f_contiguous
+
+ # make sure non-contiguous arrays can be pickled-depickled
+ # using any protocol
+ for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+ depickled_non_contiguous_array = pickle.loads(
+ pickle.dumps(non_contiguous_array, protocol=proto))
+
+ # NOTE(review): indentation is lost in this view; this comparison is
+ # presumably inside the loop so each protocol is verified - confirm.
+ assert_equal(non_contiguous_array, depickled_non_contiguous_array)
+
def test_roundtrip(self):
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
carray = np.array([[2, 9], [7, 0], [3, 8]])