summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/1.16.0-notes.rst9
-rw-r--r--doc/source/dev/conduct/code_of_conduct.rst3
-rw-r--r--numpy/core/numeric.py4
-rw-r--r--numpy/core/src/multiarray/methods.c131
-rw-r--r--numpy/core/tests/test_multiarray.py88
-rw-r--r--numpy/lib/histograms.py8
-rw-r--r--numpy/lib/tests/test_histograms.py17
7 files changed, 258 insertions, 2 deletions
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index cc69b245d..f2c8f8dc2 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -104,6 +104,15 @@ content to be read after ``skiprows``, as in `numpy.genfromtxt`.
Improvements
============
+no-copy pickling of numpy arrays
+--------------------------------
+Up to protocol 4, numpy array pickling created 2 spurious copies of the data
+being serlialized.
+With pickle protocol 5, and the ``PickleBuffer`` API, a large variety of numpy
+arrays can now be serialized without any copy using out-of-band buffers,
+and with one less copy using in-band buffers. This results, for large arrays,
+in an up to 66% drop in peak memory usage.
+
build shell independence
------------------------
NumPy builds should no longer interact with the host machine
diff --git a/doc/source/dev/conduct/code_of_conduct.rst b/doc/source/dev/conduct/code_of_conduct.rst
index 604f14662..aca39d8a7 100644
--- a/doc/source/dev/conduct/code_of_conduct.rst
+++ b/doc/source/dev/conduct/code_of_conduct.rst
@@ -121,8 +121,7 @@ a conflict of interest in handling it, then they will recuse themselves from
considering your report. Alternatively, if for any reason you feel
uncomfortable making a report to the committee, then you can also contact:
-- NumFOCUS Executive Director: Leah Silen
-- NumFOCUS President: Andy Terrel
+- Senior `NumFOCUS staff <https://numfocus.org/code-of-conduct#persons-responsible>`__: conduct@numfocus.org
Incident reporting resolution & Code of Conduct enforcement
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 7c9e41299..56ac69424 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -1934,6 +1934,10 @@ def fromfunction(function, shape, **kwargs):
return function(*args, **kwargs)
+def _frombuffer(buf, dtype, shape, order):
+ return frombuffer(buf, dtype=dtype).reshape(shape, order=order)
+
+
def isscalar(num):
"""
Returns True if the type of `num` is a scalar type.
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index cdbd0d6ae..23b0bfd24 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -1619,6 +1619,8 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
Notice because Python does not describe a mechanism to write
raw data to the pickle, this performs a copy to a string first
+ This issue is now adressed in protocol 5, where a buffer is serialized
+ instead of a string,
*/
state = PyTuple_New(5);
@@ -1652,6 +1654,132 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
}
static PyObject *
+array_reduce_ex(PyArrayObject *self, PyObject *args)
+{
+ int protocol;
+ PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
+ PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
+ PyObject *class_args = NULL, *class_args_tuple = NULL, *unused = NULL;
+ PyObject *subclass_array_reduce = NULL;
+ PyObject *buffer = NULL, *transposed_array = NULL;
+ PyArray_Descr *descr = NULL;
+ char order;
+
+ if (PyArg_ParseTuple(args, "i", &protocol)){
+ descr = PyArray_DESCR(self);
+ if ((protocol < 5) ||
+ (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
+ !PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
+ PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
+ (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
+ ((PyObject*)self)->ob_type != &PyArray_Type) ||
+ PyDataType_ISUNSIZED(descr)) {
+ /* The PickleBuffer class from version 5 of the pickle protocol
+ * can only be used for arrays backed by a contiguous data buffer.
+ * For all other cases we fallback to the generic array_reduce
+ * method that involves using a temporary bytes allocation. However
+ * we do not call array_reduce directly but instead lookup and call
+ * the __reduce__ method to make sure that it's possible customize
+ * pickling in sub-classes. */
+ subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
+ "__reduce__");
+ return PyObject_CallObject(subclass_array_reduce, unused);
+ }
+ else if (protocol == 5){
+ ret = PyTuple_New(2);
+
+ if (ret == NULL) {
+ return NULL;
+ }
+
+ /* if the python version is below 3.8, the pickle module does not provide
+ * built-in support for protocol 5. We try importing the pickle5
+ * backport instead */
+#if PY_VERSION_HEX >= 0x03080000
+ pickle_module = PyImport_ImportModule("pickle");
+#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
+ pickle_module = PyImport_ImportModule("pickle5");
+ if (pickle_module == NULL){
+ /* for protocol 5, raise a clear ImportError if pickle5 is not found
+ */
+ PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
+ "requires the pickle5 module for python versions >=3.6 "
+ "and <3.8");
+ return NULL;
+ }
+#else
+ PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
+ "for python versions < 3.6");
+ return NULL;
+#endif
+ if (pickle_module == NULL){
+ return NULL;
+ }
+
+ pickle_class = PyObject_GetAttrString(pickle_module,
+ "PickleBuffer");
+
+ class_args_tuple = PyTuple_New(1);
+ if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
+ PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)){
+
+ /* if the array if Fortran-contiguous and not C-contiguous,
+ * the PickleBuffer instance will hold a view on the transpose
+ * of the initial array, that is C-contiguous. */
+ order = 'F';
+ transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
+ PyTuple_SET_ITEM(class_args_tuple, 0, transposed_array);
+ }
+ else {
+ order = 'C';
+ PyTuple_SET_ITEM(class_args_tuple, 0, (PyObject *)self);
+ Py_INCREF(self);
+ }
+
+ class_args = Py_BuildValue("O", class_args_tuple);
+
+ buffer = PyObject_CallObject(pickle_class, class_args);
+
+ numeric_mod = PyImport_ImportModule("numpy.core.numeric");
+ if (numeric_mod == NULL) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ from_buffer_func = PyObject_GetAttrString(numeric_mod,
+ "_frombuffer");
+ Py_DECREF(numeric_mod);
+
+ Py_INCREF(descr);
+
+ buffer_tuple = PyTuple_New(4);
+ PyTuple_SET_ITEM(buffer_tuple, 0, buffer);
+ PyTuple_SET_ITEM(buffer_tuple, 1, (PyObject *)descr);
+ PyTuple_SET_ITEM(buffer_tuple, 2,
+ PyObject_GetAttrString((PyObject *)self,
+ "shape"));
+ PyTuple_SET_ITEM(buffer_tuple, 3,
+ PyUnicode_FromStringAndSize(&order,
+ (Py_ssize_t)1));
+
+ PyTuple_SET_ITEM(ret, 0, from_buffer_func);
+ PyTuple_SET_ITEM(ret, 1, buffer_tuple);
+
+ return ret;
+ }
+ else {
+ PyErr_Format(PyExc_ValueError,
+ "cannot call __reduce_ex__ with protocol >= %d",
+ 5);
+ return NULL;
+ }
+ }
+ else {
+ return NULL;
+ }
+
+}
+
+static PyObject *
array_setstate(PyArrayObject *self, PyObject *args)
{
PyObject *shape;
@@ -2524,6 +2652,9 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
{"__reduce__",
(PyCFunction) array_reduce,
METH_VARARGS, NULL},
+ {"__reduce_ex__",
+ (PyCFunction) array_reduce_ex,
+ METH_VARARGS, NULL},
{"__setstate__",
(PyCFunction) array_setstate,
METH_VARARGS, NULL},
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 837aab23e..8cd0f4d92 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -1382,6 +1382,21 @@ class TestZeroSizeFlexible(object):
assert_equal(zs.dtype, zs2.dtype)
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+ reason="requires pickle protocol 5")
+ def test_pickle_with_buffercallback(self):
+ array = np.arange(10)
+ buffers = []
+ bytes_string = pickle.dumps(array, buffer_callback=buffers.append,
+ protocol=5)
+ array_from_buffer = pickle.loads(bytes_string, buffers=buffers)
+ # when using pickle protocol 5 with buffer callbacks,
+ # array_from_buffer is reconstructed from a buffer holding a view
+ # to the initial array's data, so modifying an element in array
+ # should modify it in array_from_buffer too.
+ array[0] = -1
+ assert array_from_buffer[0] == -1, array_from_buffer[0]
+
class TestMethods(object):
def test_compress(self):
@@ -3562,6 +3577,79 @@ class TestPickling(object):
else:
assert pickle.HIGHEST_PROTOCOL < 5
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL >= 5,
+ reason=('this tests the error messages when trying to'
+ 'protocol 5 although it is not available'))
+ def test_correct_protocol5_error_message(self):
+ array = np.arange(10)
+ f = io.BytesIO()
+
+ if sys.version_info[:2] in ((3, 6), (3, 7)):
+ # For the specific case of python3.6 and 3.7, raise a clear import
+ # error about the pickle5 backport when trying to use protocol=5
+ # without the pickle5 package
+ with pytest.raises(ImportError):
+ array.__reduce_ex__(5)
+
+ elif sys.version_info[:2] < (3, 6):
+ # when calling __reduce_ex__ explicitly with protocol=5 on python
+ # raise a ValueError saying that protocol 5 is not available for
+ # this python version
+ with pytest.raises(ValueError):
+ array.__reduce_ex__(5)
+
+ def test_record_array_with_object_dtype(self):
+ my_object = object()
+
+ arr_with_object = np.array(
+ [(my_object, 1, 2.0)],
+ dtype=[('a', object), ('b', int), ('c', float)])
+ arr_without_object = np.array(
+ [('xxx', 1, 2.0)],
+ dtype=[('a', str), ('b', int), ('c', float)])
+
+ for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+ depickled_arr_with_object = pickle.loads(
+ pickle.dumps(arr_with_object, protocol=proto))
+ depickled_arr_without_object = pickle.loads(
+ pickle.dumps(arr_without_object, protocol=proto))
+
+ assert_equal(arr_with_object.dtype,
+ depickled_arr_with_object.dtype)
+ assert_equal(arr_without_object.dtype,
+ depickled_arr_without_object.dtype)
+
+ @pytest.mark.skipif(pickle.HIGHEST_PROTOCOL < 5,
+ reason="requires pickle protocol 5")
+ def test_f_contiguous_array(self):
+ f_contiguous_array = np.array([[1, 2, 3], [4, 5, 6]], order='F')
+ buffers = []
+
+ # When using pickle protocol 5, Fortran-contiguous arrays can be
+ # serialized using out-of-band buffers
+ bytes_string = pickle.dumps(f_contiguous_array, protocol=5,
+ buffer_callback=buffers.append)
+
+ assert len(buffers) > 0
+
+ depickled_f_contiguous_array = pickle.loads(bytes_string,
+ buffers=buffers)
+
+ assert_equal(f_contiguous_array, depickled_f_contiguous_array)
+
+ def test_non_contiguous_array(self):
+ non_contiguous_array = np.arange(12).reshape(3, 4)[:, :2]
+ assert not non_contiguous_array.flags.c_contiguous
+ assert not non_contiguous_array.flags.f_contiguous
+
+ # make sure non-contiguous arrays can be pickled-depickled
+ # using any protocol
+ for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+ depickled_non_contiguous_array = pickle.loads(
+ pickle.dumps(non_contiguous_array, protocol=proto))
+
+ assert_equal(non_contiguous_array, depickled_non_contiguous_array)
+
def test_roundtrip(self):
for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
carray = np.array([[2, 9], [7, 0], [3, 8]])
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index f03f30fb0..6a66e4a73 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -220,6 +220,14 @@ _hist_bin_selectors = {'auto': _hist_bin_auto,
def _ravel_and_check_weights(a, weights):
""" Check a and weights have matching shapes, and ravel both """
a = np.asarray(a)
+
+ # Ensure that the array is a "subtractable" dtype
+ if a.dtype == np.bool_:
+ warnings.warn("Converting input from {} to {} for compatibility."
+ .format(a.dtype, np.uint8),
+ RuntimeWarning, stacklevel=2)
+ a = a.astype(np.uint8)
+
if weights is not None:
weights = np.asarray(weights)
if weights.shape != a.shape:
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 561f5f938..a71060a46 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -141,6 +141,23 @@ class TestHistogram(object):
counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
assert_equal(counts_hist.sum(), 3.)
+ def test_bool_conversion(self):
+ # gh-12107
+ # Reference integer histogram
+ a = np.array([1, 1, 0], dtype=np.uint8)
+ int_hist, int_edges = np.histogram(a)
+
+ # Should raise an warning on booleans
+ # Ensure that the histograms are equivalent, need to suppress
+ # the warnings to get the actual outputs
+ with suppress_warnings() as sup:
+ rec = sup.record(RuntimeWarning, 'Converting input from .*')
+ hist, edges = np.histogram([True, True, False])
+ # A warning should be issued
+ assert_equal(len(rec), 1)
+ assert_array_equal(hist, int_hist)
+ assert_array_equal(edges, int_edges)
+
def test_weights(self):
v = np.random.rand(100)
w = np.ones(100) * 5