summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatti Picus <matti.picus@gmail.com>2021-02-19 16:15:29 +0200
committerGitHub <noreply@github.com>2021-02-19 16:15:29 +0200
commit572d5a1dadb398d0b7d27468230f186f72987b3b (patch)
treee44737bedfb3072382cb8cc911f505e409f385d2
parent02d6e9a4b4bae1eca7896fdba043575ef3aaf038 (diff)
parente01d89471b0c4a69f7d1e868af31af7220f90318 (diff)
downloadnumpy-572d5a1dadb398d0b7d27468230f186f72987b3b.tar.gz
Merge pull request #15392 from seberg/void-no-arr-modification
BUG: Remove temporary change of descr/flags in VOID functions
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src189
-rw-r--r--numpy/core/tests/test_indexing.py24
-rw-r--r--numpy/core/tests/test_numeric.py21
3 files changed, 153 insertions, 81 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index ecaca72a1..ad7461227 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -42,6 +42,32 @@
#include "npy_cblas.h"
#include "npy_buffer.h"
+
+/*
+ * Define a stack allocated dummy array with only the minimum information set:
+ * 1. The descr, the main field interesting here.
+ * 2. The flags, which are needed for alignment;.
+ * 3. The type is set to NULL and the base is the original array, if this
+ * is used within a subarray getitem to create a new view, the base
+ * must be walked until the type is not NULL.
+ *
+ * The following should create errors in debug mode (if deallocated
+ * incorrectly), since base would be incorrectly decref'd as well.
+ * This is especially important for nonzero and copyswap, which may run with
+ * the GIL released.
+ */
+static NPY_INLINE PyArrayObject_fields
+get_dummy_stack_array(PyArrayObject *orig)
+{
+ PyArrayObject_fields new_fields;
+ new_fields.flags = PyArray_FLAGS(orig);
+ /* Set to NULL so the dummy object can be distinguished from the real one */
+ Py_TYPE(&new_fields) = NULL;
+ new_fields.base = (PyObject *)orig;
+ return new_fields;
+}
+
+
/* check for sequences, but ignore the types numpy considers scalars */
static NPY_INLINE npy_bool
PySequence_NoString_Check(PyObject *op) {
@@ -674,6 +700,7 @@ OBJECT_setitem(PyObject *op, void *ov, void *NPY_UNUSED(ap))
return PyErr_Occurred() ? -1 : 0;
}
+
/* VOID */
static PyObject *
@@ -681,22 +708,21 @@ VOID_getitem(void *input, void *vap)
{
PyArrayObject *ap = vap;
char *ip = input;
- PyArray_Descr* descr;
+ PyArray_Descr* descr = PyArray_DESCR(vap);
- descr = PyArray_DESCR(ap);
if (PyDataType_HASFIELDS(descr)) {
PyObject *key;
PyObject *names;
int i, n;
PyObject *ret;
PyObject *tup;
- int savedflags;
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
/* get the names from the fields dictionary*/
names = descr->names;
n = PyTuple_GET_SIZE(names);
ret = PyTuple_New(n);
- savedflags = PyArray_FLAGS(ap);
for (i = 0; i < n; i++) {
npy_intp offset;
PyArray_Descr *new;
@@ -704,26 +730,19 @@ VOID_getitem(void *input, void *vap)
tup = PyDict_GetItem(descr->fields, key);
if (_unpack_field(tup, &new, &offset) < 0) {
Py_DECREF(ret);
- ((PyArrayObject_fields *)ap)->descr = descr;
return NULL;
}
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)ap)->descr = new;
+ dummy_fields.descr = new;
/* update alignment based on offset */
if ((new->alignment > 1)
&& ((((npy_intp)(ip+offset)) % new->alignment) != 0)) {
- PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED);
+ PyArray_CLEARFLAGS(dummy_arr, NPY_ARRAY_ALIGNED);
}
else {
- PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED);
+ PyArray_ENABLEFLAGS(dummy_arr, NPY_ARRAY_ALIGNED);
}
- PyTuple_SET_ITEM(ret, i, PyArray_GETITEM(ap, ip+offset));
- ((PyArrayObject_fields *)ap)->flags = savedflags;
+ PyTuple_SET_ITEM(ret, i, PyArray_GETITEM(dummy_arr, ip+offset));
}
- ((PyArrayObject_fields *)ap)->descr = descr;
return ret;
}
@@ -739,11 +758,28 @@ VOID_getitem(void *input, void *vap)
return NULL;
}
Py_INCREF(descr->subarray->base);
+
+ /*
+ * NOTE: There is the possibility of recursive calls from the above
+ * field branch. These calls use a dummy arr for thread
+ * (and general) safety. However, we must set the base array,
+ * so if such a dummy array was passed (its type is NULL),
+ * we have walk its base until the initial array is found.
+ *
+ * TODO: This should be fixed, the next "generation" of GETITEM will
+ * probably need to pass in the original array (in addition
+ * to the dtype as a method). Alternatively, VOID dtypes
+ * could have special handling.
+ */
+ PyObject *base = (PyObject *)ap;
+ while (Py_TYPE(base) == NULL) {
+ base = PyArray_BASE((PyArrayObject *)base);
+ }
ret = (PyArrayObject *)PyArray_NewFromDescrAndBase(
&PyArray_Type, descr->subarray->base,
shape.len, shape.ptr, NULL, ip,
PyArray_FLAGS(ap) & ~NPY_ARRAY_F_CONTIGUOUS,
- NULL, (PyObject *)ap);
+ NULL, base);
npy_free_cache_dim_obj(shape);
return (PyObject *)ret;
}
@@ -761,7 +797,8 @@ NPY_NO_EXPORT int PyArray_CopyObject(PyArrayObject *, PyObject *);
* individual fields of a numpy structure, in VOID_setitem. Compare to inner
* loops in VOID_getitem and VOID_nonzero.
*
- * WARNING: Clobbers arr's dtype and alignment flag.
+ * WARNING: Clobbers arr's dtype and alignment flag, should not be used
+ * on the original array!
*/
NPY_NO_EXPORT int
_setup_field(int i, PyArray_Descr *descr, PyArrayObject *arr,
@@ -798,7 +835,7 @@ static int
_copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata,
PyArray_Descr *srcdescr, char *srcdata){
PyArrayObject_fields dummy_struct;
- PyArrayObject *dummy = (PyArrayObject *)&dummy_struct;
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_struct;
npy_int names_size = PyTuple_GET_SIZE(dstdescr->names);
npy_intp offset;
npy_int i;
@@ -808,11 +845,11 @@ _copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata,
if (PyArray_EquivTypes(srcdescr, dstdescr)) {
for (i = 0; i < names_size; i++) {
/* neither line can ever fail, in principle */
- if (_setup_field(i, dstdescr, dummy, &offset, dstdata)) {
+ if (_setup_field(i, dstdescr, dummy_arr, &offset, dstdata)) {
return -1;
}
- PyArray_DESCR(dummy)->f->copyswap(dstdata + offset,
- srcdata + offset, 0, dummy);
+ PyArray_DESCR(dummy_arr)->f->copyswap(dstdata + offset,
+ srcdata + offset, 0, dummy_arr);
}
return 0;
}
@@ -831,13 +868,10 @@ VOID_setitem(PyObject *op, void *input, void *vap)
{
char *ip = input;
PyArrayObject *ap = vap;
- PyArray_Descr *descr;
- int flags;
- int itemsize=PyArray_DESCR(ap)->elsize;
+ int itemsize = PyArray_DESCR(ap)->elsize;
int res;
+ PyArray_Descr *descr = PyArray_DESCR(ap);
- descr = PyArray_DESCR(ap);
- flags = PyArray_FLAGS(ap);
if (PyDataType_HASFIELDS(descr)) {
PyObject *errmsg;
npy_int i;
@@ -874,11 +908,13 @@ VOID_setitem(PyObject *op, void *input, void *vap)
return -1;
}
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+
for (i = 0; i < names_size; i++) {
PyObject *item;
- /* temporarily make ap have only this field */
- if (_setup_field(i, descr, ap, &offset, ip) == -1) {
+ if (_setup_field(i, descr, dummy_arr, &offset, ip) == -1) {
failed = 1;
break;
}
@@ -888,7 +924,7 @@ VOID_setitem(PyObject *op, void *input, void *vap)
break;
}
/* use setitem to set this field */
- if (PyArray_SETITEM(ap, ip + offset, item) < 0) {
+ if (PyArray_SETITEM(dummy_arr, ip + offset, item) < 0) {
failed = 1;
break;
}
@@ -898,24 +934,23 @@ VOID_setitem(PyObject *op, void *input, void *vap)
/* Otherwise must be non-void scalar. Try to assign to each field */
npy_intp names_size = PyTuple_GET_SIZE(descr->names);
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+
for (i = 0; i < names_size; i++) {
/* temporarily make ap have only this field */
- if (_setup_field(i, descr, ap, &offset, ip) == -1) {
+ if (_setup_field(i, descr, dummy_arr, &offset, ip) == -1) {
failed = 1;
break;
}
/* use setitem to set this field */
- if (PyArray_SETITEM(ap, ip + offset, op) < 0) {
+ if (PyArray_SETITEM(dummy_arr, ip + offset, op) < 0) {
failed = 1;
break;
}
}
}
- /* reset clobbered attributes */
- ((PyArrayObject_fields *)(ap))->descr = descr;
- ((PyArrayObject_fields *)(ap))->flags = flags;
-
if (failed) {
return -1;
}
@@ -924,7 +959,6 @@ VOID_setitem(PyObject *op, void *input, void *vap)
else if (PyDataType_HASSUBARRAY(descr)) {
/* copy into an array of the same basic type */
PyArray_Dims shape = {NULL, -1};
- PyArrayObject *ret;
if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) {
npy_free_cache_dim_obj(shape);
PyErr_SetString(PyExc_ValueError,
@@ -932,10 +966,15 @@ VOID_setitem(PyObject *op, void *input, void *vap)
return -1;
}
Py_INCREF(descr->subarray->base);
- ret = (PyArrayObject *)PyArray_NewFromDescrAndBase(
+ /*
+ * Note we set no base object here, as to not rely on the input
+ * being a valid object for base setting. `ret` nevertheless does
+ * does not own its data, this is generally not good, but localized.
+ */
+ PyArrayObject *ret = (PyArrayObject *)PyArray_NewFromDescrAndBase(
&PyArray_Type, descr->subarray->base,
shape.len, shape.ptr, NULL, ip,
- PyArray_FLAGS(ap), NULL, (PyObject *)ap);
+ PyArray_FLAGS(ap), NULL, NULL);
npy_free_cache_dim_obj(shape);
if (!ret) {
return -1;
@@ -2287,6 +2326,7 @@ STRING_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
return;
}
+
/* */
static void
VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
@@ -2303,29 +2343,26 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
if (PyArray_HASFIELDS(arr)) {
PyObject *key, *value;
-
Py_ssize_t pos = 0;
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+
while (PyDict_Next(descr->fields, &pos, &key, &value)) {
npy_intp offset;
- PyArray_Descr * new;
+ PyArray_Descr *new;
if (NPY_TITLE_KEY(key, value)) {
continue;
}
if (_unpack_field(value, &new, &offset) < 0) {
- ((PyArrayObject_fields *)arr)->descr = descr;
return;
}
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)arr)->descr = new;
+
+ dummy_fields.descr = new;
new->f->copyswapn(dst+offset, dstride,
(src != NULL ? src+offset : NULL),
- sstride, n, swap, arr);
+ sstride, n, swap, dummy_arr);
}
- ((PyArrayObject_fields *)arr)->descr = descr;
return;
}
if (PyDataType_HASSUBARRAY(descr)) {
@@ -2351,11 +2388,6 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
}
new = descr->subarray->base;
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)arr)->descr = new;
dstptr = dst;
srcptr = src;
subitemsize = new->elsize;
@@ -2363,16 +2395,20 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
/* There cannot be any elements, so return */
return;
}
+
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+ ((PyArrayObject_fields *)dummy_arr)->descr = new;
+
num = descr->elsize / subitemsize;
for (i = 0; i < n; i++) {
new->f->copyswapn(dstptr, subitemsize, srcptr,
- subitemsize, num, swap, arr);
+ subitemsize, num, swap, dummy_arr);
dstptr += dstride;
if (srcptr) {
srcptr += sstride;
}
}
- ((PyArrayObject_fields *)arr)->descr = descr;
return;
}
/* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */
@@ -2396,26 +2432,24 @@ VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr)
PyObject *key, *value;
Py_ssize_t pos = 0;
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+
while (PyDict_Next(descr->fields, &pos, &key, &value)) {
npy_intp offset;
+
PyArray_Descr * new;
if (NPY_TITLE_KEY(key, value)) {
continue;
}
if (_unpack_field(value, &new, &offset) < 0) {
- ((PyArrayObject_fields *)arr)->descr = descr;
return;
}
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)arr)->descr = new;
+ dummy_fields.descr = new;
new->f->copyswap(dst+offset,
(src != NULL ? src+offset : NULL),
- swap, arr);
+ swap, dummy_arr);
}
- ((PyArrayObject_fields *)arr)->descr = descr;
return;
}
if (PyDataType_HASSUBARRAY(descr)) {
@@ -2439,20 +2473,19 @@ VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr)
}
new = descr->subarray->base;
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)arr)->descr = new;
subitemsize = new->elsize;
if (subitemsize == 0) {
/* There cannot be any elements, so return */
return;
}
+
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
+ dummy_fields.descr = new;
+
num = descr->elsize / subitemsize;
new->f->copyswapn(dst, subitemsize, src,
- subitemsize, num, swap, arr);
- ((PyArrayObject_fields *)arr)->descr = descr;
+ subitemsize, num, swap, dummy_arr);
return;
}
/* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */
@@ -2707,11 +2740,11 @@ VOID_nonzero (char *ip, PyArrayObject *ap)
if (PyArray_HASFIELDS(ap)) {
PyArray_Descr *descr;
PyObject *key, *value;
- int savedflags;
Py_ssize_t pos = 0;
+ PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap);
+ PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields;
descr = PyArray_DESCR(ap);
- savedflags = PyArray_FLAGS(ap);
while (PyDict_Next(descr->fields, &pos, &key, &value)) {
PyArray_Descr * new;
npy_intp offset;
@@ -2722,12 +2755,8 @@ VOID_nonzero (char *ip, PyArrayObject *ap)
PyErr_Clear();
continue;
}
- /*
- * TODO: temporarily modifying the array like this
- * is bad coding style, should be changed.
- */
- ((PyArrayObject_fields *)ap)->descr = new;
- ((PyArrayObject_fields *)ap)->flags = savedflags;
+
+ dummy_fields.descr = new;
if ((new->alignment > 1) && !__ALIGNED(ip + offset,
new->alignment)) {
PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED);
@@ -2735,13 +2764,11 @@ VOID_nonzero (char *ip, PyArrayObject *ap)
else {
PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED);
}
- if (new->f->nonzero(ip+offset, ap)) {
+ if (new->f->nonzero(ip+offset, dummy_arr)) {
nonz = NPY_TRUE;
break;
}
}
- ((PyArrayObject_fields *)ap)->descr = descr;
- ((PyArrayObject_fields *)ap)->flags = savedflags;
return nonz;
}
len = PyArray_DESCR(ap)->elsize;
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index 73dbc429c..57b1f3827 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -563,6 +563,30 @@ class TestIndexing:
with pytest.raises(IndexError):
arr[(index,) * num] = 1.
+ def test_structured_advanced_indexing(self):
+ # Test that copyswap(n) used by integer array indexing is threadsafe
+ # for structured datatypes, see gh-15387. This test can behave randomly.
+ from concurrent.futures import ThreadPoolExecutor
+
+ # Create a deeply nested dtype to make a failure more likely:
+ dt = np.dtype([("", "f8")])
+ dt = np.dtype([("", dt)] * 2)
+ dt = np.dtype([("", dt)] * 2)
+ # The array should be large enough to likely run into threading issues
+ arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0]
+
+ rng = np.random.default_rng()
+ def func(arr):
+ indx = rng.integers(0, len(arr), size=6000, dtype=np.intp)
+ arr[indx]
+
+ tpe = ThreadPoolExecutor(max_workers=8)
+ futures = [tpe.submit(func, arr) for _ in range(10)]
+ for f in futures:
+ f.result()
+
+ assert arr.dtype is dt
+
class TestFieldIndexing:
def test_scalar_return_type(self):
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 06511822e..8d3cec708 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -1536,6 +1536,27 @@ class TestNonzero:
a = np.array([[ThrowsAfter(15)]]*10)
assert_raises(ValueError, np.nonzero, a)
+ def test_structured_threadsafety(self):
+ # Nonzero (and some other functions) should be threadsafe for
+ # structured datatypes, see gh-15387. This test can behave randomly.
+ from concurrent.futures import ThreadPoolExecutor
+
+ # Create a deeply nested dtype to make a failure more likely:
+ dt = np.dtype([("", "f8")])
+ dt = np.dtype([("", dt)])
+ dt = np.dtype([("", dt)] * 2)
+ # The array should be large enough to likely run into threading issues
+ arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0]
+ def func(arr):
+ arr.nonzero()
+
+ tpe = ThreadPoolExecutor(max_workers=8)
+ futures = [tpe.submit(func, arr) for _ in range(10)]
+ for f in futures:
+ f.result()
+
+ assert arr.dtype is dt
+
class TestIndex:
def test_boolean(self):