diff options
author | Matti Picus <matti.picus@gmail.com> | 2021-02-19 16:15:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-19 16:15:29 +0200 |
commit | 572d5a1dadb398d0b7d27468230f186f72987b3b (patch) | |
tree | e44737bedfb3072382cb8cc911f505e409f385d2 | |
parent | 02d6e9a4b4bae1eca7896fdba043575ef3aaf038 (diff) | |
parent | e01d89471b0c4a69f7d1e868af31af7220f90318 (diff) | |
download | numpy-572d5a1dadb398d0b7d27468230f186f72987b3b.tar.gz |
Merge pull request #15392 from seberg/void-no-arr-modification
BUG: Remove temporary change of descr/flags in VOID functions
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 189 | ||||
-rw-r--r-- | numpy/core/tests/test_indexing.py | 24 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 21 |
3 files changed, 153 insertions, 81 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index ecaca72a1..ad7461227 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -42,6 +42,32 @@ #include "npy_cblas.h" #include "npy_buffer.h" + +/* + * Define a stack allocated dummy array with only the minimum information set: + * 1. The descr, the main field interesting here. + * 2. The flags, which are needed for alignment;. + * 3. The type is set to NULL and the base is the original array, if this + * is used within a subarray getitem to create a new view, the base + * must be walked until the type is not NULL. + * + * The following should create errors in debug mode (if deallocated + * incorrectly), since base would be incorrectly decref'd as well. + * This is especially important for nonzero and copyswap, which may run with + * the GIL released. + */ +static NPY_INLINE PyArrayObject_fields +get_dummy_stack_array(PyArrayObject *orig) +{ + PyArrayObject_fields new_fields; + new_fields.flags = PyArray_FLAGS(orig); + /* Set to NULL so the dummy object can be distinguished from the real one */ + Py_TYPE(&new_fields) = NULL; + new_fields.base = (PyObject *)orig; + return new_fields; +} + + /* check for sequences, but ignore the types numpy considers scalars */ static NPY_INLINE npy_bool PySequence_NoString_Check(PyObject *op) { @@ -674,6 +700,7 @@ OBJECT_setitem(PyObject *op, void *ov, void *NPY_UNUSED(ap)) return PyErr_Occurred() ? -1 : 0; } + /* VOID */ static PyObject * @@ -681,22 +708,21 @@ VOID_getitem(void *input, void *vap) { PyArrayObject *ap = vap; char *ip = input; - PyArray_Descr* descr; + PyArray_Descr* descr = PyArray_DESCR(vap); - descr = PyArray_DESCR(ap); if (PyDataType_HASFIELDS(descr)) { PyObject *key; PyObject *names; int i, n; PyObject *ret; PyObject *tup; - int savedflags; + PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; /* get the names from the fields dictionary*/ names = descr->names; n = PyTuple_GET_SIZE(names); ret = PyTuple_New(n); - savedflags = PyArray_FLAGS(ap); for (i = 0; i < n; i++) { npy_intp offset; PyArray_Descr *new; @@ -704,26 +730,19 @@ VOID_getitem(void *input, void *vap) tup = PyDict_GetItem(descr->fields, key); if (_unpack_field(tup, &new, &offset) < 0) { Py_DECREF(ret); - ((PyArrayObject_fields *)ap)->descr = descr; return NULL; } - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)ap)->descr = new; + dummy_fields.descr = new; /* update alignment based on offset */ if ((new->alignment > 1) && ((((npy_intp)(ip+offset)) % new->alignment) != 0)) { - PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED); + PyArray_CLEARFLAGS(dummy_arr, NPY_ARRAY_ALIGNED); } else { - PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED); + PyArray_ENABLEFLAGS(dummy_arr, NPY_ARRAY_ALIGNED); } - PyTuple_SET_ITEM(ret, i, PyArray_GETITEM(ap, ip+offset)); - ((PyArrayObject_fields *)ap)->flags = savedflags; + PyTuple_SET_ITEM(ret, i, PyArray_GETITEM(dummy_arr, ip+offset)); } - ((PyArrayObject_fields *)ap)->descr = descr; return ret; } @@ -739,11 +758,28 @@ VOID_getitem(void *input, void *vap) return NULL; } Py_INCREF(descr->subarray->base); + + /* + * NOTE: There is the possibility of recursive calls from the above + * field branch. These calls use a dummy arr for thread + * (and general) safety. However, we must set the base array, + * so if such a dummy array was passed (its type is NULL), + * we have walk its base until the initial array is found. + * + * TODO: This should be fixed, the next "generation" of GETITEM will + * probably need to pass in the original array (in addition + * to the dtype as a method). Alternatively, VOID dtypes + * could have special handling. + */ + PyObject *base = (PyObject *)ap; + while (Py_TYPE(base) == NULL) { + base = PyArray_BASE((PyArrayObject *)base); + } ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( &PyArray_Type, descr->subarray->base, shape.len, shape.ptr, NULL, ip, PyArray_FLAGS(ap) & ~NPY_ARRAY_F_CONTIGUOUS, - NULL, (PyObject *)ap); + NULL, base); npy_free_cache_dim_obj(shape); return (PyObject *)ret; } @@ -761,7 +797,8 @@ NPY_NO_EXPORT int PyArray_CopyObject(PyArrayObject *, PyObject *); * individual fields of a numpy structure, in VOID_setitem. Compare to inner * loops in VOID_getitem and VOID_nonzero. * - * WARNING: Clobbers arr's dtype and alignment flag. + * WARNING: Clobbers arr's dtype and alignment flag, should not be used + * on the original array! */ NPY_NO_EXPORT int _setup_field(int i, PyArray_Descr *descr, PyArrayObject *arr, @@ -798,7 +835,7 @@ static int _copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata, PyArray_Descr *srcdescr, char *srcdata){ PyArrayObject_fields dummy_struct; - PyArrayObject *dummy = (PyArrayObject *)&dummy_struct; + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_struct; npy_int names_size = PyTuple_GET_SIZE(dstdescr->names); npy_intp offset; npy_int i; @@ -808,11 +845,11 @@ _copy_and_return_void_setitem(PyArray_Descr *dstdescr, char *dstdata, if (PyArray_EquivTypes(srcdescr, dstdescr)) { for (i = 0; i < names_size; i++) { /* neither line can ever fail, in principle */ - if (_setup_field(i, dstdescr, dummy, &offset, dstdata)) { + if (_setup_field(i, dstdescr, dummy_arr, &offset, dstdata)) { return -1; } - PyArray_DESCR(dummy)->f->copyswap(dstdata + offset, - srcdata + offset, 0, dummy); + PyArray_DESCR(dummy_arr)->f->copyswap(dstdata + offset, + srcdata + offset, 0, dummy_arr); } return 0; } @@ -831,13 +868,10 @@ VOID_setitem(PyObject *op, void *input, void *vap) { char *ip = input; PyArrayObject *ap = vap; - PyArray_Descr *descr; - int flags; - int itemsize=PyArray_DESCR(ap)->elsize; + int itemsize = PyArray_DESCR(ap)->elsize; int res; + PyArray_Descr *descr = PyArray_DESCR(ap); - descr = PyArray_DESCR(ap); - flags = PyArray_FLAGS(ap); if (PyDataType_HASFIELDS(descr)) { PyObject *errmsg; npy_int i; @@ -874,11 +908,13 @@ VOID_setitem(PyObject *op, void *input, void *vap) return -1; } + PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + for (i = 0; i < names_size; i++) { PyObject *item; - /* temporarily make ap have only this field */ - if (_setup_field(i, descr, ap, &offset, ip) == -1) { + if (_setup_field(i, descr, dummy_arr, &offset, ip) == -1) { failed = 1; break; } @@ -888,7 +924,7 @@ VOID_setitem(PyObject *op, void *input, void *vap) break; } /* use setitem to set this field */ - if (PyArray_SETITEM(ap, ip + offset, item) < 0) { + if (PyArray_SETITEM(dummy_arr, ip + offset, item) < 0) { failed = 1; break; } @@ -898,24 +934,23 @@ VOID_setitem(PyObject *op, void *input, void *vap) /* Otherwise must be non-void scalar. Try to assign to each field */ npy_intp names_size = PyTuple_GET_SIZE(descr->names); + PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + for (i = 0; i < names_size; i++) { /* temporarily make ap have only this field */ - if (_setup_field(i, descr, ap, &offset, ip) == -1) { + if (_setup_field(i, descr, dummy_arr, &offset, ip) == -1) { failed = 1; break; } /* use setitem to set this field */ - if (PyArray_SETITEM(ap, ip + offset, op) < 0) { + if (PyArray_SETITEM(dummy_arr, ip + offset, op) < 0) { failed = 1; break; } } } - /* reset clobbered attributes */ - ((PyArrayObject_fields *)(ap))->descr = descr; - ((PyArrayObject_fields *)(ap))->flags = flags; - if (failed) { return -1; } @@ -924,7 +959,6 @@ VOID_setitem(PyObject *op, void *input, void *vap) else if (PyDataType_HASSUBARRAY(descr)) { /* copy into an array of the same basic type */ PyArray_Dims shape = {NULL, -1}; - PyArrayObject *ret; if (!(PyArray_IntpConverter(descr->subarray->shape, &shape))) { npy_free_cache_dim_obj(shape); PyErr_SetString(PyExc_ValueError, @@ -932,10 +966,15 @@ VOID_setitem(PyObject *op, void *input, void *vap) return -1; } Py_INCREF(descr->subarray->base); - ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( + /* + * Note we set no base object here, as to not rely on the input + * being a valid object for base setting. `ret` nevertheless does + * does not own its data, this is generally not good, but localized. + */ + PyArrayObject *ret = (PyArrayObject *)PyArray_NewFromDescrAndBase( &PyArray_Type, descr->subarray->base, shape.len, shape.ptr, NULL, ip, - PyArray_FLAGS(ap), NULL, (PyObject *)ap); + PyArray_FLAGS(ap), NULL, NULL); npy_free_cache_dim_obj(shape); if (!ret) { return -1; @@ -2287,6 +2326,7 @@ STRING_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, return; } + /* */ static void VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, @@ -2303,29 +2343,26 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, if (PyArray_HASFIELDS(arr)) { PyObject *key, *value; - Py_ssize_t pos = 0; + PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + while (PyDict_Next(descr->fields, &pos, &key, &value)) { npy_intp offset; - PyArray_Descr * new; + PyArray_Descr *new; if (NPY_TITLE_KEY(key, value)) { continue; } if (_unpack_field(value, &new, &offset) < 0) { - ((PyArrayObject_fields *)arr)->descr = descr; return; } - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)arr)->descr = new; + + dummy_fields.descr = new; new->f->copyswapn(dst+offset, dstride, (src != NULL ? src+offset : NULL), - sstride, n, swap, arr); + sstride, n, swap, dummy_arr); } - ((PyArrayObject_fields *)arr)->descr = descr; return; } if (PyDataType_HASSUBARRAY(descr)) { @@ -2351,11 +2388,6 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, } new = descr->subarray->base; - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)arr)->descr = new; dstptr = dst; srcptr = src; subitemsize = new->elsize; @@ -2363,16 +2395,20 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride, /* There cannot be any elements, so return */ return; } + + PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + ((PyArrayObject_fields *)dummy_arr)->descr = new; + num = descr->elsize / subitemsize; for (i = 0; i < n; i++) { new->f->copyswapn(dstptr, subitemsize, srcptr, - subitemsize, num, swap, arr); + subitemsize, num, swap, dummy_arr); dstptr += dstride; if (srcptr) { srcptr += sstride; } } - ((PyArrayObject_fields *)arr)->descr = descr; return; } /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */ @@ -2396,26 +2432,24 @@ VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr) PyObject *key, *value; Py_ssize_t pos = 0; + PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + while (PyDict_Next(descr->fields, &pos, &key, &value)) { npy_intp offset; + PyArray_Descr * new; if (NPY_TITLE_KEY(key, value)) { continue; } if (_unpack_field(value, &new, &offset) < 0) { - ((PyArrayObject_fields *)arr)->descr = descr; return; } - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)arr)->descr = new; + dummy_fields.descr = new; new->f->copyswap(dst+offset, (src != NULL ? src+offset : NULL), - swap, arr); + swap, dummy_arr); } - ((PyArrayObject_fields *)arr)->descr = descr; return; } if (PyDataType_HASSUBARRAY(descr)) { @@ -2439,20 +2473,19 @@ VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr) } new = descr->subarray->base; - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)arr)->descr = new; subitemsize = new->elsize; if (subitemsize == 0) { /* There cannot be any elements, so return */ return; } + + PyArrayObject_fields dummy_fields = get_dummy_stack_array(arr); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; + dummy_fields.descr = new; + num = descr->elsize / subitemsize; new->f->copyswapn(dst, subitemsize, src, - subitemsize, num, swap, arr); - ((PyArrayObject_fields *)arr)->descr = descr; + subitemsize, num, swap, dummy_arr); return; } /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */ @@ -2707,11 +2740,11 @@ VOID_nonzero (char *ip, PyArrayObject *ap) if (PyArray_HASFIELDS(ap)) { PyArray_Descr *descr; PyObject *key, *value; - int savedflags; Py_ssize_t pos = 0; + PyArrayObject_fields dummy_fields = get_dummy_stack_array(ap); + PyArrayObject *dummy_arr = (PyArrayObject *)&dummy_fields; descr = PyArray_DESCR(ap); - savedflags = PyArray_FLAGS(ap); while (PyDict_Next(descr->fields, &pos, &key, &value)) { PyArray_Descr * new; npy_intp offset; @@ -2722,12 +2755,8 @@ VOID_nonzero (char *ip, PyArrayObject *ap) PyErr_Clear(); continue; } - /* - * TODO: temporarily modifying the array like this - * is bad coding style, should be changed. - */ - ((PyArrayObject_fields *)ap)->descr = new; - ((PyArrayObject_fields *)ap)->flags = savedflags; + + dummy_fields.descr = new; if ((new->alignment > 1) && !__ALIGNED(ip + offset, new->alignment)) { PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED); @@ -2735,13 +2764,11 @@ VOID_nonzero (char *ip, PyArrayObject *ap) else { PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED); } - if (new->f->nonzero(ip+offset, ap)) { + if (new->f->nonzero(ip+offset, dummy_arr)) { nonz = NPY_TRUE; break; } } - ((PyArrayObject_fields *)ap)->descr = descr; - ((PyArrayObject_fields *)ap)->flags = savedflags; return nonz; } len = PyArray_DESCR(ap)->elsize; diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index 73dbc429c..57b1f3827 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -563,6 +563,30 @@ class TestIndexing: with pytest.raises(IndexError): arr[(index,) * num] = 1. + def test_structured_advanced_indexing(self): + # Test that copyswap(n) used by integer array indexing is threadsafe + # for structured datatypes, see gh-15387. This test can behave randomly. + from concurrent.futures import ThreadPoolExecutor + + # Create a deeply nested dtype to make a failure more likely: + dt = np.dtype([("", "f8")]) + dt = np.dtype([("", dt)] * 2) + dt = np.dtype([("", dt)] * 2) + # The array should be large enough to likely run into threading issues + arr = np.random.uniform(size=(6000, 8)).view(dt)[:, 0] + + rng = np.random.default_rng() + def func(arr): + indx = rng.integers(0, len(arr), size=6000, dtype=np.intp) + arr[indx] + + tpe = ThreadPoolExecutor(max_workers=8) + futures = [tpe.submit(func, arr) for _ in range(10)] + for f in futures: + f.result() + + assert arr.dtype is dt + class TestFieldIndexing: def test_scalar_return_type(self): diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 06511822e..8d3cec708 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1536,6 +1536,27 @@ class TestNonzero: a = np.array([[ThrowsAfter(15)]]*10) assert_raises(ValueError, np.nonzero, a) + def test_structured_threadsafety(self): + # Nonzero (and some other functions) should be threadsafe for + # structured datatypes, see gh-15387. This test can behave randomly. + from concurrent.futures import ThreadPoolExecutor + + # Create a deeply nested dtype to make a failure more likely: + dt = np.dtype([("", "f8")]) + dt = np.dtype([("", dt)]) + dt = np.dtype([("", dt)] * 2) + # The array should be large enough to likely run into threading issues + arr = np.random.uniform(size=(5000, 4)).view(dt)[:, 0] + def func(arr): + arr.nonzero() + + tpe = ThreadPoolExecutor(max_workers=8) + futures = [tpe.submit(func, arr) for _ in range(10)] + for f in futures: + f.result() + + assert arr.dtype is dt + class TestIndex: def test_boolean(self): |