diff options
-rw-r--r-- | doc/release/upcoming_changes/20993.improvement.rst | 5 | ||||
-rw-r--r-- | numpy/core/_add_newdocs.py | 17 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 17 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.h | 9 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 95 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 73 |
6 files changed, 140 insertions, 76 deletions
diff --git a/doc/release/upcoming_changes/20993.improvement.rst b/doc/release/upcoming_changes/20993.improvement.rst new file mode 100644 index 000000000..f0019c45e --- /dev/null +++ b/doc/release/upcoming_changes/20993.improvement.rst @@ -0,0 +1,5 @@ +``np.fromiter`` now accepts objects and subarrays +------------------------------------------------- +The `~numpy.fromiter` function now supports object and +subarray dtypes. Please see the function documentation for +examples. diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index dc0285a11..baafc9127 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1398,6 +1398,11 @@ add_newdoc('numpy.core.multiarray', 'fromiter', An iterable object providing data for the array. dtype : data-type The data-type of the returned array. + + .. versionchanged:: 1.23 + Object and subarray dtypes are now supported (note that the final + result is not 1-D for a subarray dtype). + count : int, optional The number of items to read from *iterable*. The default is -1, which means all data is read. 
@@ -1421,6 +1426,18 @@ add_newdoc('numpy.core.multiarray', 'fromiter', >>> np.fromiter(iterable, float) array([ 0., 1., 4., 9., 16.]) + A carefully constructed subarray dtype will lead to higher dimensional + results: + + >>> iterable = ((x+1, x+2) for x in range(5)) + >>> np.fromiter(iterable, dtype=np.dtype((int, 2))) + array([[1, 2], + [2, 3], + [3, 4], + [4, 5], + [5, 6]]) + + """.replace( "${ARRAY_FUNCTION_LIKE}", array_function_like_doc, diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 8264f83b2..aa612146c 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -127,23 +127,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype) return 0; } -NPY_NO_EXPORT char * -index2ptr(PyArrayObject *mp, npy_intp i) -{ - npy_intp dim0; - - if (PyArray_NDIM(mp) == 0) { - PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed"); - return NULL; - } - dim0 = PyArray_DIMS(mp)[0]; - if (check_and_adjust_index(&i, dim0, 0, NULL) < 0) - return NULL; - if (i == 0) { - return PyArray_DATA(mp); - } - return PyArray_BYTES(mp)+i*PyArray_STRIDES(mp)[0]; -} NPY_NO_EXPORT int _zerofill(PyArrayObject *ret) diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index ed022e4f8..30a61f425 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -43,9 +43,6 @@ NPY_NO_EXPORT int PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype); -NPY_NO_EXPORT int -PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, - PyArray_Descr **out_dtype, int string_status); /* * Returns NULL without setting an exception if no scalar is matched, a @@ -54,12 +51,6 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims, NPY_NO_EXPORT PyArray_Descr * _array_find_python_scalar_type(PyObject *op); -NPY_NO_EXPORT PyArray_Descr * -_array_typedescr_fromstr(char const *str); - -NPY_NO_EXPORT char * 
-index2ptr(PyArrayObject *mp, npy_intp i); - NPY_NO_EXPORT int _zerofill(PyArrayObject *ret); diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index f72ba11cd..c0e80d1ee 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -3894,11 +3894,9 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype, NPY_NO_EXPORT PyObject * PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) { - PyObject *value; PyObject *iter = NULL; PyArrayObject *ret = NULL; npy_intp i, elsize, elcount; - char *item, *new_data; if (dtype == NULL) { return NULL; @@ -3910,6 +3908,7 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) } if (PyDataType_ISUNSIZED(dtype)) { + /* If this error is removed, the `ret` allocation may need fixing */ PyErr_SetString(PyExc_ValueError, "Must specify length when using variable-size data-type."); goto done; @@ -3927,38 +3926,50 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) elsize = dtype->elsize; /* - * We would need to alter the memory RENEW code to decrement any - * reference counts before throwing away any memory. + * Note that PyArray_DESCR(ret) may not match dtype. There are exactly + * two cases where this can happen: empty strings/bytes/void (rejected + * above) and subarray dtypes (supported by sticking with `dtype`). 
*/ - if (PyDataType_REFCHK(dtype)) { - PyErr_SetString(PyExc_ValueError, - "cannot create object arrays from iterator"); - goto done; - } - + Py_INCREF(dtype); ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1, &elcount, NULL,NULL, 0, NULL); - dtype = NULL; if (ret == NULL) { goto done; } - for (i = 0; (i < count || count == -1) && - (value = PyIter_Next(iter)); i++) { - if (i >= elcount && elsize != 0) { +#ifdef NPY_RELAXED_STRIDES_DEBUG + /* Incompatible with NPY_RELAXED_STRIDES_DEBUG due to growing */ + if (elcount == 1) { + PyArray_STRIDES(ret)[0] = elsize; + } +#endif /* NPY_RELAXED_STRIDES_DEBUG */ + + + char *item = PyArray_BYTES(ret); + for (i = 0; i < count || count == -1; i++, item += elsize) { + PyObject *value = PyIter_Next(iter); + if (value == NULL) { + if (PyErr_Occurred()) { + /* Fetching next item failed rather than exhausting iterator */ + goto done; + } + break; + } + + if (NPY_UNLIKELY(i >= elcount) && elsize != 0) { + char *new_data = NULL; npy_intp nbytes; /* Grow PyArray_DATA(ret): this is similar for the strategy for PyListObject, but we use 50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ... + TODO: The loadtxt code now uses a `growth` helper that would + be suitable to reuse here. */ elcount = (i >> 1) + (i < 4 ? 
4 : 2) + i; if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) { /* The handler is always valid */ - new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), nbytes, - PyArray_HANDLER(ret)); - } - else { - new_data = NULL; + new_data = PyDataMem_UserRENEW( + PyArray_BYTES(ret), nbytes, PyArray_HANDLER(ret)); } if (new_data == NULL) { PyErr_SetString(PyExc_MemoryError, @@ -3967,11 +3978,17 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) goto done; } ((PyArrayObject_fields *)ret)->data = new_data; + /* resize array for cleanup: */ + PyArray_DIMS(ret)[0] = elcount; + /* Reset `item` pointer to point into realloc'd chunk */ + item = new_data + i * elsize; + if (PyDataType_FLAGCHK(dtype, NPY_NEEDS_INIT)) { + /* Initialize new chunk: */ + memset(item, 0, nbytes - i * elsize); + } } - PyArray_DIMS(ret)[0] = i + 1; - if (((item = index2ptr(ret, i)) == NULL) || - PyArray_SETITEM(ret, item, value) == -1) { + if (PyArray_Pack(dtype, item, value) < 0) { Py_DECREF(value); goto done; } @@ -3979,32 +3996,34 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) } - if (PyErr_Occurred()) { - goto done; - } if (i < count) { - PyErr_SetString(PyExc_ValueError, - "iterator too short"); + PyErr_Format(PyExc_ValueError, + "iterator too short: Expected %zd but iterator had only %zd " + "items.", (Py_ssize_t)count, (Py_ssize_t)i); goto done; } /* - * Realloc the data so that don't keep extra memory tied up - * (assuming realloc is reasonably good about reusing space...) + * Realloc the data so that don't keep extra memory tied up and fix + * the arrays first dimension (there could be more than one). */ if (i == 0 || elsize == 0) { /* The size cannot be zero for realloc. 
*/ - goto done; } - /* The handler is always valid */ - new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), i * elsize, - PyArray_HANDLER(ret)); - if (new_data == NULL) { - PyErr_SetString(PyExc_MemoryError, - "cannot allocate array memory"); - goto done; + else { + /* Resize array to actual final size (it may be too large) */ + /* The handler is always valid */ + char *new_data = PyDataMem_UserRENEW( + PyArray_DATA(ret), i * elsize, PyArray_HANDLER(ret)); + + if (new_data == NULL) { + PyErr_SetString(PyExc_MemoryError, + "cannot allocate array memory"); + goto done; + } + ((PyArrayObject_fields *)ret)->data = new_data; } - ((PyArrayObject_fields *)ret)->data = new_data; + PyArray_DIMS(ret)[0] = i; done: Py_XDECREF(iter); diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index ad9437911..165fcbce6 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1202,19 +1202,68 @@ class TestFromiter: raise NIterError('error at index %s' % eindex) yield e - def test_2592(self): - # Test iteration exceptions are correctly raised. - count, eindex = 10, 5 - assert_raises(NIterError, np.fromiter, - self.load_data(count, eindex), dtype=int, count=count) - - def test_2592_edge(self): - # Test iter. exceptions, edge case (exception at end of iterator). - count = 10 - eindex = count-1 - assert_raises(NIterError, np.fromiter, - self.load_data(count, eindex), dtype=int, count=count) + @pytest.mark.parametrize("dtype", [int, object]) + @pytest.mark.parametrize(["count", "error_index"], [(10, 5), (10, 9)]) + def test_2592(self, count, error_index, dtype): + # Test iteration exceptions are correctly raised. 
The data/generator + # has `count` elements but errors at `error_index` + iterable = self.load_data(count, error_index) + with pytest.raises(NIterError): + np.fromiter(iterable, dtype=dtype, count=count) + + @pytest.mark.parametrize("dtype", ["S", "S0", "V0", "U0"]) + def test_empty_not_structured(self, dtype): + # Note, "S0" could be allowed at some point, so long "S" (without + # any length) is rejected. + with pytest.raises(ValueError, match="Must specify length"): + np.fromiter([], dtype=dtype) + @pytest.mark.parametrize("dtype", + # Note that `np.dtype(("O", (10, 5)))` is a subarray dtype + ["d", "i,O", np.dtype(("O", (10, 5))), "O"]) + def test_growth_and_complicated_dtypes(self, dtype): + dtype = np.dtype(dtype) + data = [1, 2, 3, 4, 5, 6, 7, 8, 9] * 100 # make sure we realloc a bit + + class MyIter: + # Class/example from gh-15789 + def __length_hint__(self): + # only required to be an estimate, this is legal + return 1 + + def __iter__(self): + return iter(data) + + res = np.fromiter(MyIter(), dtype=dtype) + expected = np.array(data, dtype=dtype) + + assert_array_equal(res, expected) + + def test_empty_result(self): + class MyIter: + def __length_hint__(self): + return 10 + + def __iter__(self): + return iter([]) # actual iterator is empty. + + res = np.fromiter(MyIter(), dtype="d") + assert res.shape == (0,) + assert res.dtype == "d" + + def test_too_few_items(self): + msg = "iterator too short: Expected 10 but iterator had only 3 items." + with pytest.raises(ValueError, match=msg): + np.fromiter([1, 2, 3], count=10, dtype=int) + + def test_failed_itemsetting(self): + with pytest.raises(TypeError): + np.fromiter([1, None, 3], dtype=int) + + # The following manages to hit somewhat trickier code paths: + iterable = ((2, 3, 4) for i in range(5)) + with pytest.raises(ValueError): + np.fromiter(iterable, dtype=np.dtype((int, 2))) class TestNonzero: def test_nonzero_trivial(self): |