 doc/release/upcoming_changes/20993.improvement.rst |  5
 numpy/core/_add_newdocs.py                          | 17
 numpy/core/src/multiarray/common.c                  | 17
 numpy/core/src/multiarray/common.h                  |  9
 numpy/core/src/multiarray/ctors.c                   | 95
 numpy/core/tests/test_numeric.py                    | 73
 6 files changed, 140 insertions(+), 76 deletions(-)
diff --git a/doc/release/upcoming_changes/20993.improvement.rst b/doc/release/upcoming_changes/20993.improvement.rst
new file mode 100644
index 000000000..f0019c45e
--- /dev/null
+++ b/doc/release/upcoming_changes/20993.improvement.rst
@@ -0,0 +1,5 @@
+``np.fromiter`` now accepts objects and subarrays
+-------------------------------------------------
+The `~numpy.fromiter` function now supports object and
+subarray dtypes. Please see the function documentation for
+examples.
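
As a quick illustration of the behavior described in this note, an object
dtype can now be passed directly to ``np.fromiter``. A minimal sketch,
assuming NumPy >= 1.23 and the usual repr for object arrays:

    >>> import numpy as np
    >>> np.fromiter((str(x) for x in range(3)), dtype=object)
    array(['0', '1', '2'], dtype=object)
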
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index dc0285a11..baafc9127 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -1398,6 +1398,11 @@ add_newdoc('numpy.core.multiarray', 'fromiter',
An iterable object providing data for the array.
dtype : data-type
The data-type of the returned array.
+
+ .. versionchanged:: 1.23
+ Object and subarray dtypes are now supported (note that the final
+ result is not 1-D for a subarray dtype).
+
count : int, optional
The number of items to read from *iterable*. The default is -1,
which means all data is read.
@@ -1421,6 +1426,18 @@ add_newdoc('numpy.core.multiarray', 'fromiter',
>>> np.fromiter(iterable, float)
array([ 0., 1., 4., 9., 16.])
+ A carefully constructed subarray dtype will lead to higher dimensional
+ results:
+
+ >>> iterable = ((x+1, x+2) for x in range(5))
+ >>> np.fromiter(iterable, dtype=np.dtype((int, 2)))
+ array([[1, 2],
+ [2, 3],
+ [3, 4],
+ [4, 5],
+ [5, 6]])
+
+
""".replace(
"${ARRAY_FUNCTION_LIKE}",
array_function_like_doc,
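
Complementing the subarray example added above, ``count`` still bounds how
many items are consumed from the iterable. A minimal sketch (repr spacing may
differ slightly between NumPy versions):

    >>> np.fromiter((x*x for x in range(10)), dtype=float, count=3)
    array([0., 1., 4.])
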
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 8264f83b2..aa612146c 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -127,23 +127,6 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
return 0;
}
-NPY_NO_EXPORT char *
-index2ptr(PyArrayObject *mp, npy_intp i)
-{
- npy_intp dim0;
-
- if (PyArray_NDIM(mp) == 0) {
- PyErr_SetString(PyExc_IndexError, "0-d arrays can't be indexed");
- return NULL;
- }
- dim0 = PyArray_DIMS(mp)[0];
- if (check_and_adjust_index(&i, dim0, 0, NULL) < 0)
- return NULL;
- if (i == 0) {
- return PyArray_DATA(mp);
- }
- return PyArray_BYTES(mp)+i*PyArray_STRIDES(mp)[0];
-}
NPY_NO_EXPORT int
_zerofill(PyArrayObject *ret)
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index ed022e4f8..30a61f425 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -43,9 +43,6 @@ NPY_NO_EXPORT int
PyArray_DTypeFromObject(PyObject *obj, int maxdims,
PyArray_Descr **out_dtype);
-NPY_NO_EXPORT int
-PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
- PyArray_Descr **out_dtype, int string_status);
/*
* Returns NULL without setting an exception if no scalar is matched, a
@@ -54,12 +51,6 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
NPY_NO_EXPORT PyArray_Descr *
_array_find_python_scalar_type(PyObject *op);
-NPY_NO_EXPORT PyArray_Descr *
-_array_typedescr_fromstr(char const *str);
-
-NPY_NO_EXPORT char *
-index2ptr(PyArrayObject *mp, npy_intp i);
-
NPY_NO_EXPORT int
_zerofill(PyArrayObject *ret);
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index f72ba11cd..c0e80d1ee 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -3894,11 +3894,9 @@ PyArray_FromString(char *data, npy_intp slen, PyArray_Descr *dtype,
NPY_NO_EXPORT PyObject *
PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
{
- PyObject *value;
PyObject *iter = NULL;
PyArrayObject *ret = NULL;
npy_intp i, elsize, elcount;
- char *item, *new_data;
if (dtype == NULL) {
return NULL;
@@ -3910,6 +3908,7 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
}
if (PyDataType_ISUNSIZED(dtype)) {
+ /* If this error is removed, the `ret` allocation may need fixing */
PyErr_SetString(PyExc_ValueError,
"Must specify length when using variable-size data-type.");
goto done;
@@ -3927,38 +3926,50 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
elsize = dtype->elsize;
/*
- * We would need to alter the memory RENEW code to decrement any
- * reference counts before throwing away any memory.
+ * Note that PyArray_DESCR(ret) may not match dtype. There are exactly
+ * two cases where this can happen: empty strings/bytes/void (rejected
+ * above) and subarray dtypes (supported by sticking with `dtype`).
*/
- if (PyDataType_REFCHK(dtype)) {
- PyErr_SetString(PyExc_ValueError,
- "cannot create object arrays from iterator");
- goto done;
- }
-
+ Py_INCREF(dtype);
ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, 1,
&elcount, NULL,NULL, 0, NULL);
- dtype = NULL;
if (ret == NULL) {
goto done;
}
- for (i = 0; (i < count || count == -1) &&
- (value = PyIter_Next(iter)); i++) {
- if (i >= elcount && elsize != 0) {
+#ifdef NPY_RELAXED_STRIDES_DEBUG
+ /* Incompatible with NPY_RELAXED_STRIDES_DEBUG due to growing */
+ if (elcount == 1) {
+ PyArray_STRIDES(ret)[0] = elsize;
+ }
+#endif /* NPY_RELAXED_STRIDES_DEBUG */
+
+
+ char *item = PyArray_BYTES(ret);
+ for (i = 0; i < count || count == -1; i++, item += elsize) {
+ PyObject *value = PyIter_Next(iter);
+ if (value == NULL) {
+ if (PyErr_Occurred()) {
+ /* Fetching next item failed rather than exhausting iterator */
+ goto done;
+ }
+ break;
+ }
+
+ if (NPY_UNLIKELY(i >= elcount) && elsize != 0) {
+ char *new_data = NULL;
npy_intp nbytes;
/*
Grow PyArray_DATA(ret):
                  this is similar to the strategy for PyListObject, but we use
50% overallocation => 0, 4, 8, 14, 23, 36, 56, 86 ...
+ TODO: The loadtxt code now uses a `growth` helper that would
+ be suitable to reuse here.
*/
elcount = (i >> 1) + (i < 4 ? 4 : 2) + i;
if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) {
/* The handler is always valid */
- new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), nbytes,
- PyArray_HANDLER(ret));
- }
- else {
- new_data = NULL;
+ new_data = PyDataMem_UserRENEW(
+ PyArray_BYTES(ret), nbytes, PyArray_HANDLER(ret));
}
if (new_data == NULL) {
PyErr_SetString(PyExc_MemoryError,
@@ -3967,11 +3978,17 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
goto done;
}
((PyArrayObject_fields *)ret)->data = new_data;
+ /* resize array for cleanup: */
+ PyArray_DIMS(ret)[0] = elcount;
+ /* Reset `item` pointer to point into realloc'd chunk */
+ item = new_data + i * elsize;
+ if (PyDataType_FLAGCHK(dtype, NPY_NEEDS_INIT)) {
+ /* Initialize new chunk: */
+ memset(item, 0, nbytes - i * elsize);
+ }
}
- PyArray_DIMS(ret)[0] = i + 1;
- if (((item = index2ptr(ret, i)) == NULL) ||
- PyArray_SETITEM(ret, item, value) == -1) {
+ if (PyArray_Pack(dtype, item, value) < 0) {
Py_DECREF(value);
goto done;
}
@@ -3979,32 +3996,34 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
}
- if (PyErr_Occurred()) {
- goto done;
- }
if (i < count) {
- PyErr_SetString(PyExc_ValueError,
- "iterator too short");
+ PyErr_Format(PyExc_ValueError,
+ "iterator too short: Expected %zd but iterator had only %zd "
+ "items.", (Py_ssize_t)count, (Py_ssize_t)i);
goto done;
}
/*
- * Realloc the data so that don't keep extra memory tied up
- * (assuming realloc is reasonably good about reusing space...)
+     * Realloc the data so that we don't keep extra memory tied up and fix
+     * the array's first dimension (there could be more than one).
*/
if (i == 0 || elsize == 0) {
/* The size cannot be zero for realloc. */
- goto done;
}
- /* The handler is always valid */
- new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), i * elsize,
- PyArray_HANDLER(ret));
- if (new_data == NULL) {
- PyErr_SetString(PyExc_MemoryError,
- "cannot allocate array memory");
- goto done;
+ else {
+ /* Resize array to actual final size (it may be too large) */
+ /* The handler is always valid */
+ char *new_data = PyDataMem_UserRENEW(
+ PyArray_DATA(ret), i * elsize, PyArray_HANDLER(ret));
+
+ if (new_data == NULL) {
+ PyErr_SetString(PyExc_MemoryError,
+ "cannot allocate array memory");
+ goto done;
+ }
+ ((PyArrayObject_fields *)ret)->data = new_data;
}
- ((PyArrayObject_fields *)ret)->data = new_data;
+ PyArray_DIMS(ret)[0] = i;
done:
Py_XDECREF(iter);
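
The 50% overallocation rule mentioned in the growth comment above can be
reproduced with a short Python sketch (illustrative only; the C code derives
the same sequence from the running index ``i``):

    def growth_sequence(n_steps):
        # Mirrors elcount = (i >> 1) + (i < 4 ? 4 : 2) + i, starting at 0.
        sizes, i = [], 0
        for _ in range(n_steps):
            i = (i >> 1) + (4 if i < 4 else 2) + i
            sizes.append(i)
        return sizes

    print(growth_sequence(7))  # [4, 8, 14, 23, 36, 56, 86]
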
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index ad9437911..165fcbce6 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -1202,19 +1202,68 @@ class TestFromiter:
raise NIterError('error at index %s' % eindex)
yield e
- def test_2592(self):
- # Test iteration exceptions are correctly raised.
- count, eindex = 10, 5
- assert_raises(NIterError, np.fromiter,
- self.load_data(count, eindex), dtype=int, count=count)
-
- def test_2592_edge(self):
- # Test iter. exceptions, edge case (exception at end of iterator).
- count = 10
- eindex = count-1
- assert_raises(NIterError, np.fromiter,
- self.load_data(count, eindex), dtype=int, count=count)
+ @pytest.mark.parametrize("dtype", [int, object])
+ @pytest.mark.parametrize(["count", "error_index"], [(10, 5), (10, 9)])
+ def test_2592(self, count, error_index, dtype):
+ # Test iteration exceptions are correctly raised. The data/generator
+ # has `count` elements but errors at `error_index`
+ iterable = self.load_data(count, error_index)
+ with pytest.raises(NIterError):
+ np.fromiter(iterable, dtype=dtype, count=count)
+
+ @pytest.mark.parametrize("dtype", ["S", "S0", "V0", "U0"])
+ def test_empty_not_structured(self, dtype):
+        # Note, "S0" could be allowed at some point, so long as "S" (without
+        # any length) is rejected.
+ with pytest.raises(ValueError, match="Must specify length"):
+ np.fromiter([], dtype=dtype)
+ @pytest.mark.parametrize("dtype",
+ # Note that `np.dtype(("O", (10, 5)))` is a subarray dtype
+ ["d", "i,O", np.dtype(("O", (10, 5))), "O"])
+ def test_growth_and_complicated_dtypes(self, dtype):
+ dtype = np.dtype(dtype)
+ data = [1, 2, 3, 4, 5, 6, 7, 8, 9] * 100 # make sure we realloc a bit
+
+ class MyIter:
+ # Class/example from gh-15789
+ def __length_hint__(self):
+ # only required to be an estimate, this is legal
+ return 1
+
+ def __iter__(self):
+ return iter(data)
+
+ res = np.fromiter(MyIter(), dtype=dtype)
+ expected = np.array(data, dtype=dtype)
+
+ assert_array_equal(res, expected)
+
+ def test_empty_result(self):
+ class MyIter:
+ def __length_hint__(self):
+ return 10
+
+ def __iter__(self):
+ return iter([]) # actual iterator is empty.
+
+ res = np.fromiter(MyIter(), dtype="d")
+ assert res.shape == (0,)
+ assert res.dtype == "d"
+
+ def test_too_few_items(self):
+ msg = "iterator too short: Expected 10 but iterator had only 3 items."
+ with pytest.raises(ValueError, match=msg):
+ np.fromiter([1, 2, 3], count=10, dtype=int)
+
+ def test_failed_itemsetting(self):
+ with pytest.raises(TypeError):
+ np.fromiter([1, None, 3], dtype=int)
+
+ # The following manages to hit somewhat trickier code paths:
+ iterable = ((2, 3, 4) for i in range(5))
+ with pytest.raises(ValueError):
+ np.fromiter(iterable, dtype=np.dtype((int, 2)))
class TestNonzero:
def test_nonzero_trivial(self):
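
The ``__length_hint__`` handling exercised by ``test_growth_and_complicated_dtypes``
above can also be checked in isolation. A minimal sketch, with a hypothetical
``ShortHint`` class mirroring the gh-15789 report:

    import numpy as np

    class ShortHint:
        # The hint is only an estimate; fromiter must still grow its buffer.
        def __length_hint__(self):
            return 1

        def __iter__(self):
            return iter(range(100))

    arr = np.fromiter(ShortHint(), dtype="d")
    assert arr.shape == (100,)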