summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/release/upcoming_changes/16200.compatibility.rst64
-rw-r--r--numpy/core/code_generators/genapi.py2
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h36
-rw-r--r--numpy/core/setup.py4
-rw-r--r--numpy/core/src/multiarray/_datetime.h4
-rw-r--r--numpy/core/src/multiarray/abstractdtypes.c168
-rw-r--r--numpy/core/src/multiarray/abstractdtypes.h19
-rw-r--r--numpy/core/src/multiarray/array_coercion.c1430
-rw-r--r--numpy/core/src/multiarray/array_coercion.h58
-rw-r--r--numpy/core/src/multiarray/arrayobject.c180
-rw-r--r--numpy/core/src/multiarray/common.c431
-rw-r--r--numpy/core/src/multiarray/common.h5
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c115
-rw-r--r--numpy/core/src/multiarray/convert_datatype.h3
-rw-r--r--numpy/core/src/multiarray/ctors.c1006
-rw-r--r--numpy/core/src/multiarray/ctors.h13
-rw-r--r--numpy/core/src/multiarray/datetime.c165
-rw-r--r--numpy/core/src/multiarray/descriptor.c5
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c4
-rw-r--r--numpy/core/src/multiarray/dtypemeta.c206
-rw-r--r--numpy/core/src/multiarray/dtypemeta.h2
-rw-r--r--numpy/core/src/multiarray/item_selection.c4
-rw-r--r--numpy/core/src/multiarray/iterators.c5
-rw-r--r--numpy/core/src/multiarray/mapping.c5
-rw-r--r--numpy/core/src/multiarray/methods.c14
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c14
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c13
-rw-r--r--numpy/core/tests/test_array_coercion.py111
-rw-r--r--numpy/core/tests/test_datetime.py18
-rw-r--r--numpy/core/tests/test_deprecations.py16
-rw-r--r--numpy/core/tests/test_indexing.py9
-rw-r--r--numpy/core/tests/test_multiarray.py2
-rw-r--r--numpy/core/tests/test_regression.py3
-rw-r--r--numpy/ma/tests/test_core.py11
34 files changed, 2491 insertions, 1654 deletions
diff --git a/doc/release/upcoming_changes/16200.compatibility.rst b/doc/release/upcoming_changes/16200.compatibility.rst
new file mode 100644
index 000000000..d0fd51265
--- /dev/null
+++ b/doc/release/upcoming_changes/16200.compatibility.rst
@@ -0,0 +1,64 @@
+NumPy Scalars are cast when assigned to arrays
+----------------------------------------------
+
+When creating or assigning to arrays, in all relevant cases NumPy
+scalars will now be cast identically to NumPy arrays. In particular
+this changes the behaviour in some cases which previously raised an
+error::
+
+ np.array([np.float64(np.nan)], dtype=np.int64)
+
+will succeed at this time (this may change) and return an undefined result
+(usually the smallest possible integer). This also affects assignments::
+
+ arr[0] = np.float64(np.nan)
+
+Note, this already happened for ``np.array(np.float64(np.nan), dtype=np.int64)``
+and that the behaviour is unchanged for ``np.nan`` itself which is a Python
+float.
+To avoid backward compatibility issues, at this time assignment from
+``datetime64`` scalar to strings of too short length remains supported.
+This means that ``np.asarray(np.datetime64("2020-10-10"), dtype="S5")``
+succeeds now, when it failed before. In the long term this may be
+deprecated or the unsafe cast may be allowed generally to make assignment
+of arrays and scalars behave consistently.
+
+
+Array coercion changes when Strings and other types are mixed
+-------------------------------------------------------------
+
+When strings and other types are mixed, such as::
+
+ np.array(["string", np.float64(3.)], dtype="S")
+
+The results will change, which may lead to string dtypes with longer strings
+in some cases. In particular, if ``dtype="S"`` is not provided any numerical
+value will lead to a string result long enough to hold all possible numerical
+values (e.g. "S32" for floats). Note that you should always provide
+``dtype="S"`` when converting non-strings to strings.
+
+If ``dtype="S"`` is provided the results will be largely identical to before,
+but NumPy scalars (not a Python float like ``1.0``), will still enforce
+a uniform string length::
+
+ np.array([np.float64(3.)], dtype="S") # gives "S32"
+ np.array([3.0], dtype="S") # gives "S3"
+
+while previously the first version gave the same result as the second.
+
+
+Array coercion restructure
+--------------------------
+
+Array coercion has been restructured. In general, this should not affect
+users. In extremely rare corner cases where array-likes are nested::
+
+ np.array([array_like1])
+
+things will now be more consistent with::
+
+ np.array([np.array(array_like1)])
+
+which could potentially change output subtly for badly defined array-likes.
+We are not aware of any such case where the results were not clearly
+incorrect previously.
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index d88772bdc..856db0410 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -21,9 +21,11 @@ __docformat__ = 'restructuredtext'
# The files under src/ that are scanned for API functions
API_FILES = [join('multiarray', 'alloc.c'),
+ join('multiarray', 'abstractdtypes.c'),
join('multiarray', 'arrayfunction_override.c'),
join('multiarray', 'array_assign_array.c'),
join('multiarray', 'array_assign_scalar.c'),
+ join('multiarray', 'array_coercion.c'),
join('multiarray', 'arrayobject.c'),
join('multiarray', 'arraytypes.c.src'),
join('multiarray', 'buffer.c'),
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 275bb336b..bbcf468c1 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -1547,11 +1547,15 @@ PyArray_GETITEM(const PyArrayObject *arr, const char *itemptr)
(void *)itemptr, (PyArrayObject *)arr);
}
+/*
+ * SETITEM should only be used if it is known that the value is a scalar
+ * and of a type understood by the array's dtype.
+ * Use `PyArray_Pack` if the value may be of a different dtype.
+ */
static NPY_INLINE int
PyArray_SETITEM(PyArrayObject *arr, char *itemptr, PyObject *v)
{
- return ((PyArrayObject_fields *)arr)->descr->f->setitem(
- v, itemptr, arr);
+ return ((PyArrayObject_fields *)arr)->descr->f->setitem(v, itemptr, arr);
}
#else
@@ -1820,10 +1824,25 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
/* TODO: Make this definition public in the API, as soon as its settled */
NPY_NO_EXPORT extern PyTypeObject PyArrayDTypeMeta_Type;
+ typedef struct PyArray_DTypeMeta_tag PyArray_DTypeMeta;
+
+ typedef PyArray_Descr *(discover_descr_from_pyobject_function)(
+ PyArray_DTypeMeta *cls, PyObject *obj);
+
+ /*
+ * Before making this public, we should decide whether it should pass
+ * the type, or allow looking at the object. A possible use-case:
+ * `np.array(np.array([0]), dtype=np.ndarray)`
+ * Could consider arrays that are not `dtype=ndarray` "scalars".
+ */
+ typedef int (is_known_scalar_type_function)(
+ PyArray_DTypeMeta *cls, PyTypeObject *obj);
+
+ typedef PyArray_Descr *(default_descr_function)(PyArray_DTypeMeta *cls);
+
/*
* While NumPy DTypes would not need to be heap types the plan is to
- * make DTypes available in Python at which point we will probably want
- * them to be.
+ * make DTypes available in Python at which point they will be heap types.
* Since we also wish to add fields to the DType class, this looks like
* a typical instance definition, but with PyHeapTypeObject instead of
* only the PyObject_HEAD.
@@ -1831,7 +1850,7 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
* it is a fairly complex construct which may be better to allow
* refactoring of.
*/
- typedef struct _PyArray_DTypeMeta {
+ struct PyArray_DTypeMeta_tag {
PyHeapTypeObject super;
/*
@@ -1870,9 +1889,12 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size,
* NOTE: We could make a copy to detect changes to `f`.
*/
PyArray_ArrFuncs *f;
- } PyArray_DTypeMeta;
- #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))
+ /* DType methods, these could be moved into its own struct */
+ discover_descr_from_pyobject_function *discover_descr_from_pyobject;
+ is_known_scalar_type_function *is_known_scalar_type;
+ default_descr_function *default_descr;
+ };
#endif /* NPY_INTERNAL_BUILD */
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 549860179..8e00e4392 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -774,9 +774,11 @@ def configuration(parent_package='',top_path=None):
#######################################################################
multiarray_deps = [
+ join('src', 'multiarray', 'abstractdtypes.h'),
join('src', 'multiarray', 'arrayobject.h'),
join('src', 'multiarray', 'arraytypes.h'),
join('src', 'multiarray', 'arrayfunction_override.h'),
+ join('src', 'multiarray', 'array_coercion.h'),
join('src', 'multiarray', 'npy_buffer.h'),
join('src', 'multiarray', 'calculation.h'),
join('src', 'multiarray', 'common.h'),
@@ -825,9 +827,11 @@ def configuration(parent_package='',top_path=None):
] + npysort_sources + npymath_sources
multiarray_src = [
+ join('src', 'multiarray', 'abstractdtypes.c'),
join('src', 'multiarray', 'alloc.c'),
join('src', 'multiarray', 'arrayobject.c'),
join('src', 'multiarray', 'arraytypes.c.src'),
+ join('src', 'multiarray', 'array_coercion.c'),
join('src', 'multiarray', 'array_assign_scalar.c'),
join('src', 'multiarray', 'array_assign_array.c'),
join('src', 'multiarray', 'arrayfunction_override.c'),
diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h
index 20f7a132c..4e7ade5ed 100644
--- a/numpy/core/src/multiarray/_datetime.h
+++ b/numpy/core/src/multiarray/_datetime.h
@@ -38,6 +38,10 @@ create_datetime_dtype_with_unit(int type_num, NPY_DATETIMEUNIT unit);
NPY_NO_EXPORT PyArray_DatetimeMetaData *
get_datetime_metadata_from_dtype(PyArray_Descr *dtype);
+NPY_NO_EXPORT int
+find_string_array_datetime64_type(PyArrayObject *arr,
+ PyArray_DatetimeMetaData *meta);
+
/*
* Both type1 and type2 must be either NPY_DATETIME or NPY_TIMEDELTA.
* Applies the type promotion rules between the two types, returning
diff --git a/numpy/core/src/multiarray/abstractdtypes.c b/numpy/core/src/multiarray/abstractdtypes.c
new file mode 100644
index 000000000..02c0eac53
--- /dev/null
+++ b/numpy/core/src/multiarray/abstractdtypes.c
@@ -0,0 +1,168 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "structmember.h"
+
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/ndarraytypes.h"
+#include "numpy/arrayobject.h"
+
+#include "abstractdtypes.h"
+#include "array_coercion.h"
+#include "common.h"
+
+
+static PyArray_Descr *
+discover_descriptor_from_pyint(
+ PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
+{
+ assert(PyLong_Check(obj));
+ /*
+ * We check whether long is good enough. If not, check longlong and
+ * unsigned long before falling back to `object`.
+ */
+ long long value = PyLong_AsLongLong(obj);
+ if (error_converting(value)) {
+ PyErr_Clear();
+ }
+ else {
+ if (NPY_MIN_LONG <= value && value <= NPY_MAX_LONG) {
+ return PyArray_DescrFromType(NPY_LONG);
+ }
+ return PyArray_DescrFromType(NPY_LONGLONG);
+ }
+
+ unsigned long long uvalue = PyLong_AsUnsignedLongLong(obj);
+ if (uvalue == (unsigned long long)-1 && PyErr_Occurred()){
+ PyErr_Clear();
+ }
+ else {
+ return PyArray_DescrFromType(NPY_ULONGLONG);
+ }
+
+ return PyArray_DescrFromType(NPY_OBJECT);
+}
+
+
+static PyArray_Descr*
+discover_descriptor_from_pyfloat(
+ PyArray_DTypeMeta* NPY_UNUSED(cls), PyObject *obj)
+{
+ assert(PyFloat_CheckExact(obj));
+ return PyArray_DescrFromType(NPY_DOUBLE);
+}
+
+
+static PyArray_Descr*
+discover_descriptor_from_pycomplex(
+ PyArray_DTypeMeta* NPY_UNUSED(cls), PyObject *obj)
+{
+ assert(PyComplex_CheckExact(obj));
+ return PyArray_DescrFromType(NPY_COMPLEX128);
+}
+
+
+NPY_NO_EXPORT int
+initialize_and_map_pytypes_to_dtypes()
+{
+ PyArrayAbstractObjDTypeMeta_Type.tp_base = &PyArrayDTypeMeta_Type;
+ if (PyType_Ready(&PyArrayAbstractObjDTypeMeta_Type) < 0) {
+ return -1;
+ }
+ ((PyTypeObject *)&PyArray_PyIntAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
+ PyArray_PyIntAbstractDType.scalar_type = &PyLong_Type;
+ if (PyType_Ready((PyTypeObject *)&PyArray_PyIntAbstractDType) < 0) {
+ return -1;
+ }
+ ((PyTypeObject *)&PyArray_PyFloatAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
+ PyArray_PyFloatAbstractDType.scalar_type = &PyFloat_Type;
+ if (PyType_Ready((PyTypeObject *)&PyArray_PyFloatAbstractDType) < 0) {
+ return -1;
+ }
+ ((PyTypeObject *)&PyArray_PyComplexAbstractDType)->tp_base = &PyArrayDTypeMeta_Type;
+ PyArray_PyComplexAbstractDType.scalar_type = &PyComplex_Type;
+ if (PyType_Ready((PyTypeObject *)&PyArray_PyComplexAbstractDType) < 0) {
+ return -1;
+ }
+
+ /* Register the new DTypes for discovery */
+ if (_PyArray_MapPyTypeToDType(
+ &PyArray_PyIntAbstractDType, &PyLong_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+ if (_PyArray_MapPyTypeToDType(
+ &PyArray_PyFloatAbstractDType, &PyFloat_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+ if (_PyArray_MapPyTypeToDType(
+ &PyArray_PyComplexAbstractDType, &PyComplex_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+
+ /*
+ * Map str, bytes, and bool, for which we do not need abstract versions
+ * to the NumPy DTypes. This is done here using the `is_known_scalar_type`
+ * function.
+ * TODO: The `is_known_scalar_type` function is considered preliminary,
+ * the same could be achieved e.g. with additional abstract DTypes.
+ */
+ PyArray_DTypeMeta *dtype;
+ dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_UNICODE));
+ if (_PyArray_MapPyTypeToDType(dtype, &PyUnicode_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+
+ dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_STRING));
+ if (_PyArray_MapPyTypeToDType(dtype, &PyBytes_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+ dtype = NPY_DTYPE(PyArray_DescrFromType(NPY_BOOL));
+ if (_PyArray_MapPyTypeToDType(dtype, &PyBool_Type, NPY_FALSE) < 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+/* Note: This is currently largely not used, but will be required eventually. */
+NPY_NO_EXPORT PyTypeObject PyArrayAbstractObjDTypeMeta_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "numpy._AbstractObjDTypeMeta",
+ .tp_basicsize = sizeof(PyArray_DTypeMeta),
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = "Helper MetaClass for value based casting AbstractDTypes.",
+};
+
+NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyIntAbstractDType = {{{
+ PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
+ .tp_basicsize = sizeof(PyArray_DTypeMeta),
+ .tp_name = "numpy._PyIntBaseAbstractDType",
+ },},
+ .abstract = 1,
+ .discover_descr_from_pyobject = discover_descriptor_from_pyint,
+ .kind = 'i',
+};
+
+NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyFloatAbstractDType = {{{
+ PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
+ .tp_basicsize = sizeof(PyArray_DTypeMeta),
+ .tp_name = "numpy._PyFloatBaseAbstractDType",
+ },},
+ .abstract = 1,
+ .discover_descr_from_pyobject = discover_descriptor_from_pyfloat,
+ .kind = 'f',
+};
+
+NPY_NO_EXPORT PyArray_DTypeMeta PyArray_PyComplexAbstractDType = {{{
+ PyVarObject_HEAD_INIT(&PyArrayAbstractObjDTypeMeta_Type, 0)
+ .tp_basicsize = sizeof(PyArray_DTypeMeta),
+ .tp_name = "numpy._PyComplexBaseAbstractDType",
+ },},
+ .abstract = 1,
+ .discover_descr_from_pyobject = discover_descriptor_from_pycomplex,
+ .kind = 'c',
+};
+
diff --git a/numpy/core/src/multiarray/abstractdtypes.h b/numpy/core/src/multiarray/abstractdtypes.h
new file mode 100644
index 000000000..50239acf2
--- /dev/null
+++ b/numpy/core/src/multiarray/abstractdtypes.h
@@ -0,0 +1,19 @@
+#ifndef _NPY_ABSTRACTDTYPES_H
+#define _NPY_ABSTRACTDTYPES_H
+
+#include "dtypemeta.h"
+
+/*
+ * These are mainly needed for value based promotion in ufuncs. It
+ * may be necessary to make them (partially) public, to allow user-defined
+ * dtypes to perform value based casting.
+ */
+NPY_NO_EXPORT extern PyTypeObject PyArrayAbstractObjDTypeMeta_Type;
+NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyIntAbstractDType;
+NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyFloatAbstractDType;
+NPY_NO_EXPORT extern PyArray_DTypeMeta PyArray_PyComplexAbstractDType;
+
+NPY_NO_EXPORT int
+initialize_and_map_pytypes_to_dtypes();
+
+#endif /*_NPY_ABSTRACTDTYPES_H */
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
new file mode 100644
index 000000000..8fe996ed2
--- /dev/null
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -0,0 +1,1430 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _UMATHMODULE
+#define _MULTIARRAYMODULE
+
+#include "Python.h"
+
+#include "numpy/npy_3kcompat.h"
+
+#include "lowlevel_strided_loops.h"
+#include "numpy/arrayobject.h"
+
+#include "descriptor.h"
+#include "convert_datatype.h"
+#include "dtypemeta.h"
+
+#include "array_coercion.h"
+#include "ctors.h"
+#include "common.h"
+#include "_datetime.h"
+#include "npy_import.h"
+
+
+/*
+ * This file defines helpers for some of the ctors.c functions which
+ * create an array from Python sequences and types.
+ * When creating an array with ``np.array(...)`` we have to do two main things:
+ *
+ * 1. Find the exact shape of the resulting array
+ * 2. Find the correct dtype of the resulting array.
+ *
+ * In most cases these two things can be done in a single processing step.
+ * There are in principle three different calls that should be distinguished:
+ *
+ * 1. The user calls ``np.array(..., dtype=np.dtype("<f8"))``
+ * 2. The user calls ``np.array(..., dtype="S")``
+ * 3. The user calls ``np.array(...)``
+ *
+ * In the first case, in principle only the shape needs to be found. In the
+ * second case, the DType class (e.g. string) is already known but the DType
+ * instance (e.g. length of the string) has to be found.
+ * In the last case the DType class needs to be found as well. Note that
+ * it is not necessary to find the DType class of the entire array, but
+ * the DType class needs to be found for each element before the actual
+ * dtype instance can be found.
+ *
+ * Further, there are a few other things to keep in mind when coercing arrays:
+ *
+ * * For UFunc promotion, Python scalars need to be handled specially to
+ * allow value based casting. This requires python complex/float to
+ * have their own DTypes.
+ * * It is necessary to decide whether or not a sequence is an element.
+ * For example tuples are considered elements for structured dtypes, but
+ * otherwise are considered sequences.
+ * This means that if a dtype is given (either as a class or instance),
+ * it can affect the dimension discovery part.
+ * For the "special" NumPy types structured void and "c" (single character)
+ * this is special cased. For future user-types, this is currently
+ * handled by calling an `is_known_scalar` method. This method
+ * currently ensures that Python numerical types are handled quickly.
+ *
+ * In the initial version of this implementation, it is assumed that dtype
+ * discovery can be implemented sufficiently fast. That is, it is not
+ * necessary to create fast paths that only find the correct shape e.g. when
+ * ``dtype=np.dtype("f8")`` is given.
+ *
+ * The code here avoids multiple conversions of array-like objects (including
+ * sequences). These objects are cached after conversion, which will require
+ * additional memory, but can drastically speed up coercion from array
+ * like objects.
+ */
+
+
+/*
+ * For finding a DType quickly from a type, it is easiest to have
+ * a mapping of pytype -> DType.
+ * TODO: This mapping means that it is currently impossible to delete a
+ * pair of pytype <-> DType. To resolve this, it is necessary to
+ * weakly reference the pytype. As long as the pytype is alive, we
+ * want to be able to use `np.array([pytype()])`.
+ * It should be possible to retrofit this without too much trouble
+ * (all type objects support weak references).
+ */
+PyObject *_global_pytype_to_type_dict = NULL;
+
+
+/* Enum to track or signal some things during dtype and shape discovery */
+enum _dtype_discovery_flags {
+ FOUND_RAGGED_ARRAY = 1 << 0,
+ GAVE_SUBCLASS_WARNING = 1 << 1,
+ PROMOTION_FAILED = 1 << 2,
+ DISCOVER_STRINGS_AS_SEQUENCES = 1 << 3,
+ DISCOVER_TUPLES_AS_ELEMENTS = 1 << 4,
+ MAX_DIMS_WAS_REACHED = 1 << 5,
+ DESCRIPTOR_WAS_SET = 1 << 6,
+};
+
+
+/**
+ * Adds known sequence types to the global type dictionary, note that when
+ * a DType is passed in, this lookup may be ignored.
+ *
+ * @return -1 on error 0 on success
+ */
+static int
+_prime_global_pytype_to_type_dict()
+{
+ int res;
+
+ /* Add the basic Python sequence types */
+ res = PyDict_SetItem(_global_pytype_to_type_dict,
+ (PyObject *)&PyList_Type, Py_None);
+ if (res < 0) {
+ return -1;
+ }
+ res = PyDict_SetItem(_global_pytype_to_type_dict,
+ (PyObject *)&PyTuple_Type, Py_None);
+ if (res < 0) {
+ return -1;
+ }
+ /* NumPy Arrays are not handled as scalars */
+ res = PyDict_SetItem(_global_pytype_to_type_dict,
+ (PyObject *)&PyArray_Type, Py_None);
+ if (res < 0) {
+ return -1;
+ }
+ return 0;
+}
+
+
+/**
+ * Add a new mapping from a python type to the DType class.
+ *
+ * This assumes that the DType class is guaranteed to hold on the
+ * python type (this assumption is guaranteed).
+ * This functionality supersedes ``_typenum_fromtypeobj``.
+ *
+ * @param DType DType to map the python type to
+ * @param pytype Python type to map from
+ * @param userdef Whether or not it is user defined. We ensure that user
+ * defined scalars subclass from our scalars (for now).
+ */
+NPY_NO_EXPORT int
+_PyArray_MapPyTypeToDType(
+ PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef)
+{
+ PyObject *Dtype_obj = (PyObject *)DType;
+
+ if (userdef) {
+ /*
+ * It seems we did not strictly enforce this in the legacy dtype
+ * API, but assume that it is always true. Further, this could be
+ * relaxed in the future. In particular we should have a new
+ * superclass of ``np.generic`` in order to not enforce the array
+ * scalar behaviour.
+ */
+ if (!PyObject_IsSubclass((PyObject *)pytype, (PyObject *)&PyGenericArrType_Type)) {
+ PyErr_Format(PyExc_RuntimeError,
+ "currently it is only possible to register a DType "
+ "for scalars deriving from `np.generic`, got '%S'.",
+ (PyObject *)pytype);
+ return -1;
+ }
+ }
+
+ /* Create the global dictionary if it does not exist */
+ if (NPY_UNLIKELY(_global_pytype_to_type_dict == NULL)) {
+ _global_pytype_to_type_dict = PyDict_New();
+ if (_global_pytype_to_type_dict == NULL) {
+ return -1;
+ }
+ if (_prime_global_pytype_to_type_dict() < 0) {
+ return -1;
+ }
+ }
+
+ int res = PyDict_Contains(_global_pytype_to_type_dict, (PyObject *)pytype);
+ if (res < 0) {
+ return -1;
+ }
+ else if (res) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Can only map one python type to DType.");
+ return -1;
+ }
+
+ return PyDict_SetItem(_global_pytype_to_type_dict,
+ (PyObject *)pytype, Dtype_obj);
+}
+
+
+/**
+ * Lookup the DType for a registered known python scalar type.
+ *
+ * @param pytype Python Type to look up
+ * @return DType, None if it is a known non-scalar, or NULL if an unknown object.
+ */
+static NPY_INLINE PyArray_DTypeMeta *
+discover_dtype_from_pytype(PyTypeObject *pytype)
+{
+ PyObject *DType;
+
+ if (pytype == &PyArray_Type) {
+ Py_INCREF(Py_None);
+ return (PyArray_DTypeMeta *)Py_None;
+ }
+
+ DType = PyDict_GetItem(_global_pytype_to_type_dict, (PyObject *)pytype);
+ if (DType == NULL) {
+ /* the python type is not known */
+ return NULL;
+ }
+
+ Py_INCREF(DType);
+ if (DType == Py_None) {
+ return (PyArray_DTypeMeta *)Py_None;
+ }
+ assert(PyObject_TypeCheck(DType, (PyTypeObject *)&PyArrayDTypeMeta_Type));
+ return (PyArray_DTypeMeta *)DType;
+}
+
+
+/**
+ * Find the correct DType class for the given python type. If flags is NULL
+ * this is not used to discover a dtype, but only for conversion to an
+ * existing dtype. In that case the Python (not NumPy) scalar subclass
+ * checks are skipped.
+ *
+ * @param obj The python object, mainly type(pyobj) is used, the object
+ * is passed to reuse existing code at this time only.
+ * @param flags Flags used to know if warnings were already given. If
+ * flags is NULL, this is not used for dtype discovery.
+ * @param fixed_DType if not NULL, will be checked first for whether or not
+ * it can/wants to handle the (possible) scalar value.
+ * @return New reference to either a DType class, Py_None, or NULL on error.
+ */
+static NPY_INLINE PyArray_DTypeMeta *
+discover_dtype_from_pyobject(
+ PyObject *obj, enum _dtype_discovery_flags *flags,
+ PyArray_DTypeMeta *fixed_DType)
+{
+ if (fixed_DType != NULL) {
+ /*
+ * Let the given DType handle the discovery. This is when the
+ * scalar-type matches exactly, or the DType signals that it can
+ * handle the scalar-type. (Even if it cannot handle here it may be
+ * asked to attempt to do so later, if no other matching DType exists.)
+ */
+ if ((Py_TYPE(obj) == fixed_DType->scalar_type) ||
+ (fixed_DType->is_known_scalar_type != NULL &&
+ fixed_DType->is_known_scalar_type(fixed_DType, Py_TYPE(obj)))) {
+ Py_INCREF(fixed_DType);
+ return fixed_DType;
+ }
+ }
+
+ PyArray_DTypeMeta *DType = discover_dtype_from_pytype(Py_TYPE(obj));
+ if (DType != NULL) {
+ return DType;
+ }
+ /*
+ * At this point we have not found a clear mapping, but mainly for
+ * backward compatibility we have to make some further attempts at
+ * interpreting the input as a known scalar type.
+ */
+ PyArray_Descr *legacy_descr;
+ if (PyArray_IsScalar(obj, Generic)) {
+ legacy_descr = PyArray_DescrFromScalar(obj);
+ if (legacy_descr == NULL) {
+ return NULL;
+ }
+ }
+ else if (flags == NULL) {
+ Py_INCREF(Py_None);
+ return (PyArray_DTypeMeta *)Py_None;
+ }
+ else if (PyBytes_Check(obj)) {
+ legacy_descr = PyArray_DescrFromType(NPY_BYTE);
+ }
+ else if (PyUnicode_Check(obj)) {
+ legacy_descr = PyArray_DescrFromType(NPY_UNICODE);
+ }
+ else {
+ legacy_descr = _array_find_python_scalar_type(obj);
+ }
+
+ if (legacy_descr != NULL) {
+ DType = NPY_DTYPE(legacy_descr);
+ Py_INCREF(DType);
+ Py_DECREF(legacy_descr);
+ /* TODO: Enable warning about subclass handling */
+ if (0 && !((*flags) & GAVE_SUBCLASS_WARNING)) {
+ if (DEPRECATE_FUTUREWARNING(
+ "in the future NumPy will not automatically find the "
+ "dtype for subclasses of scalars known to NumPy (i.e. "
+ "python types). Use the appropriate `dtype=...` to create "
+ "this array. This will use the `object` dtype or raise "
+ "an error in the future.") < 0) {
+ return NULL;
+ }
+ *flags |= GAVE_SUBCLASS_WARNING;
+ }
+ return DType;
+ }
+ Py_INCREF(Py_None);
+ return (PyArray_DTypeMeta *)Py_None;
+}
+
+
+/*
+ * This function should probably become public API eventually. At this
+ * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`.
+ * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement
+ * this logic.
+ */
+static NPY_INLINE PyArray_Descr *
+cast_descriptor_to_fixed_dtype(
+ PyArray_Descr *descr, PyArray_DTypeMeta *fixed_DType)
+{
+ if (fixed_DType == NULL) {
+ /* Nothing to do, we only need to promote the new dtype */
+ Py_INCREF(descr);
+ return descr;
+ }
+
+ if (!fixed_DType->parametric) {
+ /*
+ * Don't actually do anything, the default is always the result
+ * of any cast.
+ */
+ return fixed_DType->default_descr(fixed_DType);
+ }
+ if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)fixed_DType)) {
+ Py_INCREF(descr);
+ return descr;
+ }
+ /*
+ * TODO: When this is implemented for all dtypes, the special cases
+ * can be removed...
+ */
+ if (fixed_DType->legacy && fixed_DType->parametric &&
+ NPY_DTYPE(descr)->legacy) {
+ PyArray_Descr *flex_dtype = PyArray_DescrFromType(fixed_DType->type_num);
+ return PyArray_AdaptFlexibleDType(descr, flex_dtype);
+ }
+
+ PyErr_SetString(PyExc_NotImplementedError,
+ "Must use casting to find the correct dtype, this is "
+ "not yet implemented! "
+ "(It should not be possible to hit this code currently!)");
+ return NULL;
+}
+
+
+/**
+ * Discover the correct descriptor from a known DType class and scalar.
+ * If the fixed DType can discover a dtype instance/descr all is fine,
+ * if it cannot and DType is used instead, a cast will have to be tried.
+ *
+ * @param fixed_DType A user provided fixed DType, can be NULL
+ * @param DType A discovered DType (by discover_dtype_from_pyobject);
+ * this can be identical to `fixed_DType`, if obj is a
+ * known scalar. Can be `NULL` indicating no known type.
+ * @param obj The Python scalar object. At the time of calling this function
+ * it must be known that `obj` should represent a scalar.
+ */
+static NPY_INLINE PyArray_Descr *
+find_scalar_descriptor(
+ PyArray_DTypeMeta *fixed_DType, PyArray_DTypeMeta *DType,
+ PyObject *obj)
+{
+ PyArray_Descr *descr;
+
+ if (DType == NULL && fixed_DType == NULL) {
+ /* No known DType and no fixed one means we go to object. */
+ return PyArray_DescrFromType(NPY_OBJECT);
+ }
+ else if (DType == NULL) {
+ /*
+ * If no DType is known/found, give the fixed one a second
+ * chance. This allows, for example, strings to call `str(obj)` to
+ * figure out the length for arbitrary objects.
+ */
+ descr = fixed_DType->discover_descr_from_pyobject(fixed_DType, obj);
+ }
+ else {
+ descr = DType->discover_descr_from_pyobject(DType, obj);
+ }
+ if (descr == NULL) {
+ return NULL;
+ }
+ if (fixed_DType == NULL) {
+ return descr;
+ }
+
+ Py_SETREF(descr, cast_descriptor_to_fixed_dtype(descr, fixed_DType));
+ return descr;
+}
+
+
+/**
+ * Assign a single element in an array from a python value.
+ *
+ * The dtype's SETITEM should only be trusted to generally do the right
+ * thing if something is known to be a scalar *and* is of a python type known
+ * to the DType (which should include all basic Python math types), but in
+ * general a cast may be necessary.
+ * This function handles the cast, which is for example hit when assigning
+ * a float128 to complex128.
+ *
+ * At this time, this function does not support arrays (historically we
+ * mainly supported arrays through `__float__()`, etc.). Such support should
+ * possibly be added (although when called from `PyArray_AssignFromCache`
+ * the input cannot be an array).
+ * Note that this is also problematic for some array-likes, such as
+ * `astropy.units.Quantity` and `np.ma.masked`. These are accustomed to us calling
+ * `__float__`/`__int__` for 0-D instances in many cases.
+ * Eventually, we may want to define this as wrong: They must use DTypes
+ * instead of (only) subclasses. Until then, here as well as in
+ * `PyArray_AssignFromCache` (which already does this), we need to special
+ * case 0-D array-likes to behave like arbitrary (unknown!) Python objects.
+ *
+ * @param descr
+ * @param item
+ * @param value
+ * @return 0 on success -1 on failure.
+ */
+/*
+ * TODO: This function should possibly be public API.
+ */
+NPY_NO_EXPORT int
+PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
+{
+ PyArrayObject_fields arr_fields = {
+ .flags = NPY_ARRAY_WRITEABLE, /* assume array is not behaved. */
+ };
+ Py_SET_TYPE(&arr_fields, &PyArray_Type);
+ Py_REFCNT(&arr_fields) = 1;
+
+ if (NPY_UNLIKELY(descr->type_num == NPY_OBJECT)) {
+ /*
+ * We always have to store objects directly, casting will lose some
+ * type information. Any other dtype discards the type information.
+ * TODO: For a Categorical[object] this path may be necessary?
+ */
+ arr_fields.descr = descr;
+ return descr->f->setitem(value, item, &arr_fields);
+ }
+
+ /* discover_dtype_from_pyobject includes a check for is_known_scalar_type */
+ PyArray_DTypeMeta *DType = discover_dtype_from_pyobject(
+ value, NULL, NPY_DTYPE(descr));
+ if (DType == NULL) {
+ return -1;
+ }
+ if (DType == NPY_DTYPE(descr) || DType == (PyArray_DTypeMeta *)Py_None) {
+ /* We can set the element directly (or at least will try to) */
+ Py_XDECREF(DType);
+ arr_fields.descr = descr;
+ return descr->f->setitem(value, item, &arr_fields);
+ }
+ PyArray_Descr *tmp_descr;
+ tmp_descr = DType->discover_descr_from_pyobject(DType, value);
+ Py_DECREF(DType);
+ if (tmp_descr == NULL) {
+ return -1;
+ }
+
+ char *data = PyObject_Malloc(tmp_descr->elsize);
+ if (data == NULL) {
+ PyErr_NoMemory();
+ Py_DECREF(tmp_descr);
+ return -1;
+ }
+ if (PyDataType_FLAGCHK(tmp_descr, NPY_NEEDS_INIT)) {
+ memset(data, 0, tmp_descr->elsize);
+ }
+ arr_fields.descr = tmp_descr;
+ if (tmp_descr->f->setitem(value, data, &arr_fields) < 0) {
+ PyObject_Free(data);
+ Py_DECREF(tmp_descr);
+ return -1;
+ }
+ if (PyDataType_REFCHK(tmp_descr)) {
+ /* We could probably use move-references above */
+ PyArray_Item_INCREF(data, tmp_descr);
+ }
+
+ int res = 0;
+ int needs_api = 0;
+ PyArray_StridedUnaryOp *stransfer;
+ NpyAuxData *transferdata;
+ if (PyArray_GetDTypeTransferFunction(
+ 0, 0, 0, tmp_descr, descr, 0, &stransfer, &transferdata,
+ &needs_api) == NPY_FAIL) {
+ res = -1;
+ goto finish;
+ }
+ stransfer(item, 0, data, 0, 1, tmp_descr->elsize, transferdata);
+ NPY_AUXDATA_FREE(transferdata);
+
+ if (needs_api && PyErr_Occurred()) {
+ res = -1;
+ }
+
+ finish:
+ if (PyDataType_REFCHK(tmp_descr)) {
+ /* We could probably use move-references above */
+ PyArray_Item_XDECREF(data, tmp_descr);
+ }
+ PyObject_Free(data);
+ Py_DECREF(tmp_descr);
+ return res;
+}
+
+
+/*
+ * Update the discovered output shape and the maximum number of dimensions
+ * with newly found shape information.
+ *
+ * @param curr_ndim Depth (dimension) at which the new shape was found.
+ * @param max_ndim Pointer to the maximum number of dimensions; shrunk
+ *        in-place when the new information requires it.
+ * @param out_shape The output shape; filled on first discovery, verified
+ *        against afterwards (once MAX_DIMS_WAS_REACHED is set).
+ * @param new_ndim Number of newly found dimensions.
+ * @param new_shape The newly found shape (`new_ndim` entries, may be NULL
+ *        when `new_ndim` is 0).
+ * @param sequence Whether this information comes from a sequence; sequences
+ *        contribute one dimension at a time and do not fix `max_ndim`.
+ * @param flags Discovery flags; MAX_DIMS_WAS_REACHED is read and set here.
+ * @return 0 on success, -1 if the array turned out to be ragged.
+ */
+static int
+update_shape(int curr_ndim, int *max_ndim,
+             npy_intp out_shape[NPY_MAXDIMS], int new_ndim,
+             const npy_intp new_shape[NPY_MAXDIMS], npy_bool sequence,
+             enum _dtype_discovery_flags *flags)
+{
+    int success = 0;  /* unsuccessful if array is ragged */
+    const npy_bool max_dims_reached = *flags & MAX_DIMS_WAS_REACHED;
+
+    if (curr_ndim + new_ndim > *max_ndim) {
+        success = -1;
+        /* Only update/check as many dims as possible, max_ndim is unchanged */
+        new_ndim = *max_ndim - curr_ndim;
+    }
+    else if (!sequence && (*max_ndim != curr_ndim + new_ndim)) {
+        /*
+         * Sequences do not update max_ndim, otherwise shrink and check.
+         * This is depth first, so if it is already set, `out_shape` is filled.
+         */
+        *max_ndim = curr_ndim + new_ndim;
+        /* If a shape was already set, this is also ragged */
+        if (max_dims_reached) {
+            success = -1;
+        }
+    }
+    for (int i = 0; i < new_ndim; i++) {
+        npy_intp curr_dim = out_shape[curr_ndim + i];
+        npy_intp new_dim = new_shape[i];
+
+        if (!max_dims_reached) {
+            out_shape[curr_ndim + i] = new_dim;
+        }
+        else if (new_dim != curr_dim) {
+            /* The array is ragged, and this dimension is unusable already */
+            success = -1;
+            if (!sequence) {
+                /*
+                 * Remove the dimensions that we cannot use.  Dimensions
+                 * before index `i` matched, so exactly `new_ndim - i` of
+                 * the new dimensions are dropped, leaving
+                 * `*max_ndim == curr_ndim + i`.
+                 * (The previous `new_ndim + i` over-shrunk `max_ndim`.)
+                 */
+                *max_ndim -= new_ndim - i;
+            }
+            else {
+                assert(i == 0);
+                /* max_ndim is usually not updated for sequences, so set now: */
+                *max_ndim = curr_ndim;
+            }
+            break;
+        }
+    }
+    if (!sequence) {
+        *flags |= MAX_DIMS_WAS_REACHED;
+    }
+    return success;
+}
+
+
+/* Small static free-list of coercion cache structs to avoid malloc churn */
+#define COERCION_CACHE_CACHE_SIZE 5
+static int _coercion_cache_num = 0;
+static coercion_cache_obj *_coercion_cache_cache[COERCION_CACHE_CACHE_SIZE];
+
+/*
+ * Allocate (or reuse from the free-list) a cache item and append it at the
+ * tail of the linked list.  Steals a reference to `arr_or_sequence`
+ * (released later in `npy_unlink_coercion_cache`); `converted_obj` is
+ * only borrowed.
+ */
+static NPY_INLINE int
+npy_new_coercion_cache(
+        PyObject *converted_obj, PyObject *arr_or_sequence, npy_bool sequence,
+        coercion_cache_obj ***next_ptr, int ndim)
+{
+    coercion_cache_obj *cache;
+    if (_coercion_cache_num > 0) {
+        /* Pop a recycled item off the static free-list */
+        _coercion_cache_num--;
+        cache = _coercion_cache_cache[_coercion_cache_num];
+    }
+    else {
+        cache = PyObject_MALLOC(sizeof(coercion_cache_obj));
+    }
+    if (cache == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    cache->converted_obj = converted_obj;
+    cache->arr_or_sequence = arr_or_sequence;
+    cache->sequence = sequence;
+    cache->depth = ndim;
+    cache->next = NULL;
+    /* Append at the tail and advance the caller's tail pointer */
+    **next_ptr = cache;
+    *next_ptr = &(cache->next);
+    return 0;
+}
+
+/**
+ * Unlink coercion cache item.
+ *
+ * Releases the reference held to `arr_or_sequence` and returns the struct
+ * itself to the static free-list (or frees it when the list is full).
+ *
+ * @param current The cache item to unlink.
+ * @return next coercion cache object (or NULL)
+ */
+NPY_NO_EXPORT NPY_INLINE coercion_cache_obj *
+npy_unlink_coercion_cache(coercion_cache_obj *current)
+{
+    coercion_cache_obj *next = current->next;
+    Py_DECREF(current->arr_or_sequence);
+    if (_coercion_cache_num < COERCION_CACHE_CACHE_SIZE) {
+        _coercion_cache_cache[_coercion_cache_num] = current;
+        _coercion_cache_num++;
+    }
+    else {
+        PyObject_FREE(current);
+    }
+    return next;
+}
+
+/* Free an entire coercion-cache linked list, unlinking item by item. */
+NPY_NO_EXPORT NPY_INLINE void
+npy_free_coercion_cache(coercion_cache_obj *next) {
+    /* We only need to check from the last used cache pos */
+    while (next != NULL) {
+        next = npy_unlink_coercion_cache(next);
+    }
+}
+
+#undef COERCION_CACHE_CACHE_SIZE
+
+/**
+ * Do the promotion step and possible casting. This function should
+ * never be called if a descriptor was requested. In that case the output
+ * dtype is not of importance, so we must not risk promotion errors.
+ *
+ * @param out_descr The current descriptor; updated in-place to hold a
+ *        reference to the promoted descriptor.
+ * @param descr The newly found descriptor to promote with
+ * @param flags dtype discover flags to signal failed promotion.
+ * @return -1 on error, 0 on success.
+ */
+static NPY_INLINE int
+handle_promotion(PyArray_Descr **out_descr, PyArray_Descr *descr,
+        enum _dtype_discovery_flags *flags)
+{
+    assert(!(*flags & DESCRIPTOR_WAS_SET));
+
+    if (*out_descr == NULL) {
+        /* First descriptor found: nothing to promote with yet */
+        Py_INCREF(descr);
+        *out_descr = descr;
+        return 0;
+    }
+    PyArray_Descr *new_descr = PyArray_PromoteTypes(descr, *out_descr);
+    if (new_descr == NULL) {
+        /* Swallow the promotion error and flag it instead of failing */
+        PyErr_Clear();
+        *flags |= PROMOTION_FAILED;
+        /* Continue with object, since we may need the dimensionality */
+        new_descr = PyArray_DescrFromType(NPY_OBJECT);
+    }
+    Py_SETREF(*out_descr, new_descr);
+    return 0;
+}
+
+
+/**
+ * Handle a leaf node (known scalar) during dtype and shape discovery.
+ *
+ * @param obj The python object (scalar) found at this depth.
+ * @param curr_dims The current number of dimensions (depth in the recursion)
+ * @param max_dims Pointer to the maximum number of dimensions (may shrink).
+ * @param out_descr The promoted output descriptor, updated in-place.
+ * @param out_shape The discovered output shape, will be filled
+ * @param fixed_DType The DType class fixed by the user, or NULL.
+ * @param flags used signal that this is a ragged array, used internally and
+ *        can be expanded if necessary.
+ * @param DType The DType class discovered for `obj`, or NULL if not known.
+ * @return The updated maximum number of dimensions, or -1 on error.
+ */
+static NPY_INLINE int
+handle_scalar(
+        PyObject *obj, int curr_dims, int *max_dims,
+        PyArray_Descr **out_descr, npy_intp *out_shape,
+        PyArray_DTypeMeta *fixed_DType,
+        enum _dtype_discovery_flags *flags, PyArray_DTypeMeta *DType)
+{
+    PyArray_Descr *descr;
+
+    /* A scalar contributes zero new dimensions; this checks for raggedness */
+    if (update_shape(curr_dims, max_dims, out_shape,
+            0, NULL, NPY_FALSE, flags) < 0) {
+        *flags |= FOUND_RAGGED_ARRAY;
+        return *max_dims;
+    }
+    if (*flags & DESCRIPTOR_WAS_SET) {
+        /* no need to do any promotion */
+        return *max_dims;
+    }
+    /* This is a scalar, so find the descriptor */
+    descr = find_scalar_descriptor(fixed_DType, DType, obj);
+    if (descr == NULL) {
+        return -1;
+    }
+    if (handle_promotion(out_descr, descr, flags) < 0) {
+        Py_DECREF(descr);
+        return -1;
+    }
+    Py_DECREF(descr);
+    return *max_dims;
+}
+
+
+/**
+ * Return the correct descriptor given an array object and a DType class.
+ *
+ * This is identical to casting the arrays descriptor/dtype to the new
+ * DType class
+ *
+ * @param arr The array object.
+ * @param DType The DType class to cast to (or NULL for convenience)
+ * @param out_descr The output descriptor will be set. The result can be NULL
+ *        when the array is of object dtype and has no elements.
+ *
+ * @return -1 on failure, 0 on success.
+ */
+static int
+find_descriptor_from_array(
+        PyArrayObject *arr, PyArray_DTypeMeta *DType, PyArray_Descr **out_descr)
+{
+    enum _dtype_discovery_flags flags = 0;
+    *out_descr = NULL;
+
+    if (NPY_UNLIKELY(DType != NULL && DType->parametric &&
+            PyArray_ISOBJECT(arr))) {
+        /*
+         * We have one special case, if (and only if) the input array is of
+         * object DType and the dtype is not fixed already but parametric.
+         * Then, we allow inspection of all elements, treating them as
+         * elements. We do this recursively, so nested 0-D arrays can work,
+         * but nested higher dimensional arrays will lead to an error.
+         */
+        assert(DType->type_num != NPY_OBJECT);  /* not parametric */
+
+        PyArrayIterObject *iter;
+        iter = (PyArrayIterObject *)PyArray_IterNew((PyObject *)arr);
+        if (iter == NULL) {
+            return -1;
+        }
+        while (iter->index < iter->size) {
+            PyArray_DTypeMeta *item_DType;
+            /*
+             * Note: If the array contains typed objects we may need to use
+             * the dtype to use casting for finding the correct instance.
+             */
+            PyObject *elem = PyArray_GETITEM(arr, iter->dataptr);
+            if (elem == NULL) {
+                Py_DECREF(iter);
+                return -1;
+            }
+            item_DType = discover_dtype_from_pyobject(elem, &flags, DType);
+            if (item_DType == NULL) {
+                Py_DECREF(iter);
+                Py_DECREF(elem);
+                return -1;
+            }
+            if (item_DType == (PyArray_DTypeMeta *)Py_None) {
+                /* Unknown scalar type: treat as if no DType was discovered */
+                Py_SETREF(item_DType, NULL);
+            }
+            int flat_max_dims = 0;
+            /* Promote `*out_descr` with this element's descriptor (0-D) */
+            if (handle_scalar(elem, 0, &flat_max_dims, out_descr,
+                    NULL, DType, &flags, item_DType) < 0) {
+                Py_DECREF(iter);
+                Py_DECREF(elem);
+                Py_XDECREF(item_DType);
+                return -1;
+            }
+            Py_XDECREF(item_DType);
+            Py_DECREF(elem);
+            PyArray_ITER_NEXT(iter);
+        }
+        Py_DECREF(iter);
+    }
+    else if (DType != NULL && NPY_UNLIKELY(DType->type_num == NPY_DATETIME) &&
+            PyArray_ISSTRING(arr)) {
+        /*
+         * TODO: This branch should be deprecated IMO, the workaround is
+         *       to cast to the object to a string array. Although a specific
+         *       function (if there is even any need) would be better.
+         *       This is value based casting!
+         *       Unless of course we actually want to support this kind of thing
+         *       in general (not just for object dtype)...
+         */
+        PyArray_DatetimeMetaData meta;
+        meta.base = NPY_FR_GENERIC;
+        meta.num = 1;
+
+        if (find_string_array_datetime64_type(arr, &meta) < 0) {
+            return -1;
+        }
+        else {
+            *out_descr = create_datetime_dtype(NPY_DATETIME, &meta);
+            if (*out_descr == NULL) {
+                return -1;
+            }
+        }
+    }
+    else {
+        /*
+         * If this is not an object array figure out the dtype cast,
+         * or simply use the returned DType.
+         */
+        *out_descr = cast_descriptor_to_fixed_dtype(
+                PyArray_DESCR(arr), DType);
+        if (*out_descr == NULL) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Given a dtype or DType object, find the correct descriptor to cast the
+ * array to.
+ *
+ * This function is identical to normal casting using only the dtype, however,
+ * it supports inspecting the elements when the array has object dtype
+ * (and the given datatype describes a parametric DType class).
+ *
+ * @param arr The array to adapt the dtype/DType to.
+ * @param dtype A dtype instance or class.
+ * @return A concrete dtype instance or NULL
+ */
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype)
+{
+    /* If the requested dtype is flexible, adapt it */
+    PyArray_Descr *new_dtype;
+    PyArray_DTypeMeta *new_DType;
+    int res;
+
+    res = PyArray_ExtractDTypeAndDescriptor((PyObject *)dtype,
+            &new_dtype, &new_DType);
+    if (res < 0) {
+        return NULL;
+    }
+    if (new_dtype == NULL) {
+        /* Only a DType class was given; derive the instance from the array */
+        res = find_descriptor_from_array(arr, new_DType, &new_dtype);
+        if (res < 0) {
+            Py_DECREF(new_DType);
+            return NULL;
+        }
+        if (new_dtype == NULL) {
+            /* This is an object array but contained no elements, use default */
+            new_dtype = new_DType->default_descr(new_DType);
+        }
+    }
+    Py_DECREF(new_DType);
+    return new_dtype;
+}
+
+
+/**
+ * Recursion helper for `PyArray_DiscoverDTypeAndShape`.  See its
+ * documentation for additional details.
+ *
+ * @param obj The current (possibly nested) object
+ * @param curr_dims The current depth, i.e. initially 0 and increasing.
+ * @param max_dims Maximum number of dimensions, modified during discovery.
+ * @param out_descr dtype instance (or NULL) to promoted and update.
+ * @param out_shape The current shape (updated)
+ * @param coercion_cache_tail_ptr The tail of the linked list of coercion
+ *        cache objects, which hold on to converted sequences and arrays.
+ *        This is a pointer to the `->next` slot of the previous cache so
+ *        that we can append a new cache object (and update this pointer).
+ *        (Initially it is a pointer to the user-provided head pointer).
+ * @param fixed_DType User provided fixed DType class
+ * @param flags Discovery flags (reporting and behaviour flags, see def.)
+ * @return The updated number of maximum dimensions (i.e. scalars will set
+ *         this to the current dimensions).
+ */
+NPY_NO_EXPORT int
+PyArray_DiscoverDTypeAndShape_Recursive(
+        PyObject *obj, int curr_dims, int max_dims, PyArray_Descr **out_descr,
+        npy_intp out_shape[NPY_MAXDIMS],
+        coercion_cache_obj ***coercion_cache_tail_ptr,
+        PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags)
+{
+    PyArrayObject *arr = NULL;
+    PyObject *seq;
+
+    /*
+     * The first step is to find the DType class if it was not provided,
+     * alternatively we have to find out that this is not a scalar at all
+     * (which could fail and lead us to `object` dtype).
+     */
+    PyArray_DTypeMeta *DType = NULL;
+
+    if (NPY_UNLIKELY(*flags & DISCOVER_STRINGS_AS_SEQUENCES)) {
+        /*
+         * We currently support that bytes/strings are considered sequences,
+         * if the dtype is np.dtype('c'), this should be deprecated probably,
+         * but requires hacks right now.
+         */
+        if (PyBytes_Check(obj) && PyBytes_Size(obj) != 1) {
+            goto force_sequence_due_to_char_dtype;
+        }
+        else if (PyUnicode_Check(obj) && PyUnicode_GetLength(obj) != 1) {
+            goto force_sequence_due_to_char_dtype;
+        }
+    }
+
+    /* If this is a known scalar, find the corresponding DType class */
+    DType = discover_dtype_from_pyobject(obj, flags, fixed_DType);
+    if (DType == NULL) {
+        return -1;
+    }
+    else if (DType == (PyArray_DTypeMeta *)Py_None) {
+        /* Not a known scalar: fall through to the array/sequence handling */
+        Py_DECREF(Py_None);
+    }
+    else {
+        max_dims = handle_scalar(
+                obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
+                flags, DType);
+        Py_DECREF(DType);
+        return max_dims;
+    }
+
+    /*
+     * At this point we expect to find either a sequence, or an array-like.
+     * Although it is still possible that this fails and we have to use
+     * `object`.
+     */
+    if (PyArray_Check(obj)) {
+        arr = (PyArrayObject *)obj;
+        Py_INCREF(arr);
+    }
+    else {
+        PyArray_Descr *requested_descr = NULL;
+        if (*flags & DESCRIPTOR_WAS_SET) {
+            /* __array__ may be passed the requested descriptor if provided */
+            requested_descr = *out_descr;
+        }
+        arr = (PyArrayObject *)_array_from_array_like(obj,
+                requested_descr, 0, NULL);
+        if (arr == NULL) {
+            return -1;
+        }
+        else if (arr == (PyArrayObject *)Py_NotImplemented) {
+            /* Not an array-like either; treated as a sequence/scalar below */
+            Py_DECREF(arr);
+            arr = NULL;
+        }
+    }
+    if (arr != NULL) {
+        /*
+         * This is an array object which will be added to the cache, keeps
+         * the reference to the array alive (takes ownership).
+         */
+        if (npy_new_coercion_cache(obj, (PyObject *)arr,
+                0, coercion_cache_tail_ptr, curr_dims) < 0) {
+            return -1;
+        }
+
+        if (curr_dims == 0) {
+            /*
+             * Special case for reverse broadcasting, ignore max_dims if this
+             * is a single array-like object; needed for PyArray_CopyObject.
+             */
+            memcpy(out_shape, PyArray_SHAPE(arr),
+                   PyArray_NDIM(arr) * sizeof(npy_intp));
+            max_dims = PyArray_NDIM(arr);
+        }
+        else if (update_shape(curr_dims, &max_dims, out_shape,
+                PyArray_NDIM(arr), PyArray_SHAPE(arr), NPY_FALSE, flags) < 0) {
+            *flags |= FOUND_RAGGED_ARRAY;
+            return max_dims;
+        }
+
+        if (*flags & DESCRIPTOR_WAS_SET) {
+            return max_dims;
+        }
+        /*
+         * For arrays we may not just need to cast the dtype to the user
+         * provided fixed_DType. If this is an object array, the elements
+         * may need to be inspected individually.
+         * Note, this finds the descriptor of the array first and only then
+         * promotes here (different associativity).
+         */
+        PyArray_Descr *cast_descr;
+        if (find_descriptor_from_array(arr, fixed_DType, &cast_descr) < 0) {
+            return -1;
+        }
+        if (cast_descr == NULL) {
+            /* object array with no elements, no need to promote/adjust. */
+            return max_dims;
+        }
+        if (handle_promotion(out_descr, cast_descr, flags) < 0) {
+            Py_DECREF(cast_descr);
+            return -1;
+        }
+        Py_DECREF(cast_descr);
+        return max_dims;
+    }
+
+    /*
+     * The last step is to assume the input should be handled as a sequence
+     * and to handle it recursively. That is, unless we have hit the
+     * dimension limit.
+     */
+    npy_bool is_sequence = (PySequence_Check(obj) && PySequence_Size(obj) >= 0);
+    if (NPY_UNLIKELY(*flags & DISCOVER_TUPLES_AS_ELEMENTS) &&
+            PyTuple_Check(obj)) {
+        is_sequence = NPY_FALSE;
+    }
+    if (curr_dims == max_dims || !is_sequence) {
+        /* Clear any PySequence_Size error which would corrupt further calls */
+        PyErr_Clear();
+        max_dims = handle_scalar(
+                obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
+                flags, NULL);
+        if (is_sequence) {
+            /* Flag as ragged or too deep array */
+            *flags |= FOUND_RAGGED_ARRAY;
+        }
+        return max_dims;
+    }
+    /* If we stop supporting bytes/str subclasses, more may be required here: */
+    assert(!PyBytes_Check(obj) && !PyUnicode_Check(obj));
+
+  force_sequence_due_to_char_dtype:
+
+    /* Ensure we have a sequence (required for PyPy) */
+    seq = PySequence_Fast(obj, "Could not convert object to sequence");
+    if (seq == NULL) {
+        /*
+         * Specifically do not fail on things that look like a dictionary,
+         * instead treat them as scalar.
+         */
+        if (PyErr_ExceptionMatches(PyExc_KeyError)) {
+            PyErr_Clear();
+            max_dims = handle_scalar(
+                    obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
+                    flags, NULL);
+            return max_dims;
+        }
+        return -1;
+    }
+    /* The cache takes ownership of the sequence here. */
+    if (npy_new_coercion_cache(obj, seq, 1, coercion_cache_tail_ptr, curr_dims) < 0) {
+        return -1;
+    }
+
+    npy_intp size = PySequence_Fast_GET_SIZE(seq);
+    PyObject **objects = PySequence_Fast_ITEMS(seq);
+
+    if (update_shape(curr_dims, &max_dims,
+            out_shape, 1, &size, NPY_TRUE, flags) < 0) {
+        /* But do update the shape as possible; this is a ragged case */
+        *flags |= FOUND_RAGGED_ARRAY;
+        return max_dims;
+    }
+    if (size == 0) {
+        /* If the sequence is empty, this must be the last dimension */
+        *flags |= MAX_DIMS_WAS_REACHED;
+        return curr_dims + 1;
+    }
+
+    /* Recursive call for each sequence item */
+    for (Py_ssize_t i = 0; i < size; i++) {
+        max_dims = PyArray_DiscoverDTypeAndShape_Recursive(
+                objects[i], curr_dims + 1, max_dims,
+                out_descr, out_shape, coercion_cache_tail_ptr, fixed_DType,
+                flags);
+
+        if (max_dims < 0) {
+            return -1;
+        }
+    }
+    return max_dims;
+}
+
+
+/**
+ * Finds the DType and shape of an arbitrary nested sequence. This is the
+ * general purpose function to find the parameters of the array (but not
+ * the array itself) as returned by `np.array()`
+ *
+ * Note: Before considering to make part of this public, we should consider
+ *       whether things such as `out_descr != NULL` should be supported in
+ *       a public API.
+ *
+ * @param obj Scalar or nested sequences.
+ * @param max_dims Maximum number of dimensions (after this scalars are forced)
+ * @param out_shape Will be filled with the output shape (more than the actual
+ *        shape may be written).
+ * @param coercion_cache NULL initialized reference to a cache pointer.
+ *        May be set to the first coercion_cache, and has to be freed using
+ *        npy_free_coercion_cache.
+ *        This should be stored in a thread-safe manner (i.e. function static)
+ *        and is designed to be consumed by `PyArray_AssignFromCache`.
+ *        If not consumed, must be freed using `npy_free_coercion_cache`.
+ * @param fixed_DType A user provided fixed DType class.
+ * @param requested_descr A user provided fixed descriptor. This is always
+ *        returned as the discovered descriptor, but currently only used
+ *        for the ``__array__`` protocol.
+ * @param out_descr Set to the discovered output descriptor. This may be
+ *        non NULL but only when fixed_DType/requested_descr are not given.
+ *        If non NULL, it is the first dtype being promoted and used if there
+ *        are no elements.
+ *        The result may be unchanged (remain NULL) when converting a
+ *        sequence with no elements. In this case it is callers responsibility
+ *        to choose a default.
+ * @return dimensions of the discovered object or -1 on error.
+ *         WARNING: If (and only if) the output is a single array, the ndim
+ *         returned _can_ exceed the maximum allowed number of dimensions.
+ *         It might be nice to deprecate this? But it allows things such as
+ *         `arr1d[...] = np.array([[1,2,3,4]])`
+ */
+NPY_NO_EXPORT int
+PyArray_DiscoverDTypeAndShape(
+        PyObject *obj, int max_dims,
+        npy_intp out_shape[NPY_MAXDIMS],
+        coercion_cache_obj **coercion_cache,
+        PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
+        PyArray_Descr **out_descr)
+{
+    coercion_cache_obj **coercion_cache_head = coercion_cache;
+    *coercion_cache = NULL;
+    enum _dtype_discovery_flags flags = 0;
+
+    /*
+     * Support a passed in descriptor (but only if nothing was specified).
+     */
+    assert(*out_descr == NULL || fixed_DType == NULL);
+    /* Validate input of requested descriptor and DType */
+    if (fixed_DType != NULL) {
+        assert(PyObject_TypeCheck(
+                (PyObject *)fixed_DType, (PyTypeObject *)&PyArrayDTypeMeta_Type));
+    }
+
+    if (requested_descr != NULL) {
+        assert(fixed_DType == NPY_DTYPE(requested_descr));
+        /* The output descriptor must be the input. */
+        Py_INCREF(requested_descr);
+        *out_descr = requested_descr;
+        flags |= DESCRIPTOR_WAS_SET;
+    }
+
+    /*
+     * Call the recursive function, the setup for this may need expanding
+     * to handle caching better.
+     */
+
+    /* Legacy discovery flags */
+    if (requested_descr != NULL) {
+        if (requested_descr->type_num == NPY_STRING &&
+                requested_descr->type == 'c') {
+            /* Character dtype variation of string (should be deprecated...) */
+            flags |= DISCOVER_STRINGS_AS_SEQUENCES;
+        }
+        else if (requested_descr->type_num == NPY_VOID &&
+                    (requested_descr->names || requested_descr->subarray))  {
+            /* Void is a chimera, in that it may or may not be structured... */
+            flags |= DISCOVER_TUPLES_AS_ELEMENTS;
+        }
+    }
+
+    int ndim = PyArray_DiscoverDTypeAndShape_Recursive(
+            obj, 0, max_dims, out_descr, out_shape, &coercion_cache,
+            fixed_DType, &flags);
+    if (ndim < 0) {
+        goto fail;
+    }
+
+    if (NPY_UNLIKELY(flags & FOUND_RAGGED_ARRAY)) {
+        /*
+         * If max-dims was reached and the dimensions reduced, this is ragged.
+         * Otherwise, we merely reached the maximum dimensions, which is
+         * slightly different. This happens for example for `[1, [2, 3]]`
+         * where the maximum dimensions is 1, but then a sequence found.
+         *
+         * In this case we need to inform the user and clean out the cache
+         * since it may be too deep.
+         */
+
+        /* Handle reaching the maximum depth differently: */
+        int too_deep = ndim == max_dims;
+
+        if (fixed_DType == NULL) {
+            /* This is discovered as object, but deprecated */
+            static PyObject *visibleDeprecationWarning = NULL;
+            npy_cache_import(
+                    "numpy", "VisibleDeprecationWarning",
+                    &visibleDeprecationWarning);
+            if (visibleDeprecationWarning == NULL) {
+                goto fail;
+            }
+            if (!too_deep) {
+                /* NumPy 1.19, 2019-11-01 */
+                if (PyErr_WarnEx(visibleDeprecationWarning,
+                        "Creating an ndarray from ragged nested sequences (which "
+                        "is a list-or-tuple of lists-or-tuples-or ndarrays with "
+                        "different lengths or shapes) is deprecated. If you "
+                        "meant to do this, you must specify 'dtype=object' "
+                        "when creating the ndarray.", 1) < 0) {
+                    goto fail;
+                }
+            }
+            else {
+                /* NumPy 1.20, 2020-05-08 */
+                /* Note, max_dims should normally always be NPY_MAXDIMS here */
+                if (PyErr_WarnFormat(visibleDeprecationWarning, 1,
+                        "Creating an ndarray from nested sequences exceeding "
+                        "the maximum number of dimensions of %d is deprecated. "
+                        "If you mean to do this, you must specify "
+                        "'dtype=object' when creating the ndarray.",
+                        max_dims) < 0) {
+                    goto fail;
+                }
+            }
+            /* Ensure that ragged arrays always return object dtype */
+            Py_XSETREF(*out_descr, PyArray_DescrFromType(NPY_OBJECT));
+        }
+        else if (fixed_DType->type_num != NPY_OBJECT) {
+            /* Only object DType supports ragged cases unify error */
+            if (!too_deep) {
+                PyObject *shape = PyArray_IntTupleFromIntp(ndim, out_shape);
+                PyErr_Format(PyExc_ValueError,
+                        "setting an array element with a sequence. The "
+                        "requested array has an inhomogeneous shape after "
+                        "%d dimensions. The detected shape was "
+                        "%R + inhomogeneous part.",
+                        ndim, shape);
+                Py_DECREF(shape);
+                goto fail;
+            }
+            else {
+                PyErr_Format(PyExc_ValueError,
+                        "setting an array element with a sequence. The "
+                        "requested array would exceed the maximum number of "
+                        "dimension of %d.",
+                        max_dims);
+                goto fail;
+            }
+        }
+
+        /*
+         * If the array is ragged, the cache may be too deep, so clean it.
+         * The cache is left at the same depth as the array though.
+         */
+        coercion_cache_obj **next_ptr = coercion_cache_head;
+        coercion_cache_obj *current = *coercion_cache_head;  /* item to check */
+        while (current != NULL) {
+            if (current->depth > ndim) {
+                /* delete the cache item and advance `current` (unlike below) */
+                current = npy_unlink_coercion_cache(current);
+                continue;
+            }
+            /* advance both prev and next, and set prev->next to new item */
+            *next_ptr = current;
+            next_ptr = &(current->next);
+            current = current->next;
+        }
+        *next_ptr = NULL;
+    }
+    /* We could check here for max-ndims being reached as well */
+
+    if (requested_descr != NULL) {
+        /* descriptor was provided, we did not accidentally change it */
+        assert(*out_descr == requested_descr);
+    }
+    else if (NPY_UNLIKELY(*out_descr == NULL)) {
+        /*
+         * When the object contained no elements (sequence of length zero),
+         * no descriptor may have been found. When a DType was requested
+         * we use it to define the output dtype.
+         * Otherwise, out_descr will remain NULL and the caller has to set
+         * the correct default.
+         */
+        if (fixed_DType != NULL) {
+            if (fixed_DType->default_descr == NULL) {
+                Py_INCREF(fixed_DType->singleton);
+                *out_descr = fixed_DType->singleton;
+            }
+            else {
+                *out_descr = fixed_DType->default_descr(fixed_DType);
+                if (*out_descr == NULL) {
+                    goto fail;
+                }
+            }
+        }
+    }
+    return ndim;
+
+  fail:
+    npy_free_coercion_cache(*coercion_cache_head);
+    *coercion_cache_head = NULL;
+    Py_XSETREF(*out_descr, NULL);
+    return -1;
+}
+
+
+
+/**
+ * Check whether the descriptor is a legacy "flexible" DType instance, this is
+ * an instance which is (normally) not attached to an array, such as a string
+ * of length 0 or a datetime with no unit.
+ * These should be largely deprecated, and represent only the DType class
+ * for most `dtype` parameters.
+ *
+ * TODO: This function should eventually receive a deprecation warning and
+ *       be removed.
+ *
+ * @param descr The descriptor instance to inspect.
+ * @return 1 if this is not a concrete dtype instance 0 otherwise
+ */
+static int
+descr_is_legacy_parametric_instance(PyArray_Descr *descr)
+{
+    /* Unsized flexible dtypes (e.g. "S0"/"U0") behave like the class. */
+    if (PyDataType_ISUNSIZED(descr)) {
+        return 1;
+    }
+    /*
+     * A datetime with a generic time unit can still be adapted, so it also
+     * represents the DType class rather than a concrete instance.
+     */
+    if (PyDataType_ISDATETIME(descr)) {
+        PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(descr);
+        return meta->base == NPY_FR_GENERIC;
+    }
+    return 0;
+}
+
+
+/**
+ * Given either a DType instance or class, (or legacy flexible instance),
+ * extracts and sets the output dtype instance and DType class. Both results
+ * may be NULL, but if `out_descr` is set `out_DType` will always be the
+ * corresponding class.
+ *
+ * @param dtype A dtype instance, DType class, or NULL.
+ * @param out_descr Set to a new reference to the descriptor (or NULL when
+ *        `dtype` is a class or a legacy parametric instance).
+ * @param out_DType Set to a new reference to the DType class (or NULL when
+ *        `dtype` is NULL).
+ * @return 0 on success -1 on failure
+ */
+NPY_NO_EXPORT int
+PyArray_ExtractDTypeAndDescriptor(PyObject *dtype,
+        PyArray_Descr **out_descr, PyArray_DTypeMeta **out_DType)
+{
+    *out_DType = NULL;
+    *out_descr = NULL;
+
+    if (dtype != NULL) {
+        if (PyObject_TypeCheck(dtype, (PyTypeObject *)&PyArrayDTypeMeta_Type)) {
+            /* `dtype` is itself a DType class */
+            assert(dtype != (PyObject * )&PyArrayDescr_Type);  /* not np.dtype */
+            *out_DType = (PyArray_DTypeMeta *)dtype;
+            Py_INCREF(*out_DType);
+        }
+        else if (PyObject_TypeCheck((PyObject *)Py_TYPE(dtype),
+                    (PyTypeObject *)&PyArrayDTypeMeta_Type)) {
+            /* `dtype` is a descriptor instance; its type is the DType class */
+            *out_DType = NPY_DTYPE(dtype);
+            Py_INCREF(*out_DType);
+            if (!descr_is_legacy_parametric_instance((PyArray_Descr *)dtype)) {
+                *out_descr = (PyArray_Descr *)dtype;
+                Py_INCREF(*out_descr);
+            }
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                    "dtype parameter must be a DType instance or class.");
+            return -1;
+        }
+    }
+    return 0;
+}
+
+
+/*
+ * Python API function to expose the dtype+shape discovery functionality
+ * directly. Returns a 2-tuple ``(dtype_or_None, shape)``; the dtype is
+ * None when the input contained no elements and none was requested.
+ */
+NPY_NO_EXPORT PyObject *
+_discover_array_parameters(PyObject *NPY_UNUSED(self),
+        PyObject *args, PyObject *kwargs)
+{
+    static char *kwlist[] = {"obj", "dtype", NULL};
+
+    PyObject *obj;
+    PyObject *dtype = NULL;
+    PyArray_Descr *fixed_descriptor = NULL;
+    PyArray_DTypeMeta *fixed_DType = NULL;
+    npy_intp shape[NPY_MAXDIMS];
+
+    if (!PyArg_ParseTupleAndKeywords(
+            args, kwargs, "O|O:_discover_array_parameters", kwlist,
+            &obj, &dtype)) {
+        return NULL;
+    }
+
+    if (PyArray_ExtractDTypeAndDescriptor(dtype,
+            &fixed_descriptor, &fixed_DType) < 0) {
+        return NULL;
+    }
+
+    coercion_cache_obj *coercion_cache = NULL;
+    PyObject *out_dtype = NULL;
+    int ndim = PyArray_DiscoverDTypeAndShape(
+            obj, NPY_MAXDIMS, shape,
+            &coercion_cache,
+            fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype);
+    Py_XDECREF(fixed_DType);
+    Py_XDECREF(fixed_descriptor);
+    if (ndim < 0) {
+        /* out_dtype is cleared by PyArray_DiscoverDTypeAndShape on failure */
+        return NULL;
+    }
+    /* The cache is not needed here; only dtype and shape are reported */
+    npy_free_coercion_cache(coercion_cache);
+    if (out_dtype == NULL) {
+        /* Empty sequence, report this as None. */
+        out_dtype = Py_None;
+        Py_INCREF(Py_None);
+    }
+
+    PyObject *shape_tuple = PyArray_IntTupleFromIntp(ndim, shape);
+    if (shape_tuple == NULL) {
+        /* Previously leaked the out_dtype reference on this error path */
+        Py_DECREF(out_dtype);
+        return NULL;
+    }
+
+    PyObject *res = PyTuple_Pack(2, (PyObject *)out_dtype, shape_tuple);
+    Py_DECREF(out_dtype);
+    Py_DECREF(shape_tuple);
+    return res;
+}
diff --git a/numpy/core/src/multiarray/array_coercion.h b/numpy/core/src/multiarray/array_coercion.h
new file mode 100644
index 000000000..90ce0355a
--- /dev/null
+++ b/numpy/core/src/multiarray/array_coercion.h
@@ -0,0 +1,58 @@
+#ifndef _NPY_ARRAY_COERCION_H
+#define _NPY_ARRAY_COERCION_H
+
+
+/*
+ * We do not want to coerce arrays many times unless absolutely necessary.
+ * The same goes for sequences, so everything we have seen, we will have
+ * to store somehow. This is a linked list of these objects.
+ */
+typedef struct coercion_cache_obj {
+    PyObject *converted_obj;       /* the original object (borrowed) */
+    PyObject *arr_or_sequence;     /* converted array/sequence (owned) */
+    struct coercion_cache_obj *next;
+    npy_bool sequence;             /* whether arr_or_sequence is a sequence */
+    int depth;  /* the dimension at which this object was found. */
+} coercion_cache_obj;
+
+
+/* Register a Python type as the scalar type of a (possibly user) DType */
+NPY_NO_EXPORT int
+_PyArray_MapPyTypeToDType(
+        PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef);
+
+/* Pack a single Python object into an array element of the given descr */
+NPY_NO_EXPORT int
+PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value);
+
+/* Cast a dtype instance or class to a concrete descriptor for `arr` */
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype);
+
+/* Discover dtype and shape of a (possibly nested) object, filling a cache */
+NPY_NO_EXPORT int
+PyArray_DiscoverDTypeAndShape(
+        PyObject *obj, int max_dims,
+        npy_intp out_shape[NPY_MAXDIMS],
+        coercion_cache_obj **coercion_cache,
+        PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
+        PyArray_Descr **out_descr);
+
+/* Split a dtype instance/class argument into descriptor and DType class */
+NPY_NO_EXPORT int
+PyArray_ExtractDTypeAndDescriptor(PyObject *dtype,
+        PyArray_Descr **out_descr, PyArray_DTypeMeta **out_DType);
+
+/* Python-level helper exposing dtype+shape discovery (for tests) */
+NPY_NO_EXPORT PyObject *
+_discover_array_parameters(PyObject *NPY_UNUSED(self),
+        PyObject *args, PyObject *kwargs);
+
+
+/* Would make sense to inline the freeing functions everywhere */
+/* Frees the coercion cache object recursively. */
+NPY_NO_EXPORT void
+npy_free_coercion_cache(coercion_cache_obj *first);
+
+/* unlink a single item and return the next */
+NPY_NO_EXPORT coercion_cache_obj *
+npy_unlink_coercion_cache(coercion_cache_obj *current);
+
+/* Fill an allocated array from a previously built coercion cache */
+NPY_NO_EXPORT int
+PyArray_AssignFromCache(PyArrayObject *self, coercion_cache_obj *cache);
+
+#endif  /* _NPY_ARRAY_COERCION_H */
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index dedaf38eb..95c650674 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -43,6 +43,7 @@ maintainer email: oliphant.travis@ieee.org
#include "arrayobject.h"
#include "conversion_utils.h"
#include "ctors.h"
+#include "dtypemeta.h"
#include "methods.h"
#include "descriptor.h"
#include "iterators.h"
@@ -57,6 +58,7 @@ maintainer email: oliphant.travis@ieee.org
#include "strfuncs.h"
#include "binop_override.h"
+#include "array_coercion.h"
/*NUMPY_API
Compute the size of an array (in number of items)
@@ -235,136 +237,96 @@ PyArray_SetBaseObject(PyArrayObject *arr, PyObject *obj)
}
+/**
+ * Assign an arbitrary object a NumPy array. This is largely basically
+ * identical to PyArray_FromAny, but assigns directly to the output array.
+ *
+ * @param dest Array to be written to
+ * @param src_object Object to be assigned, array-coercion rules apply.
+ * @return 0 on success -1 on failures.
+ */
/*NUMPY_API*/
NPY_NO_EXPORT int
PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object)
{
int ret = 0;
- PyArrayObject *src;
+ PyArrayObject *view;
PyArray_Descr *dtype = NULL;
- int ndim = 0;
+ int ndim;
npy_intp dims[NPY_MAXDIMS];
+ coercion_cache_obj *cache = NULL;
- Py_INCREF(src_object);
/*
- * Special code to mimic Numeric behavior for
- * character arrays.
+ * We have to set the maximum number of dimensions here to support
+ * sequences within object arrays.
*/
- if (PyArray_DESCR(dest)->type == NPY_CHARLTR &&
- PyArray_NDIM(dest) > 0 &&
- PyString_Check(src_object)) {
- npy_intp n_new, n_old;
- char *new_string;
- PyObject *tmp;
+ ndim = PyArray_DiscoverDTypeAndShape(src_object,
+ PyArray_NDIM(dest), dims, &cache,
+ NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype);
+ if (ndim < 0) {
+ return -1;
+ }
- n_new = PyArray_DIMS(dest)[PyArray_NDIM(dest)-1];
- n_old = PyString_Size(src_object);
- if (n_new > n_old) {
- new_string = malloc(n_new);
- if (new_string == NULL) {
- Py_DECREF(src_object);
- PyErr_NoMemory();
- return -1;
- }
- memcpy(new_string, PyString_AS_STRING(src_object), n_old);
- memset(new_string + n_old, ' ', n_new - n_old);
- tmp = PyString_FromStringAndSize(new_string, n_new);
- free(new_string);
- Py_DECREF(src_object);
- src_object = tmp;
- }
+ if (cache != NULL && !(cache->sequence)) {
+ /* The input is an array or array object, so assign directly */
+ assert(cache->converted_obj == src_object);
+ view = (PyArrayObject *)cache->arr_or_sequence;
+ Py_DECREF(dtype);
+ ret = PyArray_AssignArray(dest, view, NULL, NPY_UNSAFE_CASTING);
+ npy_free_coercion_cache(cache);
+ return ret;
}
/*
- * Get either an array object we can copy from, or its parameters
- * if there isn't a convenient array available.
+ * We may need to broadcast due to shape mismatches; in this case
+ * create a temporary array first, and assign that after filling
+ * it from the sequences/scalar.
*/
- if (PyArray_GetArrayParamsFromObject_int(src_object,
- PyArray_DESCR(dest), 0, &dtype, &ndim, dims, &src) < 0) {
- Py_DECREF(src_object);
- return -1;
+ if (ndim != PyArray_NDIM(dest) ||
+ !PyArray_CompareLists(PyArray_DIMS(dest), dims, ndim)) {
+ /*
+ * Broadcasting may be necessary, so assign to a view first.
+ * This branch could lead to a shape mismatch error later.
+ */
+ assert (ndim <= PyArray_NDIM(dest)); /* would error during discovery */
+ view = (PyArrayObject *) PyArray_NewFromDescr(
+ &PyArray_Type, dtype, ndim, dims, NULL, NULL,
+ PyArray_FLAGS(dest) & NPY_ARRAY_F_CONTIGUOUS, NULL);
+ if (view == NULL) {
+ npy_free_coercion_cache(cache);
+ return -1;
+ }
+ }
+ else {
+ Py_DECREF(dtype);
+ view = dest;
}
- /* If it's not an array, either assign from a sequence or as a scalar */
- if (src == NULL) {
- /* If the input is scalar */
- if (ndim == 0) {
- /* If there's one dest element and src is a Python scalar */
- if (PyArray_IsScalar(src_object, Generic)) {
- char *value;
- int retcode;
-
- value = scalar_value(src_object, dtype);
- if (value == NULL) {
- Py_DECREF(dtype);
- Py_DECREF(src_object);
- return -1;
- }
-
- /* TODO: switch to SAME_KIND casting */
- retcode = PyArray_AssignRawScalar(dest, dtype, value,
- NULL, NPY_UNSAFE_CASTING);
- Py_DECREF(dtype);
- Py_DECREF(src_object);
- return retcode;
- }
- /* Otherwise use the dtype's setitem function */
- else {
- if (PyArray_SIZE(dest) == 1) {
- Py_DECREF(dtype);
- Py_DECREF(src_object);
- ret = PyArray_SETITEM(dest, PyArray_DATA(dest), src_object);
- return ret;
- }
- else {
- src = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype, 0, NULL, NULL,
- NULL, 0, NULL);
- if (src == NULL) {
- Py_DECREF(src_object);
- return -1;
- }
- if (PyArray_SETITEM(src, PyArray_DATA(src), src_object) < 0) {
- Py_DECREF(src_object);
- Py_DECREF(src);
- return -1;
- }
- }
- }
+ /* Assign the values to `view` (whichever array that is) */
+ if (cache == NULL) {
+ /* single (non-array) item, assign immediately */
+ if (PyArray_Pack(
+ PyArray_DESCR(view), PyArray_DATA(view), src_object) < 0) {
+ goto fail;
}
- else {
- /*
- * If there are more than enough dims, use AssignFromSequence
- * because it can handle this style of broadcasting.
- */
- if (ndim >= PyArray_NDIM(dest)) {
- int res;
- Py_DECREF(dtype);
- res = PyArray_AssignFromSequence(dest, src_object);
- Py_DECREF(src_object);
- return res;
- }
- /* Otherwise convert to an array and do an array-based copy */
- src = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- dtype, ndim, dims, NULL, NULL,
- PyArray_ISFORTRAN(dest), NULL);
- if (src == NULL) {
- Py_DECREF(src_object);
- return -1;
- }
- if (PyArray_AssignFromSequence(src, src_object) < 0) {
- Py_DECREF(src);
- Py_DECREF(src_object);
- return -1;
- }
+ }
+ else {
+ if (PyArray_AssignFromCache(view, cache) < 0) {
+ goto fail;
}
}
-
- /* If it's an array, do a move (handling possible overlapping data) */
- ret = PyArray_MoveInto(dest, src);
- Py_DECREF(src);
- Py_DECREF(src_object);
+ if (view == dest) {
+ return 0;
+ }
+ ret = PyArray_AssignArray(dest, view, NULL, NPY_UNSAFE_CASTING);
+ Py_DECREF(view);
return ret;
+
+ fail:
+ if (view != dest) {
+ Py_DECREF(view);
+ }
+ return -1;
}
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 55ae73779..2abc79167 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -9,6 +9,7 @@
#include "npy_pycompat.h"
#include "common.h"
+#include "abstractdtypes.h"
#include "usertypes.h"
#include "common.h"
@@ -16,6 +17,7 @@
#include "get_attr_string.h"
#include "mem_overlap.h"
+#include "array_coercion.h"
/*
* The casting to use for implicit assignment operations resulting from
@@ -44,88 +46,19 @@ _array_find_python_scalar_type(PyObject *op)
else if (PyComplex_Check(op)) {
return PyArray_DescrFromType(NPY_CDOUBLE);
}
- else if (PyInt_Check(op)) {
- /* bools are a subclass of int */
- if (PyBool_Check(op)) {
- return PyArray_DescrFromType(NPY_BOOL);
- }
- else {
- return PyArray_DescrFromType(NPY_LONG);
- }
- }
else if (PyLong_Check(op)) {
- /* check to see if integer can fit into a longlong or ulonglong
- and return that --- otherwise return object */
- if ((PyLong_AsLongLong(op) == -1) && PyErr_Occurred()) {
- PyErr_Clear();
- }
- else {
- return PyArray_DescrFromType(NPY_LONGLONG);
- }
-
- if ((PyLong_AsUnsignedLongLong(op) == (unsigned long long) -1)
- && PyErr_Occurred()){
- PyErr_Clear();
- }
- else {
- return PyArray_DescrFromType(NPY_ULONGLONG);
- }
-
- return PyArray_DescrFromType(NPY_OBJECT);
+ return PyArray_PyIntAbstractDType.discover_descr_from_pyobject(
+ &PyArray_PyIntAbstractDType, op);
}
return NULL;
}
-/*
- * These constants are used to signal that the recursive dtype determination in
- * PyArray_DTypeFromObject encountered a string type, and that the recursive
- * search must be restarted so that string representation lengths can be
- * computed for all scalar types.
- */
-#define RETRY_WITH_STRING 1
-#define RETRY_WITH_UNICODE 2
-
-/*
- * Recursively examines the object to determine an appropriate dtype
- * to use for converting to an ndarray.
- *
- * 'obj' is the object to be converted to an ndarray.
- *
- * 'maxdims' is the maximum recursion depth.
- *
- * 'out_dtype' should be either NULL or a minimal starting dtype when
- * the function is called. It is updated with the results of type
- * promotion. This dtype does not get updated when processing NA objects.
- * This is reset to NULL on failure.
- *
- * Returns 0 on success, -1 on failure.
- */
- NPY_NO_EXPORT int
-PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
-{
- int res;
-
- res = PyArray_DTypeFromObjectHelper(obj, maxdims, out_dtype, 0);
- if (res == RETRY_WITH_STRING) {
- res = PyArray_DTypeFromObjectHelper(obj, maxdims,
- out_dtype, NPY_STRING);
- if (res == RETRY_WITH_UNICODE) {
- res = PyArray_DTypeFromObjectHelper(obj, maxdims,
- out_dtype, NPY_UNICODE);
- }
- }
- else if (res == RETRY_WITH_UNICODE) {
- res = PyArray_DTypeFromObjectHelper(obj, maxdims,
- out_dtype, NPY_UNICODE);
- }
- return res;
-}
/*
* Get a suitable string dtype by calling `__str__`.
* For `np.bytes_`, this assumes an ASCII encoding.
*/
-static PyArray_Descr *
+NPY_NO_EXPORT PyArray_Descr *
PyArray_DTypeFromObjectStringDiscovery(
PyObject *obj, PyArray_Descr *last_dtype, int string_type)
{
@@ -159,8 +92,8 @@ PyArray_DTypeFromObjectStringDiscovery(
return NULL;
}
if (last_dtype != NULL &&
- last_dtype->type_num == string_type &&
- last_dtype->elsize >= itemsize) {
+ last_dtype->type_num == string_type &&
+ last_dtype->elsize >= itemsize) {
Py_INCREF(last_dtype);
return last_dtype;
}
@@ -172,348 +105,28 @@ PyArray_DTypeFromObjectStringDiscovery(
return dtype;
}
+
+/*
+ * This function is now identical to the new PyArray_DiscoverDTypeAndShape
+ * but only returns the the dtype. It should in most cases be slowly phased
+ * out. (Which may need some refactoring to PyArray_FromAny to make it simpler)
+ */
NPY_NO_EXPORT int
-PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
- PyArray_Descr **out_dtype, int string_type)
+PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
{
- int i, size;
- PyArray_Descr *dtype = NULL;
- PyObject *ip;
- Py_buffer buffer_view;
- /* types for sequence handling */
- PyObject ** objects;
- PyObject * seq;
- PyTypeObject * common_type;
-
- /* Check if it's an ndarray */
- if (PyArray_Check(obj)) {
- dtype = PyArray_DESCR((PyArrayObject *)obj);
- Py_INCREF(dtype);
- goto promote_types;
- }
-
- /* See if it's a python None */
- if (obj == Py_None) {
- dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (dtype == NULL) {
- goto fail;
- }
- goto promote_types;
- }
- /* Check if it's a NumPy scalar */
- else if (PyArray_IsScalar(obj, Generic)) {
- if (!string_type) {
- dtype = PyArray_DescrFromScalar(obj);
- if (dtype == NULL) {
- goto fail;
- }
- }
- else {
- dtype = PyArray_DTypeFromObjectStringDiscovery(
- obj, *out_dtype, string_type);
- if (dtype == NULL) {
- goto fail;
- }
-
- /* nothing to do, dtype is already correct */
- if (dtype == *out_dtype){
- Py_DECREF(dtype);
- return 0;
- }
- }
- goto promote_types;
- }
-
- /* Check if it's a Python scalar */
- dtype = _array_find_python_scalar_type(obj);
- if (dtype != NULL) {
- if (string_type) {
- /* dtype is not used in this (string discovery) branch */
- Py_DECREF(dtype);
- dtype = PyArray_DTypeFromObjectStringDiscovery(
- obj, *out_dtype, string_type);
- if (dtype == NULL) {
- goto fail;
- }
-
- /* nothing to do, dtype is already correct */
- if (dtype == *out_dtype){
- Py_DECREF(dtype);
- return 0;
- }
- }
- goto promote_types;
- }
-
- /* Check if it's an ASCII string */
- if (PyBytes_Check(obj)) {
- int itemsize = PyString_GET_SIZE(obj);
-
- /* If it's already a big enough string, don't bother type promoting */
- if (*out_dtype != NULL &&
- (*out_dtype)->type_num == NPY_STRING &&
- (*out_dtype)->elsize >= itemsize) {
- return 0;
- }
- dtype = PyArray_DescrNewFromType(NPY_STRING);
- if (dtype == NULL) {
- goto fail;
- }
- dtype->elsize = itemsize;
- goto promote_types;
- }
-
- /* Check if it's a Unicode string */
- if (PyUnicode_Check(obj)) {
- int itemsize = PyUnicode_GetLength(obj);
- if (itemsize < 0) {
- goto fail;
- }
- itemsize *= 4;
+ coercion_cache_obj *cache = NULL;
+ npy_intp shape[NPY_MAXDIMS];
+ int ndim;
- /*
- * If it's already a big enough unicode object,
- * don't bother type promoting
- */
- if (*out_dtype != NULL &&
- (*out_dtype)->type_num == NPY_UNICODE &&
- (*out_dtype)->elsize >= itemsize) {
- return 0;
- }
- dtype = PyArray_DescrNewFromType(NPY_UNICODE);
- if (dtype == NULL) {
- goto fail;
- }
- dtype->elsize = itemsize;
- goto promote_types;
- }
-
- /* PEP 3118 buffer interface */
- if (PyObject_CheckBuffer(obj) == 1) {
- memset(&buffer_view, 0, sizeof(Py_buffer));
- if (PyObject_GetBuffer(obj, &buffer_view,
- PyBUF_FORMAT|PyBUF_STRIDES) == 0 ||
- PyObject_GetBuffer(obj, &buffer_view,
- PyBUF_FORMAT|PyBUF_SIMPLE) == 0) {
-
- PyErr_Clear();
- dtype = _descriptor_from_pep3118_format(buffer_view.format);
- PyBuffer_Release(&buffer_view);
- if (dtype) {
- goto promote_types;
- }
- }
- else if (PyObject_GetBuffer(obj, &buffer_view, PyBUF_STRIDES) == 0 ||
- PyObject_GetBuffer(obj, &buffer_view, PyBUF_SIMPLE) == 0) {
-
- PyErr_Clear();
- dtype = PyArray_DescrNewFromType(NPY_VOID);
- dtype->elsize = buffer_view.itemsize;
- PyBuffer_Release(&buffer_view);
- goto promote_types;
- }
- else {
- PyErr_Clear();
- }
- }
-
- /* The array interface */
- ip = PyArray_LookupSpecial_OnInstance(obj, "__array_interface__");
- if (ip != NULL) {
- if (PyDict_Check(ip)) {
- PyObject *typestr;
- PyObject *tmp = NULL;
- typestr = _PyDict_GetItemStringWithError(ip, "typestr");
- if (typestr == NULL && PyErr_Occurred()) {
- goto fail;
- }
- /* Allow unicode type strings */
- if (typestr && PyUnicode_Check(typestr)) {
- tmp = PyUnicode_AsASCIIString(typestr);
- typestr = tmp;
- }
- if (typestr && PyBytes_Check(typestr)) {
- dtype =_array_typedescr_fromstr(PyBytes_AS_STRING(typestr));
- if (tmp == typestr) {
- Py_DECREF(tmp);
- }
- Py_DECREF(ip);
- if (dtype == NULL) {
- goto fail;
- }
- goto promote_types;
- }
- }
- Py_DECREF(ip);
- }
- else if (PyErr_Occurred()) {
- PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
- }
-
-
- /* The array struct interface */
- ip = PyArray_LookupSpecial_OnInstance(obj, "__array_struct__");
- if (ip != NULL) {
- PyArrayInterface *inter;
- char buf[40];
-
- if (NpyCapsule_Check(ip)) {
- inter = (PyArrayInterface *)NpyCapsule_AsVoidPtr(ip);
- if (inter->two == 2) {
- PyOS_snprintf(buf, sizeof(buf),
- "|%c%d", inter->typekind, inter->itemsize);
- dtype = _array_typedescr_fromstr(buf);
- Py_DECREF(ip);
- if (dtype == NULL) {
- goto fail;
- }
- goto promote_types;
- }
- }
- Py_DECREF(ip);
- }
- else if (PyErr_Occurred()) {
- PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
- }
-
- /* The __array__ attribute */
- ip = PyArray_LookupSpecial_OnInstance(obj, "__array__");
- if (ip != NULL) {
- Py_DECREF(ip);
- ip = PyObject_CallMethod(obj, "__array__", NULL);
- if(ip && PyArray_Check(ip)) {
- dtype = PyArray_DESCR((PyArrayObject *)ip);
- Py_INCREF(dtype);
- Py_DECREF(ip);
- goto promote_types;
- }
- Py_XDECREF(ip);
- if (PyErr_Occurred()) {
- goto fail;
- }
- }
- else if (PyErr_Occurred()) {
- PyErr_Clear(); /* TODO[gh-14801]: propagate crashes during attribute access? */
- }
-
- /*
- * If we reached the maximum recursion depth without hitting one
- * of the above cases, and obj isn't a sequence-like object, the output
- * dtype should be either OBJECT or a user-defined type.
- *
- * Note that some libraries define sequence-like classes but want them to
- * be treated as objects, and they expect numpy to treat it as an object if
- * __len__ is not defined.
- */
- if (maxdims == 0 || !PySequence_Check(obj) || PySequence_Size(obj) < 0) {
- /* clear any PySequence_Size error which corrupts further calls */
- PyErr_Clear();
-
- if (*out_dtype == NULL || (*out_dtype)->type_num != NPY_OBJECT) {
- Py_XDECREF(*out_dtype);
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (*out_dtype == NULL) {
- return -1;
- }
- }
- return 0;
- }
-
- /*
- * The C-API recommends calling PySequence_Fast before any of the other
- * PySequence_Fast* functions. This is required for PyPy
- */
- seq = PySequence_Fast(obj, "Could not convert object to sequence");
- if (seq == NULL) {
- goto fail;
- }
-
- /* Recursive case, first check the sequence contains only one type */
- size = PySequence_Fast_GET_SIZE(seq);
- /* objects is borrowed, do not release seq */
- objects = PySequence_Fast_ITEMS(seq);
- common_type = size > 0 ? Py_TYPE(objects[0]) : NULL;
- for (i = 1; i < size; ++i) {
- if (Py_TYPE(objects[i]) != common_type) {
- common_type = NULL;
- break;
- }
- }
-
- /* all types are the same and scalar, one recursive call is enough */
- if (common_type != NULL && !string_type &&
- (common_type == &PyFloat_Type ||
-/* TODO: we could add longs if we add a range check */
- common_type == &PyBool_Type ||
- common_type == &PyComplex_Type)) {
- size = 1;
- }
-
- /* Recursive call for each sequence item */
- for (i = 0; i < size; ++i) {
- int res = PyArray_DTypeFromObjectHelper(objects[i], maxdims - 1,
- out_dtype, string_type);
- if (res < 0) {
- Py_DECREF(seq);
- goto fail;
- }
- else if (res > 0) {
- Py_DECREF(seq);
- return res;
- }
+ ndim = PyArray_DiscoverDTypeAndShape(
+ obj, maxdims, shape, &cache, NULL, NULL, out_dtype);
+ if (ndim < 0) {
+ return -1;
}
-
- Py_DECREF(seq);
-
+ npy_free_coercion_cache(cache);
return 0;
-
-
-promote_types:
- /* Set 'out_dtype' if it's NULL */
- if (*out_dtype == NULL) {
- if (!string_type && dtype->type_num == NPY_STRING) {
- Py_DECREF(dtype);
- return RETRY_WITH_STRING;
- }
- if (!string_type && dtype->type_num == NPY_UNICODE) {
- Py_DECREF(dtype);
- return RETRY_WITH_UNICODE;
- }
- *out_dtype = dtype;
- return 0;
- }
- /* Do type promotion with 'out_dtype' */
- else {
- PyArray_Descr *res_dtype = PyArray_PromoteTypes(dtype, *out_dtype);
- Py_DECREF(dtype);
- if (res_dtype == NULL) {
- goto fail;
- }
- if (!string_type &&
- res_dtype->type_num == NPY_UNICODE &&
- (*out_dtype)->type_num != NPY_UNICODE) {
- Py_DECREF(res_dtype);
- return RETRY_WITH_UNICODE;
- }
- if (!string_type &&
- res_dtype->type_num == NPY_STRING &&
- (*out_dtype)->type_num != NPY_STRING) {
- Py_DECREF(res_dtype);
- return RETRY_WITH_STRING;
- }
- Py_DECREF(*out_dtype);
- *out_dtype = res_dtype;
- return 0;
- }
-
-fail:
- Py_XDECREF(*out_dtype);
- *out_dtype = NULL;
- return -1;
}
-#undef RETRY_WITH_STRING
-#undef RETRY_WITH_UNICODE
/* new reference */
NPY_NO_EXPORT PyArray_Descr *
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index 4ba25c079..793cefaf8 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -20,6 +20,11 @@
#define NPY_BEGIN_THREADS_NDITER(iter)
#endif
+
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_DTypeFromObjectStringDiscovery(
+ PyObject *obj, PyArray_Descr *last_dtype, int string_type);
+
/*
* Recursively examines the object to determine an appropriate dtype
* to use for converting to an ndarray.
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index 7bd088677..94cd1e5fa 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -12,8 +12,10 @@
#include "npy_pycompat.h"
#include "numpy/npy_math.h"
+#include "array_coercion.h"
#include "common.h"
#include "ctors.h"
+#include "dtypemeta.h"
#include "scalartypes.h"
#include "mapping.h"
@@ -47,11 +49,11 @@ PyArray_CastToType(PyArrayObject *arr, PyArray_Descr *dtype, int is_f_order)
{
PyObject *out;
- /* If the requested dtype is flexible, adapt it */
- dtype = PyArray_AdaptFlexibleDType((PyObject *)arr, PyArray_DESCR(arr), dtype);
+ Py_SETREF(dtype, PyArray_AdaptDescriptorToArray(arr, (PyObject *)dtype));
if (dtype == NULL) {
return NULL;
}
+
out = PyArray_NewFromDescr(Py_TYPE(arr), dtype,
PyArray_NDIM(arr),
PyArray_DIMS(arr),
@@ -128,24 +130,22 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num)
}
/*
+ * Legacy function to find the correct dtype when casting from any built-in
+ * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic
+ * units.
+ *
* This function returns a dtype based on flex_dtype and the values in
- * data_dtype and data_obj. It also calls Py_DECREF on the flex_dtype. If the
+ * data_dtype. It also calls Py_DECREF on the flex_dtype. If the
* flex_dtype is not flexible, it returns it as-is.
*
* Usually, if data_obj is not an array, dtype should be the result
* given by the PyArray_GetArrayParamsFromObject function.
*
- * The data_obj may be NULL if just a dtype is known for the source.
- *
* If *flex_dtype is NULL, returns immediately, without setting an
* exception, leaving any previous error handling intact.
- *
- * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID,
- * and NPY_DATETIME with generic units.
*/
NPY_NO_EXPORT PyArray_Descr *
-PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
- PyArray_Descr *flex_dtype)
+PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype)
{
PyArray_DatetimeMetaData *meta;
PyArray_Descr *retval = NULL;
@@ -227,73 +227,6 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
break;
case NPY_OBJECT:
size = 64;
- if ((flex_type_num == NPY_STRING ||
- flex_type_num == NPY_UNICODE) &&
- data_obj != NULL) {
- PyObject *list;
-
- if (PyArray_CheckScalar(data_obj)) {
- list = PyArray_ToList((PyArrayObject *)data_obj);
- if (list != NULL) {
- PyObject *s = PyObject_Str(list);
- if (s == NULL) {
- Py_DECREF(list);
- Py_DECREF(retval);
- return NULL;
- }
- else {
- size = PyObject_Length(s);
- Py_DECREF(s);
- }
- Py_DECREF(list);
- }
- }
- else if (PyArray_Check(data_obj)) {
- /*
- * Convert data array to list of objects since
- * GetArrayParamsFromObject won't iterate over
- * array.
- */
- PyArray_Descr *dtype = NULL;
- PyArrayObject *arr = NULL;
- int result;
- int ndim = 0;
- npy_intp dims[NPY_MAXDIMS];
- list = PyArray_ToList((PyArrayObject *)data_obj);
- result = PyArray_GetArrayParamsFromObject_int(
- list,
- retval,
- 0, &dtype,
- &ndim, dims, &arr);
- Py_DECREF(list);
- Py_XDECREF(arr);
- if (result < 0) {
- Py_XDECREF(dtype);
- Py_DECREF(retval);
- return NULL;
- }
- if (result == 0 && dtype != NULL) {
- if (flex_type_num == NPY_UNICODE) {
- size = dtype->elsize / 4;
- }
- else {
- size = dtype->elsize;
- }
- }
- Py_XDECREF(dtype);
- }
- else if (PyArray_IsPythonScalar(data_obj)) {
- PyObject *s = PyObject_Str(data_obj);
- if (s == NULL) {
- Py_DECREF(retval);
- return NULL;
- }
- else {
- size = PyObject_Length(s);
- Py_DECREF(s);
- }
- }
- }
break;
case NPY_STRING:
case NPY_VOID:
@@ -353,12 +286,6 @@ PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
retval = create_datetime_dtype(flex_type_num, meta);
Py_DECREF(flex_dtype);
}
- else if (data_obj != NULL) {
- /* Detect the unit from the input's data */
- retval = find_object_datetime_type(data_obj,
- flex_type_num);
- Py_DECREF(flex_dtype);
- }
}
}
else {
@@ -1292,7 +1219,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
PyArray_Descr *temp = PyArray_DescrNew(type1);
PyDataType_MAKEUNSIZED(temp);
- temp = PyArray_AdaptFlexibleDType(NULL, type2, temp);
+ temp = PyArray_AdaptFlexibleDType(type2, temp);
if (temp == NULL) {
return NULL;
}
@@ -1333,7 +1260,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
PyArray_Descr *ret = NULL;
PyArray_Descr *temp = PyArray_DescrNew(type1);
PyDataType_MAKEUNSIZED(temp);
- temp = PyArray_AdaptFlexibleDType(NULL, type2, temp);
+ temp = PyArray_AdaptFlexibleDType(type2, temp);
if (temp == NULL) {
return NULL;
}
@@ -1384,7 +1311,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
PyArray_Descr *ret = NULL;
PyArray_Descr *temp = PyArray_DescrNew(type2);
PyDataType_MAKEUNSIZED(temp);
- temp = PyArray_AdaptFlexibleDType(NULL, type1, temp);
+ temp = PyArray_AdaptFlexibleDType(type1, temp);
if (temp == NULL) {
return NULL;
}
@@ -1404,7 +1331,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
PyArray_Descr *ret = NULL;
PyArray_Descr *temp = PyArray_DescrNew(type2);
PyDataType_MAKEUNSIZED(temp);
- temp = PyArray_AdaptFlexibleDType(NULL, type1, temp);
+ temp = PyArray_AdaptFlexibleDType(type1, temp);
if (temp == NULL) {
return NULL;
}
@@ -2154,7 +2081,6 @@ PyArray_ObjectType(PyObject *op, int minimum_type)
return NPY_NOTYPE;
}
}
-
if (PyArray_DTypeFromObject(op, NPY_MAXDIMS, &dtype) < 0) {
return NPY_NOTYPE;
}
@@ -2162,6 +2088,19 @@ PyArray_ObjectType(PyObject *op, int minimum_type)
if (dtype == NULL) {
ret = NPY_DEFAULT_TYPE;
}
+ else if (!NPY_DTYPE(dtype)->legacy) {
+ /*
+ * TODO: If we keep all type number style API working, by defining
+ * type numbers always. We may be able to allow this again.
+ */
+ PyErr_Format(PyExc_TypeError,
+ "This function currently only supports native NumPy dtypes "
+ "and old-style user dtypes, but the dtype was %S.\n"
+ "(The function may need to be updated to support arbitrary"
+ "user dtypes.)",
+ dtype);
+ ret = NPY_NOTYPE;
+ }
else {
ret = dtype->type_num;
}
diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h
index 4a7d85187..9b7f39db2 100644
--- a/numpy/core/src/multiarray/convert_datatype.h
+++ b/numpy/core/src/multiarray/convert_datatype.h
@@ -47,7 +47,6 @@ npy_set_invalid_cast_error(
* and NPY_DATETIME with generic units.
*/
NPY_NO_EXPORT PyArray_Descr *
-PyArray_AdaptFlexibleDType(PyObject *data_obj, PyArray_Descr *data_dtype,
- PyArray_Descr *flex_dtype);
+PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype);
#endif
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index ccebe9da6..0c4ffe141 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -30,6 +30,7 @@
#include <assert.h>
#include "get_attr_string.h"
+#include "array_coercion.h"
/*
* Reading from a file or a string.
@@ -52,9 +53,6 @@
typedef int (*next_element)(void **, void *, PyArray_Descr *, void *);
typedef int (*skip_separator)(void **, const char *, void *);
-static PyObject *
-_array_from_array_like(PyObject *op,
- PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context);
static npy_bool
string_is_fully_read(char const* start, char const* end) {
@@ -455,420 +453,169 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems,
}
}
+
/*
- * adapted from Numarray,
- * a: destination array
- * s: source object, array or sequence
- * dim: current recursion dimension, must be 0 on first call
- * dst: must be NULL on first call
- * it is a view on the destination array viewing the place where to put the
- * data of the current recursion
+ * Recursive helper to assign using a coercion cache. This function
+ * must consume the cache depth first, just as the cache was originally
+ * produced.
*/
-static int
-setArrayFromSequence(PyArrayObject *a, PyObject *s,
- int dim, PyArrayObject * dst)
+NPY_NO_EXPORT int
+PyArray_AssignFromCache_Recursive(
+ PyArrayObject *self, const int ndim, coercion_cache_obj **cache)
{
- Py_ssize_t i, slen;
- int res = -1;
-
- /* first recursion, view equal destination */
- if (dst == NULL)
- dst = a;
+ /* Consume first cache element by extracting information and freeing it */
+ PyObject *original_obj = (*cache)->converted_obj;
+ PyObject *obj = (*cache)->arr_or_sequence;
+ Py_INCREF(obj);
+ npy_bool sequence = (*cache)->sequence;
+ int depth = (*cache)->depth;
+ *cache = npy_unlink_coercion_cache(*cache);
/*
- * This code is to ensure that the sequence access below will
- * return a lower-dimensional sequence.
+ * The maximum depth is special (specifically for objects), but usually
+ * unrolled in the sequence branch below.
*/
-
- /* INCREF on entry DECREF on exit */
- Py_INCREF(s);
-
- PyObject *seq = NULL;
-
- if (PyArray_Check(s)) {
- if (!(PyArray_CheckExact(s))) {
+ if (NPY_UNLIKELY(depth == ndim)) {
+ /*
+ * We have reached the maximum depth. We should simply assign to the
+ * element in principle. There is one exception. If this is a 0-D
+ * array being stored into a 0-D array (but we do not reach here then).
+ */
+ if (PyArray_ISOBJECT(self)) {
+ assert(ndim != 0); /* guaranteed by PyArray_AssignFromCache */
+ assert(PyArray_NDIM(self) == 0);
+ Py_DECREF(obj);
+ return PyArray_Pack(PyArray_DESCR(self), PyArray_BYTES(self),
+ original_obj);
+ }
+ if (sequence) {
/*
- * make sure a base-class array is used so that the dimensionality
- * reduction assumption is correct.
+ * Sanity check which may be removed; the error is raised already
+ * in `PyArray_DiscoverDTypeAndShape`.
*/
- /* This will DECREF(s) if replaced */
- s = PyArray_EnsureArray(s);
- if (s == NULL) {
- goto fail;
- }
- }
-
- /* dst points to correct array subsection */
- if (PyArray_CopyInto(dst, (PyArrayObject *)s) < 0) {
+ assert(0);
+ PyErr_SetString(PyExc_RuntimeError,
+ "setting an array element with a sequence");
goto fail;
}
-
- Py_DECREF(s);
- return 0;
- }
-
- if (dim > PyArray_NDIM(a)) {
- PyErr_Format(PyExc_ValueError,
- "setArrayFromSequence: sequence/array dimensions mismatch.");
- goto fail;
+ else if (original_obj != obj || !PyArray_CheckExact(obj)) {
+ /*
+ * If the leaf node is an array-like, but not a numpy array,
+ * we pretend it is an arbitrary scalar. This means that in
+ * most cases (where the dtype is int or float), we will end
+ * up using float(array-like), or int(array-like). That does
+ * not support general casting, but helps Quantity and masked
+ * arrays, because it allows them to raise an error when
+ * `__float__()` or `__int__()` is called.
+ */
+ Py_DECREF(obj);
+ return PyArray_SETITEM(self, PyArray_BYTES(self), original_obj);
+ }
}
- /* Try __array__ before using s as a sequence */
- PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL);
- if (tmp == NULL) {
- goto fail;
- }
- else if (tmp == Py_NotImplemented) {
- Py_DECREF(tmp);
+ /* The element is either a sequence, or an array */
+ if (!sequence) {
+ /* Straight forward array assignment */
+ assert(PyArray_Check(obj));
+ if (PyArray_CopyInto(self, (PyArrayObject *)obj) < 0) {
+ goto fail;
+ }
}
else {
- int r = PyArray_CopyInto(dst, (PyArrayObject *)tmp);
- Py_DECREF(tmp);
- if (r < 0) {
+ assert(depth != ndim);
+ npy_intp length = PySequence_Length(obj);
+ if (length != PyArray_DIMS(self)[0]) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Inconsistent object during array creation? "
+ "Content of sequences changed (length inconsistent).");
goto fail;
}
- Py_DECREF(s);
- return 0;
- }
-
- seq = PySequence_Fast(s, "Could not convert object to sequence");
- if (seq == NULL) {
- goto fail;
- }
- slen = PySequence_Fast_GET_SIZE(seq);
- /*
- * Either the dimensions match, or the sequence has length 1 and can
- * be broadcast to the destination.
- */
- if (slen != PyArray_DIMS(a)[dim] && slen != 1) {
- PyErr_Format(PyExc_ValueError,
- "cannot copy sequence with size %zd to array axis "
- "with dimension %" NPY_INTP_FMT, slen, PyArray_DIMS(a)[dim]);
- goto fail;
- }
+ for (npy_intp i = 0; i < length; i++) {
+ PyObject *value = PySequence_Fast_GET_ITEM(obj, i);
- /* Broadcast the one element from the sequence to all the outputs */
- if (slen == 1) {
- PyObject *o = PySequence_Fast_GET_ITEM(seq, 0);
- npy_intp alen = PyArray_DIM(a, dim);
-
- for (i = 0; i < alen; i++) {
- if ((PyArray_NDIM(a) - dim) > 1) {
- PyArrayObject * tmp =
- (PyArrayObject *)array_item_asarray(dst, i);
- if (tmp == NULL) {
+ if (*cache == NULL || (*cache)->converted_obj != value ||
+ (*cache)->depth != depth + 1) {
+ if (ndim != depth + 1) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Inconsistent object during array creation? "
+ "Content of sequences changed (now too shallow).");
goto fail;
}
-
- res = setArrayFromSequence(a, o, dim+1, tmp);
- Py_DECREF(tmp);
- }
- else {
- char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
- res = PyArray_SETITEM(dst, b, o);
- }
- if (res < 0) {
- goto fail;
- }
- }
- }
- /* Copy element by element */
- else {
- for (i = 0; i < slen; i++) {
- PyObject * o = PySequence_Fast_GET_ITEM(seq, i);
- if ((PyArray_NDIM(a) - dim) > 1) {
- PyArrayObject * tmp =
- (PyArrayObject *)array_item_asarray(dst, i);
- if (tmp == NULL) {
+ /* Straight forward assignment of elements */
+ char *item;
+ item = (PyArray_BYTES(self) + i * PyArray_STRIDES(self)[0]);
+ if (PyArray_Pack(PyArray_DESCR(self), item, value) < 0) {
goto fail;
}
-
- res = setArrayFromSequence(a, o, dim+1, tmp);
- Py_DECREF(tmp);
- }
- else {
- char * b = (PyArray_BYTES(dst) + i * PyArray_STRIDES(dst)[0]);
- res = PyArray_SETITEM(dst, b, o);
- }
- if (res < 0) {
- goto fail;
- }
- }
- }
-
- Py_DECREF(seq);
- Py_DECREF(s);
- return 0;
-
- fail:
- Py_XDECREF(seq);
- Py_DECREF(s);
- return res;
-}
-
-NPY_NO_EXPORT int
-PyArray_AssignFromSequence(PyArrayObject *self, PyObject *v)
-{
- if (!PySequence_Check(v)) {
- PyErr_SetString(PyExc_ValueError,
- "assignment from non-sequence");
- return -1;
- }
- if (PyArray_NDIM(self) == 0) {
- PyErr_SetString(PyExc_ValueError,
- "assignment to 0-d array");
- return -1;
- }
- return setArrayFromSequence(self, v, 0, NULL);
-}
-
-/*
- * The rest of this code is to build the right kind of array
- * from a python object.
- */
-
-static int
-discover_itemsize(PyObject *s, int nd, int *itemsize, int string_type)
-{
- int r;
- npy_intp n, i;
-
- if (PyArray_Check(s)) {
- *itemsize = PyArray_MAX(*itemsize, PyArray_ITEMSIZE((PyArrayObject *)s));
- return 0;
- }
-
- if ((nd == 0) || PyString_Check(s) ||
- PyMemoryView_Check(s) || PyUnicode_Check(s)) {
- /* If an object has no length, leave it be */
- if (string_type && s != NULL &&
- !PyString_Check(s) && !PyUnicode_Check(s)) {
- PyObject *s_string = NULL;
- if (string_type == NPY_STRING) {
- s_string = PyObject_Str(s);
}
else {
- s_string = PyObject_Str(s);
- }
- if (s_string) {
- n = PyObject_Length(s_string);
- Py_DECREF(s_string);
- }
- else {
- n = -1;
+ PyArrayObject *view;
+ view = (PyArrayObject *)array_item_asarray(self, i);
+                if (view == NULL) {
+ goto fail;
+ }
+ if (PyArray_AssignFromCache_Recursive(view, ndim, cache) < 0) {
+ Py_DECREF(view);
+ goto fail;
+ }
+ Py_DECREF(view);
}
}
- else {
- n = PyObject_Length(s);
- }
- if (n == -1) {
- PyErr_Clear();
- }
- else {
- *itemsize = PyArray_MAX(*itemsize, n);
- }
- return 0;
}
-
- n = PySequence_Length(s);
- for (i = 0; i < n; i++) {
- PyObject *e = PySequence_GetItem(s,i);
-
- if (e == NULL) {
- return -1;
- }
-
- r = discover_itemsize(e, nd - 1, itemsize, string_type);
- Py_DECREF(e);
- if (r == -1) {
- return -1;
- }
- }
-
+ Py_DECREF(obj);
return 0;
-}
-
-
-typedef enum {
- DISCOVERED_OK = 0,
- DISCOVERED_RAGGED = 1,
- DISCOVERED_OBJECT = 2
-} discovered_t;
-
-static void
-_discover_dimensions_array(PyArrayObject *arr, int *maxndim, npy_intp *d) {
- if (PyArray_NDIM(arr) < *maxndim) {
- *maxndim = PyArray_NDIM(arr);
- }
- for (int i = 0; i < *maxndim; i++) {
- d[i] = PyArray_DIM(arr, i);
- }
+ fail:
+ Py_DECREF(obj);
+ return -1;
}
-/*
- * Take an arbitrary object and discover how many dimensions it
- * has, filling in the dimensions as we go.
+/**
+ * Fills an item based on a coercion cache object. It consumes the cache
+ * object while doing so.
+ *
+ * @param self Array to fill.
+ * @param cache coercion_cache_object, will be consumed. The cache must not
+ * contain a single array (must start with a sequence). The array case
+ * should be handled by `PyArray_FromArray()` before.
+ * @return 0 on success -1 on failure.
*/
-static int
-discover_dimensions(PyObject *obj, int *maxndim, npy_intp *d, int check_it,
- int stop_at_string, int stop_at_tuple,
- discovered_t *out_is_object)
-{
- PyObject *e;
- npy_intp n, i;
- PyObject * seq;
-
- if (*maxndim == 0) {
- return 0;
- }
-
- /* obj is an Array */
- if (PyArray_Check(obj)) {
- _discover_dimensions_array((PyArrayObject *)obj, maxndim, d);
- return 0;
- }
-
- /* obj is a Scalar */
- if (PyArray_IsScalar(obj, Generic)) {
- *maxndim = 0;
- return 0;
- }
-
- /* obj is not a Sequence */
- if (!PySequence_Check(obj) ||
- PySequence_Length(obj) < 0) {
- *maxndim = 0;
- PyErr_Clear();
- return 0;
- }
-
- /* obj is a String */
- if (PyString_Check(obj) ||
- PyUnicode_Check(obj)) {
- if (stop_at_string) {
- *maxndim = 0;
- }
- else {
- d[0] = PySequence_Length(obj);
- *maxndim = 1;
- }
- return 0;
- }
-
- /* obj is a Tuple, but tuples aren't expanded */
- if (stop_at_tuple && PyTuple_Check(obj)) {
- *maxndim = 0;
- return 0;
- }
-
+NPY_NO_EXPORT int
+PyArray_AssignFromCache(PyArrayObject *self, coercion_cache_obj *cache) {
+ int ndim = PyArray_NDIM(self);
/*
- * In the future, the result of `_array_from_array_like` should possibly
- * be cached. This may require passing the correct dtype/writable
- * information already in the dimension discovery step (if they are
- * distinct steps).
+ * Do not support ndim == 0 now with an array in the cache.
+ * The ndim == 0 is special because np.array(np.array(0), dtype=object)
+ * should unpack the inner array.
+ * Since the single-array case is special, it is handled previously
+ * in either case.
*/
- e = _array_from_array_like(obj, NULL, NPY_FALSE, NULL);
- if (e == Py_NotImplemented) {
- Py_DECREF(e);
- }
- else if (e != NULL) {
- _discover_dimensions_array((PyArrayObject *)e, maxndim, d);
- Py_DECREF(e);
- return 0;
- }
- else if (PyErr_Occurred()) {
- /* TODO[gh-14801]: propagate crashes during attribute access? */
- PyErr_Clear();
- }
-
- seq = PySequence_Fast(obj, "Could not convert object to sequence");
- if (seq == NULL) {
- /*
- * PySequence_Check detects whether an old type object is a
- * sequence by the presence of the __getitem__ attribute, and
- * for new type objects that aren't dictionaries by the
- * presence of the __len__ attribute as well. In either case it
- * is possible to have an object that tests as a sequence but
- * doesn't behave as a sequence and consequently, the
- * PySequence_GetItem call can fail. When that happens and the
- * object looks like a dictionary, we truncate the dimensions
- * and set the object creation flag, otherwise we pass the
- * error back up the call chain.
- */
- if (PyErr_ExceptionMatches(PyExc_KeyError)) {
- PyErr_Clear();
- *maxndim = 0;
- *out_is_object = DISCOVERED_OBJECT;
- return 0;
- }
- else {
- return -1;
- }
- }
- n = PySequence_Fast_GET_SIZE(seq);
+ assert(cache->sequence);
+ assert(ndim != 0); /* guaranteed if cache contains a sequence */
- d[0] = n;
-
- /* 1-dimensional sequence */
- if (n == 0 || *maxndim == 1) {
- *maxndim = 1;
- Py_DECREF(seq);
- return 0;
+ if (PyArray_AssignFromCache_Recursive(self, ndim, &cache) < 0) {
+ /* free the remaining cache. */
+ npy_free_coercion_cache(cache);
+ return -1;
}
- else {
- int all_elems_maxndim = *maxndim - 1;
- npy_intp *all_elems_d = d + 1;
- int all_dimensions_match = 1;
-
- /* Get the dimensions of the first item as a baseline */
- PyObject *first = PySequence_Fast_GET_ITEM(seq, 0);
- if (discover_dimensions(
- first, &all_elems_maxndim, all_elems_d, check_it,
- stop_at_string, stop_at_tuple, out_is_object) < 0) {
- Py_DECREF(seq);
- return -1;
- }
-
- /* Compare the dimensions of all the remaining items */
- for (i = 1; i < n; ++i) {
- int j;
- int elem_maxndim = *maxndim - 1;
- npy_intp elem_d[NPY_MAXDIMS];
-
- PyObject *elem = PySequence_Fast_GET_ITEM(seq, i);
- if (discover_dimensions(
- elem, &elem_maxndim, elem_d, check_it,
- stop_at_string, stop_at_tuple, out_is_object) < 0) {
- Py_DECREF(seq);
- return -1;
- }
- /* Find the number of left-dimensions which match, j */
- for (j = 0; j < elem_maxndim && j < all_elems_maxndim; ++j) {
- if (elem_d[j] != all_elems_d[j]) {
- break;
- }
- }
- if (j != elem_maxndim || j != all_elems_maxndim) {
- all_dimensions_match = 0;
- }
- all_elems_maxndim = j;
- }
- *maxndim = all_elems_maxndim + 1;
- if (!all_dimensions_match) {
- /* typically results in an array containing variable-length lists */
- *out_is_object = DISCOVERED_RAGGED;
- }
+ /*
+ * Sanity check, this is the initial call, and when it returns, the
+ * cache has to be fully consumed, otherwise something is wrong.
+ * NOTE: May be nicer to put into a recursion helper.
+ */
+ if (cache != NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Inconsistent object during array creation? "
+ "Content of sequences changed (cache not consumed).");
+ return -1;
}
-
- Py_DECREF(seq);
-
return 0;
}
+
static void
raise_memory_error(int nd, npy_intp const *dims, PyArray_Descr *descr)
{
@@ -1518,7 +1265,7 @@ fail:
* or NULL with an error set. (A new reference to Py_NotImplemented
* is returned.)
*/
-static PyObject *
+NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op,
PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context) {
PyObject* tmp;
@@ -1598,313 +1345,25 @@ _array_from_array_like(PyObject *op,
}
-/*
- * Retrieves the array parameters for viewing/converting an arbitrary
- * PyObject* to a NumPy array. This allows the "innate type and shape"
- * of Python list-of-lists to be discovered without
- * actually converting to an array.
- *
- * In some cases, such as structured arrays and the __array__ interface,
- * a data type needs to be used to make sense of the object. When
- * this is needed, provide a Descr for 'requested_dtype', otherwise
- * provide NULL. This reference is not stolen. Also, if the requested
- * dtype doesn't modify the interpretation of the input, out_dtype will
- * still get the "innate" dtype of the object, not the dtype passed
- * in 'requested_dtype'.
- *
- * If writing to the value in 'op' is desired, set the boolean
- * 'writeable' to 1. This raises an error when 'op' is a scalar, list
- * of lists, or other non-writeable 'op'.
- *
- * Result: When success (0 return value) is returned, either out_arr
- * is filled with a non-NULL PyArrayObject and
- * the rest of the parameters are untouched, or out_arr is
- * filled with NULL, and the rest of the parameters are
- * filled.
- *
- * Typical usage:
- *
- * PyArrayObject *arr = NULL;
- * PyArray_Descr *dtype = NULL;
- * int ndim = 0;
- * npy_intp dims[NPY_MAXDIMS];
- *
- * if (PyArray_GetArrayParamsFromObject(op, NULL, 1, &dtype,
- * &ndim, dims, &arr, NULL) < 0) {
- * return NULL;
- * }
- * if (arr == NULL) {
- * ... validate/change dtype, validate flags, ndim, etc ...
- * // Could make custom strides here too
- * arr = PyArray_NewFromDescr(&PyArray_Type, dtype, ndim,
- * dims, NULL,
- * is_f_order ? NPY_ARRAY_F_CONTIGUOUS : 0,
- * NULL);
- * if (arr == NULL) {
- * return NULL;
- * }
- * if (PyArray_CopyObject(arr, op) < 0) {
- * Py_DECREF(arr);
- * return NULL;
- * }
- * }
- * else {
- * ... in this case the other parameters weren't filled, just
- * validate and possibly copy arr itself ...
- * }
- * ... use arr ...
- */
+/*NUMPY_API*/
NPY_NO_EXPORT int
-PyArray_GetArrayParamsFromObject_int(PyObject *op,
- PyArray_Descr *requested_dtype,
- npy_bool writeable,
- PyArray_Descr **out_dtype,
- int *out_ndim, npy_intp *out_dims,
- PyArrayObject **out_arr)
+PyArray_GetArrayParamsFromObject(PyObject *NPY_UNUSED(op),
+ PyArray_Descr *NPY_UNUSED(requested_dtype),
+ npy_bool NPY_UNUSED(writeable),
+ PyArray_Descr **NPY_UNUSED(out_dtype),
+ int *NPY_UNUSED(out_ndim), npy_intp *NPY_UNUSED(out_dims),
+ PyArrayObject **NPY_UNUSED(out_arr), PyObject *NPY_UNUSED(context))
{
- PyObject *tmp;
-
- /* If op is an array */
- if (PyArray_Check(op)) {
- if (writeable
- && PyArray_FailUnlessWriteable((PyArrayObject *)op, "array") < 0) {
- return -1;
- }
- Py_INCREF(op);
- *out_arr = (PyArrayObject *)op;
- return 0;
- }
-
- /* If op is a NumPy scalar */
- if (PyArray_IsScalar(op, Generic)) {
- if (writeable) {
- PyErr_SetString(PyExc_RuntimeError,
- "cannot write to scalar");
- return -1;
- }
- *out_dtype = PyArray_DescrFromScalar(op);
- if (*out_dtype == NULL) {
- return -1;
- }
- *out_ndim = 0;
- *out_arr = NULL;
- return 0;
- }
-
- /* If op is a Python scalar */
- *out_dtype = _array_find_python_scalar_type(op);
- if (*out_dtype != NULL) {
- if (writeable) {
- PyErr_SetString(PyExc_RuntimeError,
- "cannot write to scalar");
- Py_DECREF(*out_dtype);
- return -1;
- }
- *out_ndim = 0;
- *out_arr = NULL;
- return 0;
- }
-
- /* If op is an array-like */
- tmp = _array_from_array_like(op, requested_dtype, writeable, NULL);
- if (tmp == NULL) {
- return -1;
- }
- else if (tmp != Py_NotImplemented) {
- *out_arr = (PyArrayObject*) tmp;
- return 0;
- }
- else {
- Py_DECREF(Py_NotImplemented);
- }
-
- /* Try to treat op as a list of lists */
- if (!writeable && PySequence_Check(op)) {
- int check_it, stop_at_string, stop_at_tuple;
- int type_num, type;
-
- /*
- * Determine the type, using the requested data type if
- * it will affect how the array is retrieved
- */
- if (requested_dtype != NULL && (
- requested_dtype->type_num == NPY_STRING ||
- requested_dtype->type_num == NPY_UNICODE ||
- (requested_dtype->type_num == NPY_VOID &&
- (requested_dtype->names || requested_dtype->subarray)) ||
- requested_dtype->type == NPY_CHARLTR ||
- requested_dtype->type_num == NPY_OBJECT)) {
- Py_INCREF(requested_dtype);
- *out_dtype = requested_dtype;
- }
- else {
- *out_dtype = NULL;
- if (PyArray_DTypeFromObject(op, NPY_MAXDIMS, out_dtype) < 0) {
- if (PyErr_ExceptionMatches(PyExc_MemoryError)) {
- return -1;
- }
- /* Return NPY_OBJECT for most exceptions */
- else {
- PyErr_Clear();
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (*out_dtype == NULL) {
- return -1;
- }
- }
- }
- if (*out_dtype == NULL) {
- *out_dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
- if (*out_dtype == NULL) {
- return -1;
- }
- }
- }
-
- type_num = (*out_dtype)->type_num;
- type = (*out_dtype)->type;
-
- check_it = (type != NPY_CHARLTR);
- stop_at_string = (type_num != NPY_STRING) ||
- (type == NPY_STRINGLTR);
- stop_at_tuple = (type_num == NPY_VOID &&
- ((*out_dtype)->names || (*out_dtype)->subarray));
-
- *out_ndim = NPY_MAXDIMS;
- discovered_t is_object = DISCOVERED_OK;
- if (discover_dimensions(
- op, out_ndim, out_dims, check_it,
- stop_at_string, stop_at_tuple, &is_object) < 0) {
- Py_DECREF(*out_dtype);
- if (PyErr_Occurred()) {
- return -1;
- }
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (*out_dtype == NULL) {
- return -1;
- }
- *out_ndim = 0;
- *out_arr = NULL;
- return 0;
- }
- /* If object arrays are forced */
- if (is_object != DISCOVERED_OK) {
- static PyObject *visibleDeprecationWarning = NULL;
- npy_cache_import(
- "numpy", "VisibleDeprecationWarning",
- &visibleDeprecationWarning);
- if (visibleDeprecationWarning == NULL) {
- return -1;
- }
- if (is_object == DISCOVERED_RAGGED && requested_dtype == NULL) {
- /* NumPy 1.19, 2019-11-01 */
- if (PyErr_WarnEx(visibleDeprecationWarning, "Creating an "
- "ndarray from ragged nested sequences (which is a "
- "list-or-tuple of lists-or-tuples-or ndarrays with "
- "different lengths or shapes) is deprecated. If you "
- "meant to do this, you must specify 'dtype=object' "
- "when creating the ndarray", 1) < 0)
- {
- return -1;
- }
- }
- /* either DISCOVERED_OBJECT or there is a requested_dtype */
- Py_DECREF(*out_dtype);
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (*out_dtype == NULL) {
- return -1;
- }
- }
-
- if ((*out_dtype)->type == NPY_CHARLTR && (*out_ndim) > 0 &&
- out_dims[(*out_ndim) - 1] == 1) {
- (*out_ndim) -= 1;
- }
-
- /* If the type is flexible, determine its size */
- if (PyDataType_ISUNSIZED(*out_dtype) &&
- PyTypeNum_ISEXTENDED((*out_dtype)->type_num)) {
- int itemsize = 0;
- int string_type = 0;
- if ((*out_dtype)->type_num == NPY_STRING ||
- (*out_dtype)->type_num == NPY_UNICODE) {
- string_type = (*out_dtype)->type_num;
- }
- if (discover_itemsize(op, *out_ndim, &itemsize, string_type) < 0) {
- Py_DECREF(*out_dtype);
- if (PyErr_Occurred() &&
- PyErr_GivenExceptionMatches(PyErr_Occurred(),
- PyExc_MemoryError)) {
- return -1;
- }
- /* Say it's an OBJECT scalar if there's an error */
- PyErr_Clear();
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- *out_ndim = 0;
- *out_arr = NULL;
- return 0;
- }
- if ((*out_dtype)->type_num == NPY_UNICODE) {
- itemsize *= 4;
- }
-
- if (itemsize != (*out_dtype)->elsize) {
- PyArray_DESCR_REPLACE(*out_dtype);
- (*out_dtype)->elsize = itemsize;
- }
- }
-
- *out_arr = NULL;
- return 0;
- }
-
- /* Anything can be viewed as an object, unless it needs to be writeable */
- if (!writeable) {
- *out_dtype = PyArray_DescrFromType(NPY_OBJECT);
- if (*out_dtype == NULL) {
- return -1;
- }
- *out_ndim = 0;
- *out_arr = NULL;
- return 0;
- }
-
+ /* Deprecated in NumPy 1.19, removed in NumPy 1.20. */
PyErr_SetString(PyExc_RuntimeError,
- "object cannot be viewed as a writeable numpy array");
+ "PyArray_GetArrayParamsFromObject() C-API function is removed "
+ "`PyArray_FromAny()` should be used at this time. New C-API "
+ "may be exposed in the future (please do request this if it "
+ "would help you).");
return -1;
}
-/*NUMPY_API*/
-NPY_NO_EXPORT int
-PyArray_GetArrayParamsFromObject(PyObject *op,
- PyArray_Descr *requested_dtype,
- npy_bool writeable,
- PyArray_Descr **out_dtype,
- int *out_ndim, npy_intp *out_dims,
- PyArrayObject **out_arr, PyObject *context)
-{
- /* NumPy 1.19, 2020-01-24 */
- if (DEPRECATE(
- "PyArray_GetArrayParamsFromObject() C-API function is deprecated "
- "and expected to be removed rapidly. If you are using it (i.e. see "
- "this warning/error), please notify the NumPy developers. "
- "As of now it is expected that any use case is served similarly "
- "well by `PyArray_FromAny()` and this function is unused outside "
- "of NumPy itself.") < 0) {
- return -1;
- }
-
- if (context != NULL) {
- PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL");
- return -1;
- }
-
- return PyArray_GetArrayParamsFromObject_int(op,
- requested_dtype, writeable, out_dtype, out_ndim, out_dims,
- out_arr);
-}
-
-
/*NUMPY_API
* Does not check for NPY_ARRAY_ENSURECOPY and NPY_ARRAY_NOTSWAPPED in flags
* Steals a reference to newtype --- which can be NULL
@@ -1919,6 +1378,7 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
*/
PyArrayObject *arr = NULL, *ret;
PyArray_Descr *dtype = NULL;
+ coercion_cache_obj *cache = NULL;
int ndim = 0;
npy_intp dims[NPY_MAXDIMS];
@@ -1927,124 +1387,104 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
return NULL;
}
- /* Get either the array or its parameters if it isn't an array */
- if (PyArray_GetArrayParamsFromObject_int(op,
- newtype, 0, &dtype, &ndim, dims, &arr) < 0) {
+ PyArray_Descr *fixed_descriptor;
+ PyArray_DTypeMeta *fixed_DType;
+ if (PyArray_ExtractDTypeAndDescriptor((PyObject *)newtype,
+ &fixed_descriptor, &fixed_DType) < 0) {
Py_XDECREF(newtype);
return NULL;
}
+ Py_XDECREF(newtype);
- /* If the requested dtype is flexible, adapt it */
- if (newtype != NULL) {
- newtype = PyArray_AdaptFlexibleDType((arr == NULL) ? op : (PyObject *)arr,
- (dtype == NULL) ? PyArray_DESCR(arr) : dtype,
- newtype);
- if (newtype == NULL) {
- return NULL;
- }
+ ndim = PyArray_DiscoverDTypeAndShape(op,
+ NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype);
+
+ Py_XDECREF(fixed_descriptor);
+ Py_XDECREF(fixed_DType);
+ if (ndim < 0) {
+ return NULL;
+ }
+ if (dtype == NULL) {
+ dtype = PyArray_DescrFromType(NPY_DEFAULT_TYPE);
}
- /* If we got dimensions and dtype instead of an array */
- if (arr == NULL) {
- if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
- (flags & NPY_ARRAY_UPDATEIFCOPY)) {
- Py_DECREF(dtype);
- Py_XDECREF(newtype);
- PyErr_SetString(PyExc_TypeError,
- "WRITEBACKIFCOPY used for non-array input.");
- return NULL;
- }
- else if (min_depth != 0 && ndim < min_depth) {
- Py_DECREF(dtype);
- Py_XDECREF(newtype);
- PyErr_SetString(PyExc_ValueError,
- "object of too small depth for desired array");
- ret = NULL;
- }
- else if (max_depth != 0 && ndim > max_depth) {
- Py_DECREF(dtype);
- Py_XDECREF(newtype);
- PyErr_SetString(PyExc_ValueError,
- "object too deep for desired array");
- ret = NULL;
- }
- else if (ndim == 0 && PyArray_IsScalar(op, Generic)) {
- ret = (PyArrayObject *)PyArray_FromScalar(op, newtype);
- Py_DECREF(dtype);
- }
- else {
- if (newtype == NULL) {
- newtype = dtype;
- }
- else {
- /*
- * TODO: would be nice to do this too, but it's
- * a behavior change. It's also a bit tricky
- * for downcasting to small integer and float
- * types, and might be better to modify
- * PyArray_AssignFromSequence and descr->f->setitem
- * to have a 'casting' parameter and
- * to check each value with scalar rules like
- * in PyArray_MinScalarType.
- */
- /*
- if (!(flags&NPY_ARRAY_FORCECAST) && ndim > 0 &&
- !PyArray_CanCastTo(dtype, newtype)) {
- Py_DECREF(dtype);
- Py_XDECREF(newtype);
- PyErr_SetString(PyExc_TypeError,
- "object cannot be safely cast to array "
- "of required type");
- return NULL;
- }
- */
- Py_DECREF(dtype);
- }
+ if (min_depth != 0 && ndim < min_depth) {
+ PyErr_SetString(PyExc_ValueError,
+ "object of too small depth for desired array");
+ Py_DECREF(dtype);
+ npy_free_coercion_cache(cache);
+ return NULL;
+ }
+ if (max_depth != 0 && ndim > max_depth) {
+ PyErr_SetString(PyExc_ValueError,
+ "object too deep for desired array");
+ Py_DECREF(dtype);
+ npy_free_coercion_cache(cache);
+ return NULL;
+ }
- /* Create an array and copy the data */
- ret = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, newtype,
- ndim, dims,
- NULL, NULL,
- flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
- if (ret == NULL) {
- return NULL;
- }
+ /* Got the correct parameters, but the cache may already hold the result */
+ if (cache != NULL && !(cache->sequence)) {
+ /*
+ * There is only a single array-like and it was converted, it
+ * may still have the incorrect type, but that is handled below.
+ */
+ assert(cache->converted_obj == op);
+ arr = (PyArrayObject *)(cache->arr_or_sequence);
+ /* we may need to cast or assert flags (e.g. copy) */
+ PyObject *res = PyArray_FromArray(arr, dtype, flags);
+ npy_unlink_coercion_cache(cache);
+ return res;
+ }
+ else if (cache == NULL && PyArray_IsScalar(op, Void) &&
+ !(((PyVoidScalarObject *)op)->flags & NPY_ARRAY_OWNDATA) &&
+ PyArray_EquivTypes(((PyVoidScalarObject *)op)->descr, dtype)) {
+ /*
+ * Special case, we return a *view* into void scalars, mainly to
+ * allow "reversed" assignment:
+ * arr[indx]["field"] = val # instead of arr["field"][indx] = val
+ */
+ assert(ndim == 0);
- if (ndim > 0) {
- if (PyArray_AssignFromSequence(ret, op) < 0) {
- Py_DECREF(ret);
- ret = NULL;
- }
- }
- else {
- if (PyArray_SETITEM(ret, PyArray_DATA(ret), op) < 0) {
- Py_DECREF(ret);
- ret = NULL;
- }
- }
- }
+ return PyArray_NewFromDescrAndBase(
+ &PyArray_Type, dtype,
+ 0, NULL, NULL,
+ ((PyVoidScalarObject *)op)->obval,
+ ((PyVoidScalarObject *)op)->flags,
+ NULL, op);
}
- else {
- if (min_depth != 0 && PyArray_NDIM(arr) < min_depth) {
- PyErr_SetString(PyExc_ValueError,
- "object of too small depth for desired array");
- Py_DECREF(arr);
- Py_XDECREF(newtype);
- ret = NULL;
- }
- else if (max_depth != 0 && PyArray_NDIM(arr) > max_depth) {
- PyErr_SetString(PyExc_ValueError,
- "object too deep for desired array");
- Py_DECREF(arr);
- Py_XDECREF(newtype);
- ret = NULL;
- }
- else {
- ret = (PyArrayObject *)PyArray_FromArray(arr, newtype, flags);
- Py_DECREF(arr);
- }
+
+ /* There was no array (or array-like) passed in directly. */
+ if ((flags & NPY_ARRAY_WRITEBACKIFCOPY) ||
+ (flags & NPY_ARRAY_UPDATEIFCOPY)) {
+ PyErr_SetString(PyExc_TypeError,
+ "WRITEBACKIFCOPY used for non-array input.");
+ Py_DECREF(dtype);
+ return NULL;
}
+ /* Create a new array and copy the data */
+ ret = (PyArrayObject *)PyArray_NewFromDescr(
+ &PyArray_Type, dtype, ndim, dims, NULL, NULL,
+ flags&NPY_ARRAY_F_CONTIGUOUS, NULL);
+ if (ret == NULL) {
+ return NULL;
+ }
+ if (cache == NULL) {
+ /* This is a single item. Set it directly. */
+ assert(ndim == 0);
+ if (PyArray_Pack(PyArray_DESCR(ret), PyArray_DATA(ret), op) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
+ return (PyObject *)ret;
+ }
+ assert(ndim != 0);
+ assert(op == cache->converted_obj);
+ if (PyArray_AssignFromCache(ret, cache) < 0) {
+ Py_DECREF(ret);
+ return NULL;
+ }
return (PyObject *)ret;
}
diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h
index 9e63cd7d2..8db1412c7 100644
--- a/numpy/core/src/multiarray/ctors.h
+++ b/numpy/core/src/multiarray/ctors.h
@@ -30,13 +30,9 @@ PyArray_New(
PyTypeObject *, int nd, npy_intp const *,
int, npy_intp const*, void *, int, int, PyObject *);
-NPY_NO_EXPORT int
-PyArray_GetArrayParamsFromObject_int(PyObject *op,
- PyArray_Descr *requested_dtype,
- npy_bool writeable,
- PyArray_Descr **out_dtype,
- int *out_ndim, npy_intp *out_dims,
- PyArrayObject **out_arr);
+NPY_NO_EXPORT PyObject *
+_array_from_array_like(PyObject *op,
+ PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context);
NPY_NO_EXPORT PyObject *
PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
@@ -98,9 +94,6 @@ copy_and_swap(void *dst, void *src, int itemsize, npy_intp numitems,
NPY_NO_EXPORT void
byte_swap_vector(void *p, npy_intp n, int size);
-NPY_NO_EXPORT int
-PyArray_AssignFromSequence(PyArrayObject *self, PyObject *v);
-
/*
* Calls arr_of_subclass.__array_wrap__(towrap), in order to make 'towrap'
* have the same ndarray subclass as 'arr_of_subclass'.
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index cfe801898..8f3948c23 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -3429,7 +3429,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step,
*
* Returns 0 on success, -1 on failure.
*/
-static int
+NPY_NO_EXPORT int
find_string_array_datetime64_type(PyArrayObject *arr,
PyArray_DatetimeMetaData *meta)
{
@@ -3552,44 +3552,9 @@ fail:
* Returns 0 on success, -1 on failure.
*/
static int
-recursive_find_object_datetime64_type(PyObject *obj,
- PyArray_DatetimeMetaData *meta)
+find_object_datetime64_meta(PyObject *obj, PyArray_DatetimeMetaData *meta)
{
- /* Array -> use its metadata */
- if (PyArray_Check(obj)) {
- PyArrayObject *arr = (PyArrayObject *)obj;
- PyArray_Descr *arr_dtype = PyArray_DESCR(arr);
-
- if (arr_dtype->type_num == NPY_STRING ||
- arr_dtype->type_num == NPY_UNICODE) {
- return find_string_array_datetime64_type(arr, meta);
- }
- /* If the array has metadata, use it */
- else if (arr_dtype->type_num == NPY_DATETIME ||
- arr_dtype->type_num == NPY_TIMEDELTA) {
- PyArray_DatetimeMetaData *tmp_meta;
-
- /* Get the metadata from the type */
- tmp_meta = get_datetime_metadata_from_dtype(arr_dtype);
- if (tmp_meta == NULL) {
- return -1;
- }
-
- /* Combine it with 'meta' */
- if (compute_datetime_metadata_greatest_common_divisor(meta,
- tmp_meta, meta, 0, 0) < 0) {
- return -1;
- }
-
- return 0;
- }
- /* If it's not an object array, stop looking */
- else if (arr_dtype->type_num != NPY_OBJECT) {
- return 0;
- }
- }
- /* Datetime scalar -> use its metadata */
- else if (PyArray_IsScalar(obj, Datetime)) {
+ if (PyArray_IsScalar(obj, Datetime)) {
PyDatetimeScalarObject *dts = (PyDatetimeScalarObject *)obj;
/* Combine it with 'meta' */
@@ -3661,34 +3626,6 @@ recursive_find_object_datetime64_type(PyObject *obj,
return 0;
}
-
- /* Now check if what we have left is a sequence for recursion */
- if (PySequence_Check(obj)) {
- Py_ssize_t i, len = PySequence_Size(obj);
- if (len < 0 && PyErr_Occurred()) {
- return -1;
- }
-
- for (i = 0; i < len; ++i) {
- int ret;
- PyObject *f = PySequence_GetItem(obj, i);
- if (f == NULL) {
- return -1;
- }
- if (Npy_EnterRecursiveCall(" in recursive_find_object_datetime64_type") != 0) {
- Py_DECREF(f);
- return -1;
- }
- ret = recursive_find_object_datetime64_type(f, meta);
- Py_LeaveRecursiveCall();
- Py_DECREF(f);
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
- }
/* Otherwise ignore it */
else {
return 0;
@@ -3722,70 +3659,10 @@ delta_checker(PyArray_DatetimeMetaData *meta)
* Returns 0 on success, -1 on failure.
*/
static int
-recursive_find_object_timedelta64_type(PyObject *obj,
- PyArray_DatetimeMetaData *meta)
+find_object_timedelta64_meta(PyObject *obj, PyArray_DatetimeMetaData *meta)
{
- /* Array -> use its metadata */
- if (PyArray_Check(obj)) {
- PyArrayObject *arr = (PyArrayObject *)obj;
- PyArray_Descr *arr_dtype = PyArray_DESCR(arr);
-
- /* If the array has metadata, use it */
- if (arr_dtype->type_num == NPY_DATETIME ||
- arr_dtype->type_num == NPY_TIMEDELTA) {
- PyArray_DatetimeMetaData *tmp_meta;
-
- /* Get the metadata from the type */
- tmp_meta = get_datetime_metadata_from_dtype(arr_dtype);
- if (tmp_meta == NULL) {
- return -1;
- }
-
- /* Combine it with 'meta' */
- if (compute_datetime_metadata_greatest_common_divisor(meta,
- tmp_meta, meta, 0, 0) < 0) {
- return -1;
- }
-
- return 0;
- }
- /* If it's not an object array, stop looking */
- else if (arr_dtype->type_num != NPY_OBJECT) {
- return 0;
- }
- else {
- if (PyArray_NDIM(arr) == 0) {
- /*
- * special handling of 0 dimensional NumPy object
- * arrays, which may be indexed to retrieve their
- * single object using [()], but not by using
- * __getitem__(integer) approaches
- */
- PyObject *item, *args;
-
- args = PyTuple_New(0);
- if (args == NULL) {
- return 0;
- }
- item = PyObject_GetItem(obj, args);
- Py_DECREF(args);
- if (item == NULL) {
- return 0;
- }
- /*
- * NOTE: may need other type checks here in the future
- * for expanded 0 D datetime array conversions?
- */
- if (PyDelta_Check(item)) {
- Py_DECREF(item);
- return delta_checker(meta);
- }
- Py_DECREF(item);
- }
- }
- }
/* Datetime scalar -> use its metadata */
- else if (PyArray_IsScalar(obj, Timedelta)) {
+ if (PyArray_IsScalar(obj, Timedelta)) {
PyTimedeltaScalarObject *dts = (PyTimedeltaScalarObject *)obj;
/* Combine it with 'meta' */
@@ -3805,34 +3682,6 @@ recursive_find_object_timedelta64_type(PyObject *obj,
else if (PyDelta_Check(obj)) {
return delta_checker(meta);
}
-
- /* Now check if what we have left is a sequence for recursion */
- if (PySequence_Check(obj)) {
- Py_ssize_t i, len = PySequence_Size(obj);
- if (len < 0 && PyErr_Occurred()) {
- return -1;
- }
-
- for (i = 0; i < len; ++i) {
- int ret;
- PyObject *f = PySequence_GetItem(obj, i);
- if (f == NULL) {
- return -1;
- }
- if (Npy_EnterRecursiveCall(" in recursive_find_object_timedelta64_type") != 0) {
- Py_DECREF(f);
- return -1;
- }
- ret = recursive_find_object_timedelta64_type(f, meta);
- Py_LeaveRecursiveCall();
- Py_DECREF(f);
- if (ret < 0) {
- return ret;
- }
- }
-
- return 0;
- }
/* Otherwise ignore it */
else {
return 0;
@@ -3853,7 +3702,7 @@ find_object_datetime_type(PyObject *obj, int type_num)
meta.num = 1;
if (type_num == NPY_DATETIME) {
- if (recursive_find_object_datetime64_type(obj, &meta) < 0) {
+ if (find_object_datetime64_meta(obj, &meta) < 0) {
return NULL;
}
else {
@@ -3861,7 +3710,7 @@ find_object_datetime_type(PyObject *obj, int type_num)
}
}
else if (type_num == NPY_TIMEDELTA) {
- if (recursive_find_object_timedelta64_type(obj, &meta) < 0) {
+ if (find_object_timedelta64_meta(obj, &meta) < 0) {
return NULL;
}
else {
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 4e37b9628..a7c33d88f 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1801,9 +1801,10 @@ static void
arraydescr_dealloc(PyArray_Descr *self)
{
if (self->fields == Py_None) {
- fprintf(stderr, "*** Reference count error detected: \n" \
- "an attempt was made to deallocate %d (%c) ***\n",
+ fprintf(stderr, "*** Reference count error detected: "
+ "an attempt was made to deallocate the dtype %d (%c) ***\n",
self->type_num, self->type);
+ assert(0);
Py_INCREF(self);
Py_INCREF(self);
return;
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index a26426d41..3a58b5849 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -1099,7 +1099,7 @@ get_datetime_to_unicode_transfer_function(int aligned,
/* Get an ASCII string data type, adapted to match the UNICODE one */
str_dtype = PyArray_DescrFromType(NPY_STRING);
- str_dtype = PyArray_AdaptFlexibleDType(NULL, dst_dtype, str_dtype);
+ str_dtype = PyArray_AdaptFlexibleDType(dst_dtype, str_dtype);
if (str_dtype == NULL) {
return NPY_FAIL;
}
@@ -1222,7 +1222,7 @@ get_unicode_to_datetime_transfer_function(int aligned,
/* Get an ASCII string data type, adapted to match the UNICODE one */
str_dtype = PyArray_DescrFromType(NPY_STRING);
- str_dtype = PyArray_AdaptFlexibleDType(NULL, src_dtype, str_dtype);
+ str_dtype = PyArray_AdaptFlexibleDType(src_dtype, str_dtype);
if (str_dtype == NULL) {
return NPY_FAIL;
}
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index 9982cd676..3026e68e9 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -8,9 +8,13 @@
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
#include <numpy/ndarraytypes.h>
+#include <numpy/arrayscalars.h>
#include "npy_pycompat.h"
+#include "common.h"
#include "dtypemeta.h"
+#include "_datetime.h"
+#include "array_coercion.h"
static void
@@ -104,6 +108,179 @@ legacy_dtype_default_new(PyArray_DTypeMeta *self,
return (PyObject *)self->singleton;
}
+
+static PyArray_Descr *
+nonparametric_discover_descr_from_pyobject(
+ PyArray_DTypeMeta *cls, PyObject *obj)
+{
+ /* If the object is of the correct scalar type return our singleton */
+ assert(!cls->parametric);
+ Py_INCREF(cls->singleton);
+ return cls->singleton;
+}
+
+
+static PyArray_Descr *
+string_discover_descr_from_pyobject(
+ PyArray_DTypeMeta *cls, PyObject *obj)
+{
+ npy_intp itemsize = -1;
+ if (PyBytes_Check(obj)) {
+ itemsize = PyBytes_Size(obj);
+ }
+ else if (PyUnicode_Check(obj)) {
+ itemsize = PyUnicode_GetLength(obj);
+ }
+ if (itemsize != -1) {
+ if (cls->type_num == NPY_UNICODE) {
+ itemsize *= 4;
+ }
+ if (itemsize > NPY_MAX_INT) {
+ PyErr_SetString(PyExc_TypeError,
+ "string to large to store inside array.");
+ }
+ PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num);
+ res->elsize = (int)itemsize;
+ return res;
+ }
+ return PyArray_DTypeFromObjectStringDiscovery(obj, NULL, cls->type_num);
+}
+
+
+static PyArray_Descr *
+void_discover_descr_from_pyobject(
+ PyArray_DTypeMeta *NPY_UNUSED(cls), PyObject *obj)
+{
+ if (PyArray_IsScalar(obj, Void)) {
+ PyVoidScalarObject *void_obj = (PyVoidScalarObject *)obj;
+ Py_INCREF(void_obj->descr);
+ return void_obj->descr;
+ }
+ if (PyBytes_Check(obj)) {
+ PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_VOID);
+ Py_ssize_t itemsize = (int)PyBytes_Size(obj);
+ if (itemsize > NPY_MAX_INT) {
+ PyErr_SetString(PyExc_TypeError,
+ "byte-like to large to store inside array.");
+ }
+ descr->elsize = itemsize;
+ return descr;
+ }
+ PyErr_Format(PyExc_TypeError,
+ "A bytes-like object is required, not '%s'", Py_TYPE(obj)->tp_name);
+ return NULL;
+}
+
+
+static PyArray_Descr *
+discover_datetime_and_timedelta_from_pyobject(
+ PyArray_DTypeMeta *cls, PyObject *obj) {
+ if (PyArray_IsScalar(obj, Datetime) ||
+ PyArray_IsScalar(obj, Timedelta)) {
+ PyArray_DatetimeMetaData *meta;
+ PyArray_Descr *descr = PyArray_DescrFromScalar(obj);
+ meta = get_datetime_metadata_from_dtype(descr);
+ if (meta == NULL) {
+ return NULL;
+ }
+ PyArray_Descr *new_descr = create_datetime_dtype(cls->type_num, meta);
+ Py_DECREF(descr);
+ return new_descr;
+ }
+ else {
+ return find_object_datetime_type(obj, cls->type_num);
+ }
+}
+
+
+static PyArray_Descr *
+flexible_default_descr(PyArray_DTypeMeta *cls)
+{
+ PyArray_Descr *res = PyArray_DescrNewFromType(cls->type_num);
+ if (res == NULL) {
+ return NULL;
+ }
+ res->elsize = 1;
+ if (cls->type_num == NPY_UNICODE) {
+ res->elsize *= 4;
+ }
+ return res;
+}
+
+
+static int
+python_builtins_are_known_scalar_types(
+ PyArray_DTypeMeta *NPY_UNUSED(cls), PyTypeObject *pytype)
+{
+ /*
+ * Always accept the common Python types, this ensures that we do not
+ * convert pyfloat->float64->integers. Subclasses are hopefully rejected
+ * as being discovered.
+ * This is necessary only for python scalar classes which we discover
+ * as valid DTypes.
+ */
+ if (pytype == &PyFloat_Type) {
+ return 1;
+ }
+ if (pytype == &PyLong_Type) {
+ return 1;
+ }
+ if (pytype == &PyBool_Type) {
+ return 1;
+ }
+ if (pytype == &PyComplex_Type) {
+ return 1;
+ }
+ if (pytype == &PyUnicode_Type) {
+ return 1;
+ }
+ if (pytype == &PyBytes_Type) {
+ return 1;
+ }
+ return 0;
+}
+
+
+static int
+datetime_known_scalar_types(
+ PyArray_DTypeMeta *cls, PyTypeObject *pytype)
+{
+ if (python_builtins_are_known_scalar_types(cls, pytype)) {
+ return 1;
+ }
+ /*
+ * To be able to identify the descriptor from e.g. any string, datetime
+ * must take charge. Otherwise we would attempt casting which does not
+ * truly support this. Only object arrays are special cased in this way.
+ */
+ return (PyType_IsSubtype(pytype, &PyString_Type) ||
+ PyType_IsSubtype(pytype, &PyUnicode_Type));
+}
+
+
+static int
+string_known_scalar_types(
+ PyArray_DTypeMeta *cls, PyTypeObject *pytype) {
+ if (python_builtins_are_known_scalar_types(cls, pytype)) {
+ return 1;
+ }
+ if (PyType_IsSubtype(pytype, &PyDatetimeArrType_Type)) {
+ /*
+ * TODO: This should likely be deprecated or otherwise resolved.
+ * Deprecation has to occur in `String->setitem` unfortunately.
+ *
+ * Datetimes currently do not cast to shorter strings, but string
+ * coercion for arbitrary values uses `str(obj)[:len]` so it works.
+ * This means `np.array(np.datetime64("2020-01-01"), "U9")`
+ * and `np.array(np.datetime64("2020-01-01")).astype("U9")` behave
+ * differently.
+ */
+ return 1;
+ }
+ return 0;
+}
+
+
/**
* This function takes a PyArray_Descr and replaces its base class with
* a newly created dtype subclass (DTypeMeta instances).
@@ -221,12 +398,41 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
dtype_class->f = descr->f;
dtype_class->kind = descr->kind;
+ /* Strings and voids have (strange) logic around scalars. */
+ dtype_class->is_known_scalar_type = python_builtins_are_known_scalar_types;
+
if (PyTypeNum_ISDATETIME(descr->type_num)) {
/* Datetimes are flexible, but were not considered previously */
dtype_class->parametric = NPY_TRUE;
+ dtype_class->discover_descr_from_pyobject = (
+ discover_datetime_and_timedelta_from_pyobject);
+ if (descr->type_num == NPY_DATETIME) {
+ dtype_class->is_known_scalar_type = datetime_known_scalar_types;
+ }
}
else if (PyTypeNum_ISFLEXIBLE(descr->type_num)) {
dtype_class->parametric = NPY_TRUE;
+ dtype_class->default_descr = flexible_default_descr;
+ if (descr->type_num == NPY_VOID) {
+ dtype_class->discover_descr_from_pyobject = (
+ void_discover_descr_from_pyobject);
+ }
+ else {
+ dtype_class->is_known_scalar_type = string_known_scalar_types;
+ dtype_class->discover_descr_from_pyobject = (
+ string_discover_descr_from_pyobject);
+ }
+ }
+ else {
+ /* nonparametric case */
+ dtype_class->discover_descr_from_pyobject = (
+ nonparametric_discover_descr_from_pyobject);
+ }
+
+ if (_PyArray_MapPyTypeToDType(dtype_class, descr->typeobj,
+ PyTypeNum_ISUSERDEF(dtype_class->type_num)) < 0) {
+ Py_DECREF(dtype_class);
+ return -1;
}
/* Finally, replace the current class of the descr */
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 97152d1ad..e0909a7eb 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -1,6 +1,8 @@
#ifndef _NPY_DTYPEMETA_H
#define _NPY_DTYPEMETA_H
+#define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))
+
NPY_NO_EXPORT int
dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem);
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 45c019f49..8052e24e4 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -26,7 +26,7 @@
#include "npy_binsearch.h"
#include "alloc.h"
#include "arraytypes.h"
-
+#include "array_coercion.h"
static NPY_GCC_OPT_3 NPY_INLINE int
@@ -2629,5 +2629,5 @@ PyArray_MultiIndexSetItem(PyArrayObject *self, const npy_intp *multi_index,
data += ind * strides[idim];
}
- return PyArray_SETITEM(self, data, obj);
+ return PyArray_Pack(PyArray_DESCR(self), data, obj);
}
diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index c71b7b770..ac5b90400 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -15,6 +15,7 @@
#include "iterators.h"
#include "ctors.h"
#include "common.h"
+#include "array_coercion.h"
#define NEWAXIS_INDEX -1
#define ELLIPSIS_INDEX -2
@@ -824,7 +825,7 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val)
if (PyBool_Check(ind)) {
retval = 0;
if (PyObject_IsTrue(ind)) {
- retval = PyArray_SETITEM(self->ao, self->dataptr, val);
+ retval = PyArray_Pack(PyArray_DESCR(self->ao), self->dataptr, val);
}
goto finish;
}
@@ -841,7 +842,7 @@ iter_ass_subscript(PyArrayIterObject *self, PyObject *ind, PyObject *val)
goto finish;
}
PyArray_ITER_GOTO1D(self, start);
- retval = type->f->setitem(val, self->dataptr, self->ao);
+ retval = PyArray_Pack(PyArray_DESCR(self->ao), self->dataptr, val);
PyArray_ITER_RESET(self);
if (retval < 0) {
PyErr_SetString(PyExc_ValueError,
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index f73cb48d9..c27e0c391 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -22,6 +22,7 @@
#include "item_selection.h"
#include "mem_overlap.h"
#include "array_assign.h"
+#include "array_coercion.h"
#define HAS_INTEGER 1
@@ -1754,7 +1755,7 @@ array_assign_item(PyArrayObject *self, Py_ssize_t i, PyObject *op)
if (get_item_pointer(self, &item, indices, 1) < 0) {
return -1;
}
- if (PyArray_SETITEM(self, item, op) < 0) {
+ if (PyArray_Pack(PyArray_DESCR(self), item, op) < 0) {
return -1;
}
}
@@ -1832,7 +1833,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
if (get_item_pointer(self, &item, indices, index_num) < 0) {
return -1;
}
- if (PyArray_SETITEM(self, item, op) < 0) {
+ if (PyArray_Pack(PyArray_DESCR(self), item, op) < 0) {
return -1;
}
/* integers do not store objects in indices */
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index d81650ecd..a2db8042f 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -14,6 +14,7 @@
#include "npy_pycompat.h"
#include "npy_import.h"
#include "ufunc_override.h"
+#include "array_coercion.h"
#include "common.h"
#include "templ_common.h" /* for npy_mul_with_overflow_intp */
#include "ctors.h"
@@ -809,6 +810,12 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
return NULL;
}
+ /* If it is not a concrete dtype instance find the best one for the array */
+ Py_SETREF(dtype, PyArray_AdaptDescriptorToArray(self, (PyObject *)dtype));
+ if (dtype == NULL) {
+ return NULL;
+ }
+
/*
* If the memory layout matches and, data types are equivalent,
* and it's not a subtype if subok is False, then we
@@ -831,13 +838,6 @@ array_astype(PyArrayObject *self, PyObject *args, PyObject *kwds)
else if (PyArray_CanCastArrayTo(self, dtype, casting)) {
PyArrayObject *ret;
- /* If the requested dtype is flexible, adapt it */
- dtype = PyArray_AdaptFlexibleDType((PyObject *)self,
- PyArray_DESCR(self), dtype);
- if (dtype == NULL) {
- return NULL;
- }
-
/* This steals the reference to dtype, so no DECREF of dtype */
ret = (PyArrayObject *)PyArray_NewLikeArray(
self, order, dtype, subok);
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 4190c53bd..9a34685f4 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -35,6 +35,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
/* Internal APIs */
#include "alloc.h"
+#include "abstractdtypes.h"
+#include "array_coercion.h"
#include "arrayfunction_override.h"
#include "arraytypes.h"
#include "arrayobject.h"
@@ -823,6 +825,9 @@ PyArray_InnerProduct(PyObject *op1, PyObject *op2)
PyObject* ret = NULL;
typenum = PyArray_ObjectType(op1, 0);
+ if (typenum == NPY_NOTYPE && PyErr_Occurred()) {
+ return NULL;
+ }
typenum = PyArray_ObjectType(op2, typenum);
typec = PyArray_DescrFromType(typenum);
if (typec == NULL) {
@@ -912,6 +917,9 @@ PyArray_MatrixProduct2(PyObject *op1, PyObject *op2, PyArrayObject* out)
NPY_BEGIN_THREADS_DEF;
typenum = PyArray_ObjectType(op1, 0);
+ if (typenum == NPY_NOTYPE && PyErr_Occurred()) {
+ return NULL;
+ }
typenum = PyArray_ObjectType(op2, typenum);
typec = PyArray_DescrFromType(typenum);
if (typec == NULL) {
@@ -3975,6 +3983,7 @@ normalize_axis_index(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
return PyInt_FromLong(axis);
}
+
static struct PyMethodDef array_module_methods[] = {
{"_get_implementing_args",
(PyCFunction)array__get_implementing_args,
@@ -4151,6 +4160,8 @@ static struct PyMethodDef array_module_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"set_legacy_print_mode", (PyCFunction)set_legacy_print_mode,
METH_VARARGS, NULL},
+ {"_discover_array_parameters", (PyCFunction)_discover_array_parameters,
+ METH_VARARGS | METH_KEYWORDS, NULL},
/* from umath */
{"frompyfunc",
(PyCFunction) ufunc_frompyfunc,
@@ -4620,6 +4631,9 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
if (set_typeinfo(d) != 0) {
goto err;
}
+ if (initialize_and_map_pytypes_to_dtypes() < 0) {
+ goto err;
+ }
if (initumath(m) != 0) {
goto err;
}
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index f2dbc9f03..7da17eafe 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -16,6 +16,7 @@
#include "nditer_impl.h"
#include "arrayobject.h"
+#include "array_coercion.h"
#include "templ_common.h"
#include "array_assign.h"
@@ -1101,17 +1102,11 @@ npyiter_prepare_one_operand(PyArrayObject **op,
*/
if (op_request_dtype != NULL) {
/* We just have a borrowed reference to op_request_dtype */
- Py_INCREF(op_request_dtype);
- /* If the requested dtype is flexible, adapt it */
- op_request_dtype = PyArray_AdaptFlexibleDType((PyObject *)(*op), PyArray_DESCR(*op),
- op_request_dtype);
- if (op_request_dtype == NULL) {
+ Py_SETREF(*op_dtype, PyArray_AdaptDescriptorToArray(
+ *op, (PyObject *)op_request_dtype));
+ if (*op_dtype == NULL) {
return 0;
}
-
- /* Store the requested dtype */
- Py_DECREF(*op_dtype);
- *op_dtype = op_request_dtype;
}
/* Check if the operand is in the byte order requested */
diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py
index b8d4b5cdf..30019b253 100644
--- a/numpy/core/tests/test_array_coercion.py
+++ b/numpy/core/tests/test_array_coercion.py
@@ -141,12 +141,6 @@ class TestStringDiscovery:
[object(), 1.2, 10**43, None, "string"],
ids=["object", "1.2", "10**43", "None", "string"])
def test_basic_stringlength(self, obj):
- if not isinstance(obj, (str, int)):
- pytest.xfail(
- "The Single object (first assert) uses a different branch "
- "and thus gives a different result (either wrong or longer"
- "string than normally discovered).")
-
length = len(str(obj))
expected = np.dtype(f"S{length}")
@@ -156,8 +150,9 @@ class TestStringDiscovery:
# A nested array is also discovered correctly
arr = np.array(obj, dtype="O")
assert np.array(arr, dtype="S").dtype == expected
+ # Check that .astype() behaves identical
+ assert arr.astype("S").dtype == expected
- @pytest.mark.xfail(reason="Only single array unpacking is supported")
@pytest.mark.parametrize("obj",
[object(), 1.2, 10**43, None, "string"],
ids=["object", "1.2", "10**43", "None", "string"])
@@ -167,7 +162,6 @@ class TestStringDiscovery:
arr = np.array(obj, dtype="O")
assert np.array([arr, arr], dtype="S").dtype == expected
- @pytest.mark.xfail(reason="Only single array unpacking is supported")
@pytest.mark.parametrize("arraylike", arraylikes())
def test_unpack_first_level(self, arraylike):
# We unpack exactly one level of array likes
@@ -223,21 +217,22 @@ class TestScalarDiscovery:
assert arr.shape == ()
assert arr.dtype == scalar.dtype
- if type(scalar) is np.bytes_:
- pytest.xfail("Nested bytes use len(str(scalar)) currently.")
-
arr = np.array([[scalar, scalar]])
assert arr.shape == (1, 2)
assert arr.dtype == scalar.dtype
# Additionally to string this test also runs into a corner case
# with datetime promotion (the difference is the promotion order).
- @pytest.mark.xfail(reason="Coercion to string is not symmetric")
def test_scalar_promotion(self):
for sc1, sc2 in product(scalar_instances(), scalar_instances()):
sc1, sc2 = sc1.values[0], sc2.values[0]
# test all combinations:
- arr = np.array([sc1, sc2])
+ try:
+ arr = np.array([sc1, sc2])
+ except (TypeError, ValueError):
+ # The promotion between two times can fail
+ # XFAIL (ValueError): Some object casts are currently undefined
+ continue
assert arr.shape == (2,)
try:
dt1, dt2 = sc1.dtype, sc2.dtype
@@ -255,11 +250,10 @@ class TestScalarDiscovery:
# Ensure we have a full-precision number if available
scalar = type(scalar)((scalar * 2)**0.5)
- if is_parametric_dtype(scalar.dtype) or type(scalar) is rational:
- # datetime with unit will be named "datetime64[unit]"
+ if type(scalar) is rational:
# Rational generally fails due to a missing cast. In the future
# object casts should automatically be defined based on `setitem`.
- pytest.xfail("0-D object array to a unit-less datetime cast fails")
+ pytest.xfail("Rational to object cast is undefined currently.")
# Use casting from object:
arr = np.array(scalar, dtype=object).astype(scalar.dtype)
@@ -279,9 +273,7 @@ class TestScalarDiscovery:
@pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy")
@pytest.mark.filterwarnings("ignore::numpy.ComplexWarning")
- # After change, can enable times here, and below and it will work,
- # Right now times are too complex, so map out some details below.
- @pytest.mark.parametrize("cast_to", scalar_instances(times=False))
+ @pytest.mark.parametrize("cast_to", scalar_instances())
def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to):
"""
Test that in most cases:
@@ -293,10 +285,7 @@ class TestScalarDiscovery:
"""
dtype = cast_to.dtype # use to parametrize only the target dtype
- # XFAIL: Some extended precision tests fail, because assigning to
- # complex256 will use float(float128). Rational fails currently.
- for scalar in scalar_instances(
- times=False, extended_precision=False, user_dtype=False):
+ for scalar in scalar_instances(times=False):
scalar = scalar.values[0]
if dtype.type == np.void:
@@ -306,7 +295,7 @@ class TestScalarDiscovery:
# this, but has different rules than the cast.
with pytest.raises(TypeError):
np.array(scalar).astype(dtype)
- # XFAIL: np.array(scalar, dtype=dtype)
+ np.array(scalar, dtype=dtype)
np.array([scalar], dtype=dtype)
continue
@@ -342,9 +331,6 @@ class TestTimeScalars:
param(np.timedelta64(123, "s"), id="timedelta64[s]"),
param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"),
param(np.datetime64(1, "D"), id="datetime64[D]")],)
- @pytest.mark.xfail(
- reason="This uses int(scalar) or float(scalar) to assign, which "
- "fails. However, casting currently does not fail.")
def test_coercion_basic(self, dtype, scalar):
arr = np.array(scalar, dtype=dtype)
cast = np.array(scalar).astype(dtype)
@@ -369,25 +355,48 @@ class TestTimeScalars:
assert_array_equal(arr, cast)
assert_array_equal(cast, cast)
+ @pytest.mark.parametrize("dtype", ["S6", "U6"])
@pytest.mark.parametrize(["val", "unit"],
[param(123, "s", id="[s]"), param(123, "D", id="[D]")])
- @pytest.mark.parametrize("scalar_type", [np.datetime64, np.timedelta64])
- @pytest.mark.xfail(reason="Error not raised for assignment")
- def test_coercion_assignment_times(self, scalar_type, val, unit):
- scalar = scalar_type(val, unit)
+ def test_coercion_assignment_datetime(self, val, unit, dtype):
+ # String from datetime64 assignment is currently special cased to
+ # never use casting. This is because casting will error in this
+ # case, and traditionally in most cases the behaviour is maintained
+ # like this. (`np.array(scalar, dtype="U6")` would have failed before)
+ # TODO: This discrepancy _should_ be resolved, either by relaxing the
+ # cast, or by deprecating the first part.
+ scalar = np.datetime64(val, unit)
+ dtype = np.dtype(dtype)
+ cut_string = dtype.type(str(scalar)[:6])
+
+ arr = np.array(scalar, dtype=dtype)
+ assert arr[()] == cut_string
+ ass = np.ones((), dtype=dtype)
+ ass[()] = scalar
+ assert ass[()] == cut_string
- # The error type is not ideal, fails because string is too short:
- with pytest.raises(RuntimeError):
- np.array(scalar, dtype="S6")
- with pytest.raises(RuntimeError):
- cast = np.array(scalar).astype("S6")
- ass = np.ones((), dtype="S6")
with pytest.raises(RuntimeError):
- ass[()] = scalar
+ # However, unlike the above assignment using `str(scalar)[:6]`
+ # due to being handled by the string DType and not by casting
+ # the explicit cast fails:
+ np.array(scalar).astype(dtype)
+ @pytest.mark.parametrize(["val", "unit"],
+ [param(123, "s", id="[s]"), param(123, "D", id="[D]")])
+ def test_coercion_assignment_timedelta(self, val, unit):
+ scalar = np.timedelta64(val, unit)
+
+ # Unlike datetime64, timedelta allows the unsafe cast:
+ np.array(scalar, dtype="S6")
+ cast = np.array(scalar).astype("S6")
+ ass = np.ones((), dtype="S6")
+ ass[()] = scalar
+ expected = scalar.astype("S")[:6]
+ assert cast[()] == expected
+ assert ass[()] == expected
+
class TestNested:
- @pytest.mark.xfail(reason="No deprecation warning given.")
def test_nested_simple(self):
initial = [1.2]
nested = initial
@@ -417,11 +426,6 @@ class TestNested:
arr = np.array([l, [None], l], dtype=object)
assert arr.shape == (3, 1)
- @pytest.mark.xfail(
- reason="For arrays and memoryview, this used to not complain "
- "and assign to a too small array instead. For other "
- "array-likes the error is different because fewer (only "
- "MAXDIM-1) dimensions are found, failing the last test.")
@pytest.mark.parametrize("arraylike", arraylikes())
def test_nested_arraylikes(self, arraylike):
# We try storing an array like into an array, but the array-like
@@ -432,10 +436,6 @@ class TestNested:
# assigned to it (which does work for object or if `float(arraylike)`
# works).
initial = arraylike(np.ones((1, 1)))
- #if not isinstance(initial, (np.ndarray, memoryview)):
- # pytest.xfail(
- # "When coercing to object, these cases currently discover "
- # "fewer dimensions than ndarray failing the second part.")
nested = initial
for i in range(np.MAXDIMS - 1):
@@ -463,11 +463,6 @@ class TestNested:
assert out[0] is arr
assert type(out[1]) is list
- if not isinstance(arr, (np.ndarray, memoryview)):
- pytest.xfail(
- "does not raise ValueError below, because it discovers "
- "the dimension as (2,) and not (2, 2, 2)")
-
# Array is ragged in the third dimension:
with pytest.raises(ValueError):
# This is a broadcast error during assignment, because
@@ -500,7 +495,7 @@ class TestBadSequences:
obj.append(mylist([1, 2]))
- with pytest.raises(ValueError): # changes to RuntimeError
+ with pytest.raises(RuntimeError):
np.array(obj)
# Note: We do not test a shrinking list. These do very evil things
@@ -517,8 +512,8 @@ class TestBadSequences:
obj.append([2, 3])
obj.append(mylist([1, 2]))
- #with pytest.raises(RuntimeError): # Will error in the future
- np.array(obj)
+ with pytest.raises(RuntimeError):
+ np.array(obj)
def test_replace_0d_array(self):
# List to coerce, `mylist` will mutate the first element
@@ -534,8 +529,8 @@ class TestBadSequences:
# Runs into a corner case in the new code, the `array(2)` is cached
# so replacing it invalidates the cache.
obj.append([np.array(2), baditem()])
- # with pytest.raises(RuntimeError): # Will error in the future
- np.array(obj)
+ with pytest.raises(RuntimeError):
+ np.array(obj)
class TestArrayLikes:
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index fef1e24d8..59a3954fd 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -2329,9 +2329,21 @@ class TestDateTime:
obj_arr = np.array([None])
obj_arr[0] = a
- # gh-11154: This shouldn't cause a C stack overflow
- assert_raises(RecursionError, obj_arr.astype, 'M8')
- assert_raises(RecursionError, obj_arr.astype, 'm8')
+ # At some point this caused a stack overflow (gh-11154). Now raises
+ # ValueError since the nested list cannot be converted to a datetime.
+ assert_raises(ValueError, obj_arr.astype, 'M8')
+ assert_raises(ValueError, obj_arr.astype, 'm8')
+
+ @pytest.mark.parametrize("shape", [(), (1,)])
+ def test_discovery_from_object_array(self, shape):
+ arr = np.array("2020-10-10", dtype=object).reshape(shape)
+ res = np.array("2020-10-10", dtype="M8").reshape(shape)
+ assert res.dtype == np.dtype("M8[D]")
+ assert_equal(arr.astype("M8"), res)
+ arr[...] = np.bytes_("2020-10-10") # try a numpy string type
+ assert_equal(arr.astype("M8"), res)
+ arr = arr.astype("S")
+ assert_equal(arr.astype("S").astype("M8"), res)
@pytest.mark.parametrize("time_unit", [
"Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as",
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 239d20c9d..68502adda 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -537,6 +537,22 @@ def test_deprecate_ragged_arrays():
np.array(arg)
+class TestTooDeepDeprecation(_VisibleDeprecationTestCase):
+ # NumPy 1.20, 2020-05-08
+ # This is a bit similar to the above ragged array deprecation case.
+ message = re.escape("Creating an ndarray from nested sequences exceeding")
+
+ def test_deprecation(self):
+ nested = [1]
+ for i in range(np.MAXDIMS - 1):
+ nested = [nested]
+ self.assert_not_deprecated(np.array, args=(nested,))
+ self.assert_not_deprecated(np.array,
+ args=(nested,), kwargs=dict(dtype=object))
+
+ self.assert_deprecated(np.array, args=([nested],))
+
+
class TestToString(_DeprecationTestCase):
# 2020-03-06 1.19.0
message = re.escape("tostring() is deprecated. Use tobytes() instead.")
diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py
index f6e263774..1069cbe8d 100644
--- a/numpy/core/tests/test_indexing.py
+++ b/numpy/core/tests/test_indexing.py
@@ -538,6 +538,15 @@ class TestIndexing:
arr[slices] = 10
assert_array_equal(arr, 10.)
+ def test_character_assignment(self):
+ # This is an example of a function going through CopyObject which
+ # used to have an untested special path for scalars
+ # (the character special dtype case, should be deprecated probably)
+ arr = np.zeros((1, 5), dtype="c")
+ arr[0] = np.str_("asdfg") # must assign as a sequence
+ assert_array_equal(arr[0], np.array("asdfg", dtype="c"))
+ assert arr[0, 1] == b"s" # make sure not all were set to "a" for both
+
class TestFieldIndexing:
def test_scalar_return_type(self):
# Field access on an array should return an array, even if it
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 01169f938..fb3a5f50b 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -1012,6 +1012,8 @@ class TestCreation:
with assert_raises(ValueError):
a[:] = C() # Segfault!
+ np.array(C()) == list(C())
+
def test_failed_len_sequence(self):
# gh-7393
class A:
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index cf18a5d93..0b921fcb7 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -2450,7 +2450,8 @@ class TestRegression:
class T:
__array_interface__ = {}
- np.array([T()])
+ with assert_raises(ValueError):
+ np.array([T()])
def test_2d__array__shape(self):
class T(object):
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 76a92f5ca..27f14a5e7 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -215,6 +215,17 @@ class TestMaskedArray:
y = array([1, 2, 3], mask=x._mask, copy=True)
assert_(not np.may_share_memory(x.mask, y.mask))
+ def test_masked_singleton_array_creation_warns(self):
+ # The first works, but should not (ideally), there may be no way
+ # to solve this, however, as long as `np.ma.masked` is an ndarray.
+ np.array(np.ma.masked)
+ with pytest.warns(UserWarning):
+ # Tries to create a float array, using `float(np.ma.masked)`.
+ # We may want to define this is invalid behaviour in the future!
+ # (requiring np.ma.masked to be a known NumPy scalar probably
+ # with a DType.)
+ np.array([3., np.ma.masked])
+
def test_creation_with_list_of_maskedarrays(self):
# Tests creating a masked array from a list of masked arrays.
x = array(np.arange(5), mask=[1, 0, 0, 0, 0])