24 files changed, 4122 insertions, 701 deletions
diff --git a/doc/neps/nep-0042-new-dtypes.rst b/doc/neps/nep-0042-new-dtypes.rst index ff92e1612..d1ddb7101 100644 --- a/doc/neps/nep-0042-new-dtypes.rst +++ b/doc/neps/nep-0042-new-dtypes.rst @@ -784,7 +784,7 @@ Its ``resolve_descriptors`` function may look like:: # This is always an "unsafe" cast, but for int64, we can represent # it by a simple view (if the dtypes are both canonical). # (represented as C-side flags here). - safety_and_view = NPY_UNSAFE_CASTING | NPY_CAST_IS_VIEW + safety_and_view = NPY_UNSAFE_CASTING | _NPY_CAST_IS_VIEW return safety_and_view, (from_dtype, to_dtype) .. note:: @@ -1305,7 +1305,7 @@ The external API for ``CastingImpl`` will be limited initially to defining: ``casting`` will be set to ``NPY_EQUIV_CASTING``, ``NPY_SAFE_CASTING``, ``NPY_UNSAFE_CASTING``, or ``NPY_SAME_KIND_CASTING``. A new, additional flag, - ``NPY_CAST_IS_VIEW``, can be set to indicate that no cast is necessary and a + ``_NPY_CAST_IS_VIEW``, can be set to indicate that no cast is necessary and a view is sufficient to perform the cast. The cast should return ``-1`` when a custom error is set and ``NPY_NO_CASTING`` to indicate that a generic casting error should be set (this is in most cases diff --git a/doc/source/reference/global_state.rst b/doc/source/reference/global_state.rst index 7bf9310e8..b59467210 100644 --- a/doc/source/reference/global_state.rst +++ b/doc/source/reference/global_state.rst @@ -83,3 +83,18 @@ in C which iterates through arrays that may or may not be contiguous in memory. Most users will have no reason to change these; for details see the :ref:`memory layout <memory-layout>` documentation. + +Using the new casting implementation +------------------------------------ + +Within NumPy 1.20 it is possible to enable the new experimental casting +implementation for testing purposes. To do this, set:: + + NPY_USE_NEW_CASTINGIMPL=1 + +Setting the flag is only useful to aid NumPy development by ensuring the +new version is bug free; it should be avoided for production code. +It is a helpful test for projects that either create custom datatypes or +use, for example, complicated structured dtypes. The flag is expected to be +removed in 1.21, with the new version then always in use.
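
The environment variable only matters at build time: ``numpy/core/setup.py`` (below) turns it into the C define ``NPY_USE_NEW_CASTINGIMPL``, and the C sources then switch between the old and the new machinery on that define. A minimal sketch of the gating pattern, mirroring the ``uses_new_casts`` test helper added later in this diff (the function name here is illustrative)::

    /* Sketch only: report at runtime which cast machinery was compiled in. */
    static int
    my_uses_new_casts(void)
    {
    #if NPY_USE_NEW_CASTINGIMPL
        return 1;   /* new ArrayMethod-based casts */
    #else
        return 0;   /* legacy casting implementation */
    #endif
    }
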
+ diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index 856db0410..ca6a22828 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -26,6 +26,7 @@ API_FILES = [join('multiarray', 'alloc.c'), join('multiarray', 'array_assign_array.c'), join('multiarray', 'array_assign_scalar.c'), join('multiarray', 'array_coercion.c'), + join('multiarray', 'array_method.c'), join('multiarray', 'arrayobject.c'), join('multiarray', 'arraytypes.c.src'), join('multiarray', 'buffer.c'), diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 6bf54938f..75e9519fe 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -210,6 +210,7 @@ typedef enum { /* For specifying allowed casting in operations which support it */ typedef enum { + _NPY_ERROR_OCCURRED_IN_CAST = -1, /* Only allow identical types */ NPY_NO_CASTING=0, /* Allow identical and byte swapped types */ @@ -219,7 +220,14 @@ typedef enum { /* Allow safe casts or casts within the same kind */ NPY_SAME_KIND_CASTING=3, /* Allow any casts */ - NPY_UNSAFE_CASTING=4 + NPY_UNSAFE_CASTING=4, + /* + * Flag to allow signalling that a cast is a view; this flag is not + * valid when requesting a cast of specific safety. + * _NPY_CAST_IS_VIEW|NPY_EQUIV_CASTING means the same as NPY_NO_CASTING. + */ + // TODO-DTYPES: Needs to be documented. + _NPY_CAST_IS_VIEW = 1 << 16, } NPY_CASTING; typedef enum { @@ -1900,6 +1908,12 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, default_descr_function *default_descr; common_dtype_function *common_dtype; common_instance_function *common_instance; + /* + * Dictionary of ArrayMethods representing most possible casts + * (structured and object are exceptions). + * This should potentially become a weak mapping in the future. + */ + PyObject *castingimpls; }; #endif /* NPY_INTERNAL_BUILD */ diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 6ada03f73..08510fcc8 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -23,6 +23,11 @@ NPY_RELAXED_STRIDES_CHECKING = (os.environ.get('NPY_RELAXED_STRIDES_CHECKING', " NPY_RELAXED_STRIDES_DEBUG = (os.environ.get('NPY_RELAXED_STRIDES_DEBUG', "0") != "0") NPY_RELAXED_STRIDES_DEBUG = NPY_RELAXED_STRIDES_DEBUG and NPY_RELAXED_STRIDES_CHECKING +# Set to True to use the new casting implementation as much as implemented. +# Allows running the full test suite to exercise the new machinery until +# it is used as the default and the old version is eventually deleted. +NPY_USE_NEW_CASTINGIMPL = os.environ.get('NPY_USE_NEW_CASTINGIMPL', "0") != "0" + # XXX: ugly, we use a class to avoid calling twice some expensive functions in # config.h/numpyconfig.h.
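
As an aside on the new ``_NPY_CAST_IS_VIEW`` member: it is a flag bit OR'ed onto a regular ``NPY_CASTING`` level rather than a level of its own, and the code in this diff always masks it off before comparing levels. A small sketch of that convention (names are illustrative)::

    /* Sketch only: compose and decompose a cast level with the view bit. */
    static void
    example_cast_is_view(void)
    {
        /* "no casting needed, and a view of the data is sufficient" */
        NPY_CASTING result = NPY_NO_CASTING | _NPY_CAST_IS_VIEW;

        int is_view = (result & _NPY_CAST_IS_VIEW) != 0;      /* 1 */
        NPY_CASTING level = result & ~_NPY_CAST_IS_VIEW;      /* NPY_NO_CASTING */
        (void)is_view;
        (void)level;
    }
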
I don't see a better way because distutils force # config.h generation inside an Extension class, and as such sharing @@ -468,6 +473,10 @@ def configuration(parent_package='',top_path=None): if NPY_RELAXED_STRIDES_DEBUG: moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + # Use the new experimental casting implementation in NumPy 1.20: + if NPY_USE_NEW_CASTINGIMPL: + moredefs.append(('NPY_USE_NEW_CASTINGIMPL', 1)) + # Get long double representation rep = check_long_double_representation(config_cmd) moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) @@ -769,6 +778,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arraytypes.h'), join('src', 'multiarray', 'arrayfunction_override.h'), join('src', 'multiarray', 'array_coercion.h'), + join('src', 'multiarray', 'array_method.h'), join('src', 'multiarray', 'npy_buffer.h'), join('src', 'multiarray', 'calculation.h'), join('src', 'multiarray', 'common.h'), @@ -784,6 +794,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'getset.h'), join('src', 'multiarray', 'hashdescr.h'), join('src', 'multiarray', 'iterators.h'), + join('src', 'multiarray', 'legacy_dtype_implementation.h'), join('src', 'multiarray', 'mapping.h'), join('src', 'multiarray', 'methods.h'), join('src', 'multiarray', 'multiarraymodule.h'), @@ -824,6 +835,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arrayobject.c'), join('src', 'multiarray', 'arraytypes.c.src'), join('src', 'multiarray', 'array_coercion.c'), + join('src', 'multiarray', 'array_method.c'), join('src', 'multiarray', 'array_assign_scalar.c'), join('src', 'multiarray', 'array_assign_array.c'), join('src', 'multiarray', 'arrayfunction_override.c'), @@ -850,6 +862,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'hashdescr.c'), join('src', 'multiarray', 'item_selection.c'), join('src', 'multiarray', 'iterators.c'), + join('src', 'multiarray', 'legacy_dtype_implementation.c'), join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), join('src', 'multiarray', 'mapping.c'), join('src', 'multiarray', 'methods.c'), diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 421b03f93..4979747e4 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -373,4 +373,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, NPY_NO_EXPORT PyArray_Descr * find_object_datetime_type(PyObject *obj, int type_num); +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts(); + #endif diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 5b6b6dc78..58bb76950 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -9,6 +9,7 @@ #include "common.h" #include "mem_overlap.h" #include "npy_extint128.h" +#include "array_method.h" #if defined(MS_WIN32) || defined(__CYGWIN__) #define EXPORT(x) __declspec(dllexport) x @@ -977,6 +978,79 @@ get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg) } +static PyObject * +get_all_cast_information(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args)) +{ + PyObject *result = PyList_New(0); + if (result == NULL) { + return NULL; + } + PyObject *classes = PyObject_CallMethod( + (PyObject *)&PyArrayDescr_Type, "__subclasses__", ""); + if (classes == NULL) { + return NULL; + } + Py_SETREF(classes, PySequence_Fast(classes, NULL)); + if (classes == NULL) { + goto 
fail; + } + + Py_ssize_t nclass = PySequence_Length(classes); + for (Py_ssize_t i = 0; i < nclass; i++) { + PyArray_DTypeMeta *from_dtype = ( + (PyArray_DTypeMeta *)PySequence_Fast_GET_ITEM(classes, i)); + if (from_dtype->abstract) { + /* + * TODO: In principle probably needs to recursively check this, + * also we may allow casts to abstract dtypes at some point. + */ + continue; + } + + PyObject *to_dtype, *cast_obj; + Py_ssize_t pos = 0; + + while (PyDict_Next(from_dtype->castingimpls, &pos, &to_dtype, &cast_obj)) { + if (cast_obj == Py_None) { + continue; + } + PyArrayMethodObject *cast = (PyArrayMethodObject *)cast_obj; + + /* Pass some information about this cast out! */ + PyObject *cast_info = Py_BuildValue("{sOsOsisisisisisssi}", + "from", from_dtype, + "to", to_dtype, + "legacy", (cast->name != NULL && + strncmp(cast->name, "legacy_", 7) == 0), + "casting", cast->casting & ~_NPY_CAST_IS_VIEW, + "requires_pyapi", cast->flags & NPY_METH_REQUIRES_PYAPI, + "supports_unaligned", + cast->flags & NPY_METH_SUPPORTS_UNALIGNED, + "no_floatingpoint_errors", + cast->flags & NPY_METH_NO_FLOATINGPOINT_ERRORS, + "name", cast->name, + "cast_is_view", + cast->casting & _NPY_CAST_IS_VIEW); + if (cast_info == NULL) { + goto fail; + } + int res = PyList_Append(result, cast_info); + Py_DECREF(cast_info); + if (res < 0) { + goto fail; + } + } + } + Py_DECREF(classes); + return result; + + fail: + Py_XDECREF(classes); + Py_XDECREF(result); + return NULL; +} + + /* * Test C-api level item getting. */ @@ -2010,6 +2084,18 @@ getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) return ret; } + +static PyObject * +uses_new_casts(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ +#if NPY_USE_NEW_CASTINGIMPL + Py_RETURN_TRUE; +#else + Py_RETURN_FALSE; +#endif +} + + static PyObject * run_byteorder_converter(PyObject* NPY_UNUSED(self), PyObject *args) { @@ -2113,8 +2199,8 @@ run_casting_converter(PyObject* NPY_UNUSED(self), PyObject *args) case NPY_SAFE_CASTING: return PyUnicode_FromString("NPY_SAFE_CASTING"); case NPY_SAME_KIND_CASTING: return PyUnicode_FromString("NPY_SAME_KIND_CASTING"); case NPY_UNSAFE_CASTING: return PyUnicode_FromString("NPY_UNSAFE_CASTING"); + default: return PyLong_FromLong(casting); } - return PyLong_FromLong(casting); } static PyObject * @@ -2194,6 +2280,12 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"get_c_wrapping_array", get_c_wrapping_array, METH_O, NULL}, + {"get_all_cast_information", + get_all_cast_information, + METH_NOARGS, + "Return a list with info on all available casts. Some of the info" + "may differ for an actual cast if it uses value-based casting " + "(flexible types)."}, {"array_indexing", array_indexing, METH_VARARGS, NULL}, @@ -2254,6 +2346,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"getset_numericops", getset_numericops, METH_NOARGS, NULL}, + {"uses_new_casts", + uses_new_casts, + METH_NOARGS, NULL}, /**begin repeat * #name = cabs, carg# */ diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c new file mode 100644 index 000000000..076dd43aa --- /dev/null +++ b/numpy/core/src/multiarray/array_method.c @@ -0,0 +1,608 @@ +/* + * This file implements an abstraction layer for "Array methods", which + * work with a specific DType class input and provide low-level C function + * pointers to do fast operations on the given input functions. + * It thus adds an abstraction layer around individual ufunc loops. 
+ * + * Unlike methods, an ArrayMethod can have multiple inputs and outputs. + * This has some serious implications for garbage collection, and as far + * as I (@seberg) understand, it is not possible to always guarantee correct + * cyclic garbage collection of dynamically created DTypes with methods. + * The keyword (or rather the solution) for this seems to be an "ephemeron", + * which I believe should allow correct garbage collection but does not + * seem to be implemented in Python at this time. + * The vast majority of use-cases will not require correct garbage collection. + * Some use cases may require the user to be careful. + * + * Generally there are two main ways to solve this issue: + * + * 1. A method with a single input (or inputs of all the same DTypes) can + * be "owned" by that DType (it becomes unusable when the DType is deleted). + * This holds especially for all casts, which must have a defined output + * DType and must hold on to it strongly. + * 2. A method which can infer the output DType(s) from the input types does + * not need to keep the output type alive. (It can use NULL for the type, + * or an abstract base class which is known to be persistent.) + * It is then sufficient for a ufunc (or other owner) to only hold a + * weak reference to the input DTypes. + */ + + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <npy_pycompat.h> +#include "arrayobject.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "convert_datatype.h" + + +/* + * The default descriptor resolution function. The logic is as follows: + * + * 1. The output is ensured to be canonical (currently native byte order), + * if it is of the correct DType. + * 2. If any DType was not defined, it is replaced by the common DType + * of all inputs. (If that common DType is parametric, this is an error.) + * + * We could allow setting the output descriptors specifically to simplify + * this step.
+ */ +static NPY_CASTING +default_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **input_descrs, + PyArray_Descr **output_descrs) +{ + int nin = context->nin; + int nout = context->nout; + int all_defined = 1; + + for (int i = 0; i < nin + nout; i++) { + PyArray_DTypeMeta *dtype = context->dtypes[i]; + if (dtype == NULL) { + output_descrs[i] = NULL; + all_defined = 0; + continue; + } + if (NPY_DTYPE(input_descrs[i]) == dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = dtype->default_descr(dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + if (all_defined) { + return context->method->casting; + } + + if (NPY_UNLIKELY(nin == 0 || context->dtypes[0] == NULL)) { + /* Registration should reject this, so this would be indicates a bug */ + PyErr_SetString(PyExc_RuntimeError, + "Invalid use of default resolver without inputs or with " + "input or output DType incorrectly missing."); + goto fail; + } + /* We find the common dtype of all inputs, and use it for the unknowns */ + PyArray_DTypeMeta *common_dtype = context->dtypes[0]; + assert(common_dtype != NULL); + for (int i = 1; i < nin; i++) { + Py_SETREF(common_dtype, PyArray_CommonDType(common_dtype, context->dtypes[i])); + if (common_dtype == NULL) { + goto fail; + } + } + for (int i = nin; i < nin + nout; i++) { + if (output_descrs[i] != NULL) { + continue; + } + if (NPY_DTYPE(input_descrs[i]) == common_dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = common_dtype->default_descr(common_dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + + return context->method->casting; + + fail: + for (int i = 0; i < nin + nout; i++) { + Py_XDECREF(output_descrs[i]); + } + return -1; +} + + +/** + * The default method to fetch the correct loop for a cast or ufunc + * (at the time of writing only casts). + * The default version can return loops explicitly registered during method + * creation. It does specialize contiguous loops, although has to check + * all descriptors itemsizes for this. + * + * @param context + * @param aligned + * @param move_references UNUSED. + * @param strides + * @param descriptors + * @param out_loop + * @param out_transferdata + * @param flags + * @return 0 on success -1 on failure. + */ +static int +default_get_strided_loop( + PyArrayMethod_Context *NPY_UNUSED(context), + int NPY_UNUSED(aligned), int NPY_UNUSED(move_references), + npy_intp *NPY_UNUSED(strides), + PyArray_StridedUnaryOp **NPY_UNUSED(out_loop), + NpyAuxData **NPY_UNUSED(out_transferdata), + NPY_ARRAYMETHOD_FLAGS *NPY_UNUSED(flags)) +{ + PyErr_SetString(PyExc_NotImplementedError, + "default loop getter is not implemented"); + return -1; +} + + +/** + * Validate that the input is usable to create a new ArrayMethod. + * + * @param spec + * @return 0 on success -1 on error. + */ +static int +validate_spec(PyArrayMethod_Spec *spec) +{ + int nargs = spec->nin + spec->nout; + /* Check the passed spec for invalid fields/values */ + if (spec->nin < 0 || spec->nout < 0 || nargs > NPY_MAXARGS) { + PyErr_Format(PyExc_ValueError, + "ArrayMethod inputs and outputs must be greater zero and" + "not exceed %d. 
(method: %s)", NPY_MAXARGS, spec->name); + return -1; + } + switch (spec->casting & ~_NPY_CAST_IS_VIEW) { + case NPY_NO_CASTING: + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + case NPY_UNSAFE_CASTING: + break; + default: + PyErr_Format(PyExc_TypeError, + "ArrayMethod has invalid casting `%d`. (method: %s)", + spec->casting, spec->name); + return -1; + } + + for (int i = 0; i < nargs; i++) { + if (spec->dtypes[i] == NULL && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod must have well defined input DTypes. " + "(method: %s)", spec->name); + return -1; + } + if (!PyObject_TypeCheck(spec->dtypes[i], &PyArrayDTypeMeta_Type)) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod provided object %R is not a DType." + "(method: %s)", spec->dtypes[i], spec->name); + return -1; + } + if (spec->dtypes[i]->abstract && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "abstract DType %S are currently not allowed for inputs." + "(method: %s defined at %s)", spec->dtypes[i], spec->name); + return -1; + } + } + return 0; +} + + +static int +fill_arraymethod_from_slots( + PyBoundArrayMethodObject *res, PyArrayMethod_Spec *spec, + int private) +{ + PyArrayMethodObject *meth = res->method; + + /* Set the defaults */ + meth->get_strided_loop = &default_get_strided_loop; + meth->resolve_descriptors = &default_resolve_descriptors; + + /* Fill in the slots passed by the user */ + /* + * TODO: This is reasonable for now, but it would be nice to find a + * shorter solution, and add some additional error checking (e.g. + * the same slot used twice). Python uses an array of slot offsets. + */ + for (PyType_Slot *slot = &spec->slots[0]; slot->slot != 0; slot++) { + switch (slot->slot) { + case NPY_DTMETH_resolve_descriptors: + meth->resolve_descriptors = slot->pfunc; + continue; + case NPY_DTMETH_get_loop: + if (private) { + /* Only allow override for private functions initially */ + meth->get_strided_loop = slot->pfunc; + continue; + } + break; + case NPY_DTMETH_strided_loop: + meth->strided_loop = slot->pfunc; + continue; + case NPY_DTMETH_contiguous_loop: + meth->contiguous_loop = slot->pfunc; + continue; + case NPY_DTMETH_unaligned_strided_loop: + meth->unaligned_strided_loop = slot->pfunc; + continue; + case NPY_DTMETH_unaligned_contiguous_loop: + meth->unaligned_contiguous_loop = slot->pfunc; + continue; + default: + break; + } + PyErr_Format(PyExc_RuntimeError, + "invalid slot number %d to ArrayMethod: %s", + slot->slot, spec->name); + return -1; + } + + /* Check whether the slots are valid: */ + if (meth->resolve_descriptors == &default_resolve_descriptors) { + for (int i = 0; i < res->nin + res->nout; i++) { + if (res->dtypes[i] == NULL) { + if (i < res->nin) { + PyErr_Format(PyExc_TypeError, + "All input DTypes must be specified when using " + "the default `resolve_descriptors` function. " + "(method: %s)", spec->name); + return -1; + } + else if (res->nin == 0) { + PyErr_Format(PyExc_TypeError, + "Must specify output DTypes or use custom " + "`resolve_descriptors` when there are no inputs. " + "(method: %s defined at %s)", spec->name); + return -1; + } + } + if (i >= res->nin && res->dtypes[i]->parametric) { + PyErr_Format(PyExc_TypeError, + "must provide a `resolve_descriptors` function if any " + "output DType is parametric. (method: %s)", + spec->name); + return -1; + } + } + } + if (meth->get_strided_loop != &default_get_strided_loop) { + /* Do not check the actual loop fields. */ + return 0; + } + + /* Check whether the provided loops make sense. 
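
In practice the slots are a zero-terminated array of ``PyType_Slot`` pairs mapping the ``NPY_DTMETH_*`` IDs (defined in ``array_method.h`` below) to the user's functions; the loop above simply walks that array. A hypothetical sketch, assuming ``my_resolve_descriptors`` and ``my_strided_loop`` exist with the signatures declared in ``array_method.h``::

    static PyType_Slot my_slots[] = {
        {NPY_DTMETH_resolve_descriptors, &my_resolve_descriptors},
        {NPY_DTMETH_strided_loop, &my_strided_loop},
        /* optionally also contiguous/unaligned variants, e.g.
         * {NPY_DTMETH_contiguous_loop, &my_contiguous_loop}, */
        {0, NULL},   /* terminator expected by fill_arraymethod_from_slots */
    };
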
*/ + if (meth->strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide a strided inner loop function. (method: %s)", + spec->name); + return -1; + } + if (meth->contiguous_loop == NULL) { + meth->contiguous_loop = meth->strided_loop; + } + if (meth->unaligned_contiguous_loop != NULL && + meth->unaligned_strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + if ((meth->unaligned_strided_loop == NULL) != + !(meth->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + + return 0; +} + + +/** + * Create a new ArrayMethod (internal version). + * + * @param name A name for the individual method, may be NULL. + * @param spec A filled context object to pass generic information about + * the method (such as usually needing the API, and the DTypes). + * Unused fields must be NULL. + * @param slots Slots with the correct pair of IDs and (function) pointers. + * @param private Some slots are currently considered private, if not true, + * these will be rejected. + * + * @returns A new (bound) ArrayMethod object. + */ +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private) +{ + int nargs = spec->nin + spec->nout; + + if (spec->name == NULL) { + spec->name = "<unknown>"; + } + + if (validate_spec(spec) < 0) { + return NULL; + } + + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->nin = spec->nin; + res->nout = spec->nout; + res->method = NULL; + + res->dtypes = PyMem_Malloc(sizeof(PyArray_DTypeMeta *) * nargs); + if (res->dtypes == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + for (int i = 0; i < nargs ; i++) { + Py_XINCREF(spec->dtypes[i]); + res->dtypes[i] = spec->dtypes[i]; + } + + res->method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (res->method == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + memset((char *)(res->method) + sizeof(PyObject), 0, + sizeof(PyArrayMethodObject) - sizeof(PyObject)); + + res->method->flags = spec->flags; + res->method->casting = spec->casting; + if (fill_arraymethod_from_slots(res, spec, private) < 0) { + Py_DECREF(res); + return NULL; + } + + ssize_t length = strlen(spec->name); + res->method->name = PyMem_Malloc(length + 1); + if (res->method->name == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + strcpy(res->method->name, spec->name); + + return res; +} + + +static void +arraymethod_dealloc(PyObject *self) +{ + PyArrayMethodObject *meth; + meth = ((PyArrayMethodObject *)self); + + PyMem_Free(meth->name); + + Py_TYPE(self)->tp_free(self); +} + + +NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._ArrayMethod", + .tp_basicsize = sizeof(PyArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_dealloc = arraymethod_dealloc, +}; + + + +static PyObject * +boundarraymethod_repr(PyBoundArrayMethodObject *self) +{ + PyObject *dtypes = PyTuple_New(self->nin + self->nout); + if (dtypes == NULL) { + return NULL; + } + for (int i = 0; i < self->nin + self->nout; i++) { + Py_INCREF(self->dtypes[i]); + PyTuple_SET_ITEM(dtypes, i, (PyObject *)self->dtypes[i]); + } + return PyUnicode_FromFormat( + 
"<np._BoundArrayMethod `%s` for dtypes %S>", + self->method->name, dtypes); +} + + +static void +boundarraymethod_dealloc(PyObject *self) +{ + PyBoundArrayMethodObject *meth; + meth = ((PyBoundArrayMethodObject *)self); + + for (int i = 0; i < meth->nin + meth->nout; i++) { + Py_XDECREF(meth->dtypes[i]); + } + PyMem_Free(meth->dtypes); + + Py_XDECREF(meth->method); + + Py_TYPE(self)->tp_free(self); +} + + +/* + * Calls resolve_descriptors() and returns the casting level and the resolved + * descriptors as a tuple. If the operation is impossible returns (-1, None). + * May raise an error, but usually should not. + * The function validates the casting attribute compared to the returned + * casting level. + */ +static PyObject * +boundarraymethod__resolve_descripors( + PyBoundArrayMethodObject *self, PyObject *descr_tuple) +{ + PyArray_Descr *given_descrs[NPY_MAXARGS]; + PyArray_Descr *loop_descrs[NPY_MAXARGS]; + + if (!PyTuple_CheckExact(descr_tuple) || + PyTuple_Size(descr_tuple) != self->nin + self->nout) { + PyErr_Format(PyExc_ValueError, + "_resolve_descriptors() takes exactly one tuple with as many " + "elements as the method takes arguments (%d+%d).", + self->nin, self->nout); + return NULL; + } + + for (int i = 0; i < self->nin + self->nout; i++) { + PyObject *tmp = PyTuple_GetItem(descr_tuple, i); + if (tmp == NULL) { + return NULL; + } + else if (tmp == Py_None) { + if (i < self->nin) { + PyErr_SetString(PyExc_ValueError, + "only output dtypes may be omitted (set to None)."); + return NULL; + } + given_descrs[i] = NULL; + } + else if (PyArray_DescrCheck(tmp)) { + if (Py_TYPE(tmp) != (PyTypeObject *)self->dtypes[i]) { + PyErr_Format(PyExc_ValueError, + "input dtype %S was not an exact instance of the bound " + "DType class %S.", tmp, self->dtypes[i]); + return NULL; + } + given_descrs[i] = (PyArray_Descr *)tmp; + } + else { + PyErr_SetString(PyExc_TypeError, + "dtype tuple can only contain dtype instances or None."); + return NULL; + } + } + + PyArrayMethod_Context context = { + .caller = NULL, + .method = self->method, + .dtypes = self->dtypes, + .nin = self->nin, + .nout = self->nout, + .descriptors = NULL, /* Used after resolve_descriptors */ + }; + NPY_CASTING casting = self->method->resolve_descriptors( + &context, given_descrs, loop_descrs); + + if (casting < 0 && PyErr_Occurred()) { + return NULL; + } + else if (casting < 0) { + return Py_BuildValue("iO", casting, Py_None); + } + + PyObject *result_tuple = PyTuple_New(self->nin + self->nout); + if (result_tuple == NULL) { + return NULL; + } + for (int i = 0; i < self->nin + self->nout; i++) { + /* transfer ownership to the tuple. */ + PyTuple_SET_ITEM(result_tuple, i, (PyObject *)loop_descrs[i]); + } + + /* + * The casting flags should be the most generic casting level (except the + * cast-is-view flag. If no input is parametric, it must match exactly. + */ + int parametric = 0; + for (int i = 0; i < self->nin + self->nout; i++) { + if (self->dtypes[i]->parametric) { + parametric = 1; + break; + } + } + if (!parametric) { + /* + * Non-parametric can only mismatch if it switches from no to equiv + * (e.g. due to byteorder changes). 
+ */ + if (self->method->casting != (casting & ~_NPY_CAST_IS_VIEW) && + !(self->method->casting == NPY_NO_CASTING && + casting == NPY_EQUIV_CASTING)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + else { + NPY_CASTING cast = casting & ~_NPY_CAST_IS_VIEW; + if (cast != PyArray_MinCastSafety(cast, self->method->casting)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + + return Py_BuildValue("iN", casting, result_tuple); +} + + +PyMethodDef boundarraymethod_methods[] = { + {"_resolve_descriptors", (PyCFunction)boundarraymethod__resolve_descripors, + METH_O, "Resolve the given dtypes."}, + {NULL, 0, 0, NULL}, +}; + + +static PyObject * +boundarraymethod__supports_unaligned(PyBoundArrayMethodObject *self) +{ + return PyBool_FromLong(self->method->flags & NPY_METH_SUPPORTS_UNALIGNED); +} + + +PyGetSetDef boundarraymethods_getters[] = { + {"_supports_unaligned", + (getter)boundarraymethod__supports_unaligned, NULL, + "whether the method supports unaligned inputs/outputs.", NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + + +NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._BoundArrayMethod", + .tp_basicsize = sizeof(PyBoundArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_repr = (reprfunc)boundarraymethod_repr, + .tp_dealloc = boundarraymethod_dealloc, + .tp_methods = boundarraymethod_methods, + .tp_getset = boundarraymethods_getters, +}; diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h new file mode 100644 index 000000000..a3361d20a --- /dev/null +++ b/numpy/core/src/multiarray/array_method.h @@ -0,0 +1,145 @@ +#ifndef _NPY_ARRAY_METHOD_H +#define _NPY_ARRAY_METHOD_H + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include <numpy/ndarraytypes.h> +#include <lowlevel_strided_loops.h> + + +typedef enum { + /* Flag for whether the GIL is required */ + NPY_METH_REQUIRES_PYAPI = 1 << 1, + /* + * Some functions cannot set floating point error flags, this flag + * gives us the option (not requirement) to skip floating point error + * setup/check. No function should set error flags and ignore them + * since it would interfere with chaining operations (e.g. casting). + */ + NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, + /* Whether the method supports unaligned access (not runtime) */ + NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + + /* All flags which can change at runtime */ + NPY_METH_RUNTIME_FLAGS = ( + NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS), +} NPY_ARRAYMETHOD_FLAGS; + + +struct PyArrayMethodObject_tag; + +/* + * This struct is specific to an individual (possibly repeated) call of + * the DTypeMethods strided operator, and as such is passed into the various + * methods of the DTypeMethod object (the adjust_descriptors function, + * the get_loop function and the individual lowlevel strided operator calls). + * It thus has to be persistent for one end-user call, and then be discarded. + * + * We recycle this as a specification for creating new DTypeMethods + * right now. 
(This should probably be reviewed before making it public) + */ +typedef struct { + PyObject *caller; + struct PyArrayMethodObject_tag *method; + int nin, nout; + + PyArray_DTypeMeta **dtypes; + /* Operand descriptors, filled in by adjust_desciptors */ + PyArray_Descr **descriptors; +} PyArrayMethod_Context; + + +typedef NPY_CASTING (resolve_descriptors_function)( + PyArrayMethod_Context *context, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + + +typedef int (get_loop_function)( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + + +/* + * This struct will be public and necessary for creating a new DTypeMethod + * object (casting and ufuncs). + * We could version the struct, although since we allow passing arbitrary + * data using the slots, and have flags, that may be enough? + * (See also PyBoundArrayMethodObject.) + */ +typedef struct { + const char *name; + int nin, nout; + NPY_CASTING casting; + NPY_ARRAYMETHOD_FLAGS flags; + PyArray_DTypeMeta **dtypes; + PyType_Slot *slots; +} PyArrayMethod_Spec; + + +/* + * Structure of the DTypeMethod. This structure should probably not be made + * public. If necessary, we can make certain operations on it public + * (e.g. to allow users access to `get_strided_loop`). + */ +typedef struct PyArrayMethodObject_tag { + PyObject_HEAD + char *name; + /* Casting is normally "safe" for functions, but is important for casts */ + NPY_CASTING casting; + /* default flags. The get_strided_loop function can override these */ + NPY_ARRAYMETHOD_FLAGS flags; + resolve_descriptors_function *resolve_descriptors; + get_loop_function *get_strided_loop; + /* Typical loop functions (contiguous ones are used in current casts) */ + PyArray_StridedUnaryOp *strided_loop; + PyArray_StridedUnaryOp *contiguous_loop; + PyArray_StridedUnaryOp *unaligned_strided_loop; + PyArray_StridedUnaryOp *unaligned_contiguous_loop; +} PyArrayMethodObject; + + +/* + * We will sometimes have to create a DTypeMethod and allow passing it around, + * similar to `instance.method` returning a bound method, e.g. a function like + * `ufunc.resolve()` can return a bound object. + * This or the method itself may need further attributes, such as the `owner` + * (which could be the bound ufunc), the `signature` (of the gufunc), or + * the identity for reduction support. + */ +typedef struct { + PyObject_HEAD + int nin; + int nout; + PyArray_DTypeMeta **dtypes; + PyArrayMethodObject *method; +} PyBoundArrayMethodObject; + + +extern NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type; +extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type; + +/* + * SLOTS IDs For the DTypeMethod creation, one public, the IDs are fixed. + * TODO: Before making it public, consider adding a large constant to private + * slots. 
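
Both ``PyArray_GetCastSafety`` and the new ``PyArray_CastDescrToDType`` path later in this diff build such a context on the stack for a one-input/one-output cast and then call ``resolve_descriptors`` through it. A condensed sketch of that pattern (``castingimpl``, ``from_dtype``, ``to_dtype`` and ``from_descr`` are placeholders)::

    PyArray_DTypeMeta *dtypes[2] = {from_dtype, to_dtype};
    PyArrayMethod_Context context = {
        .caller = NULL,
        .method = castingimpl,           /* a PyArrayMethodObject * */
        .nin = 1, .nout = 1,
        .dtypes = dtypes,
        .descriptors = NULL,             /* set later, after resolve_descriptors */
    };
    PyArray_Descr *given_descrs[2] = {from_descr, NULL};
    PyArray_Descr *loop_descrs[2];
    NPY_CASTING casting = castingimpl->resolve_descriptors(
            &context, given_descrs, loop_descrs);
    /* casting < 0 signals an error or an impossible cast;
     * otherwise loop_descrs[0]/loop_descrs[1] hold new references. */
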
+ */ +#define NPY_DTMETH_resolve_descriptors 1 +#define NPY_DTMETH_get_loop 2 +#define NPY_DTMETH_strided_loop 3 +#define NPY_DTMETH_contiguous_loop 4 +#define NPY_DTMETH_unaligned_strided_loop 5 +#define NPY_DTMETH_unaligned_contiguous_loop 6 + + +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private); + +#endif /*_NPY_ARRAY_METHOD_H*/ diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index f700bdc99..7cee072bd 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -18,10 +18,13 @@ #include "dtypemeta.h" #include "scalartypes.h" #include "mapping.h" +#include "legacy_dtype_implementation.h" #include "convert_datatype.h" #include "_datetime.h" #include "datetime_strings.h" +#include "array_method.h" +#include "usertypes.h" /* @@ -35,6 +38,185 @@ */ NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; + +static PyObject * +PyArray_GetGenericToVoidCastingImpl(); + +static PyObject * +PyArray_GetVoidToGenericCastingImpl(); + +static PyObject * +PyArray_GetGenericToObjectCastingImpl(); + +static PyObject * +PyArray_GetObjectToGenericCastingImpl(); + + +/** + * Fetch the casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an + * error set. + */ +static PyObject * +PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *res = PyDict_GetItem(from->castingimpls, (PyObject *)to); + if (res != NULL || PyErr_Occurred()) { + Py_XINCREF(res); + return res; + } + /* + * The following code looks up CastingImpl based on the fact that anything + * can be cast to and from objects or structured (void) dtypes. + * + * The last part adds casts dynamically based on legacy definition + */ + if (from->type_num == NPY_OBJECT) { + res = PyArray_GetObjectToGenericCastingImpl(); + } + else if (to->type_num == NPY_OBJECT) { + res = PyArray_GetGenericToObjectCastingImpl(); + } + else if (from->type_num == NPY_VOID) { + res = PyArray_GetVoidToGenericCastingImpl(); + } + else if (to->type_num == NPY_VOID) { + res = PyArray_GetGenericToVoidCastingImpl(); + } + else if (from->type_num < NPY_NTYPES && to->type_num < NPY_NTYPES) { + /* All builtin dtypes have their casts explicitly defined. */ + PyErr_Format(PyExc_RuntimeError, + "builtin cast from %S to %s not found, this should not " + "be possible.", from, to); + return NULL; + } + else { + if (from->parametric || to->parametric) { + Py_RETURN_NONE; + } + /* Reject non-legacy dtypes (they need to use the new API) */ + if (!from->legacy || !to->legacy) { + Py_RETURN_NONE; + } + if (from != to) { + /* A cast function must have been registered */ + PyArray_VectorUnaryFunc *castfunc = PyArray_GetCastFunc( + from->singleton, to->type_num); + if (castfunc == NULL) { + PyErr_Clear(); + /* Remember that this cast is not possible */ + if (PyDict_SetItem(from->castingimpls, (PyObject *) to, Py_None) < 0) { + return NULL; + } + Py_RETURN_NONE; + } + } + + /* PyArray_AddLegacyWrapping_CastingImpl find the correct casting level: */ + /* + * TODO: Possibly move this to the cast registration time. But if we do + * that, we have to also update the cast when the casting safety + * is registered. 
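
Putting the header pieces together: a new cast is described by a ``PyArrayMethod_Spec`` (slots, DTypes, default casting level and flags) and handed to ``PyArray_AddCastingImplementation_FromSpec``, which appears further down in ``convert_datatype.c``. A hypothetical sketch reusing the ``my_slots`` array from the earlier example; every ``my_*`` name is a placeholder::

    static PyArray_DTypeMeta *my_dtypes[2] = {NULL, NULL};  /* filled before use */

    static PyArrayMethod_Spec my_cast_spec = {
        .name = "my_example_cast",
        .nin = 1, .nout = 1,
        .casting = NPY_SAFE_CASTING,
        .flags = NPY_METH_NO_FLOATINGPOINT_ERRORS,
        .dtypes = my_dtypes,
        .slots = my_slots,
    };

    /* At initialization time (returns 0 on success, -1 with an error set): */
    if (PyArray_AddCastingImplementation_FromSpec(&my_cast_spec, 0) < 0) {
        return -1;
    }
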
+ */ + if (PyArray_AddLegacyWrapping_CastingImpl(from, to, -1) < 0) { + return NULL; + } + return PyArray_GetCastingImpl(from, to); + } + + if (res == NULL) { + return NULL; + } + if (PyDict_SetItem(from->castingimpls, (PyObject *)to, res) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + + +/** + * Fetch the (bound) casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A bound casting implementation or None (or NULL for error). + */ +static PyObject * +PyArray_GetBoundCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *method = PyArray_GetCastingImpl(from, to); + if (method == NULL || method == Py_None) { + return method; + } + + /* TODO: Create better way to wrap method into bound method */ + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->nin = 1; + res->nout = 1; + res->method = (PyArrayMethodObject *)method; + res->dtypes = PyMem_Malloc(2 * sizeof(PyArray_DTypeMeta *)); + if (res->dtypes == NULL) { + Py_DECREF(res); + return NULL; + } + Py_INCREF(from); + res->dtypes[0] = from; + Py_INCREF(to); + res->dtypes[1] = to; + + return (PyObject *)res; +} + + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args) +{ + PyArray_DTypeMeta *from, *to; + if (!PyArg_ParseTuple(args, "O!O!:_get_castingimpl", + &PyArrayDTypeMeta_Type, &from, &PyArrayDTypeMeta_Type, &to)) { + return NULL; + } + return PyArray_GetBoundCastingImpl(from, to); +} + + +/** + * Find the minimal cast safety level given two cast-levels as input. + * Supports the NPY_CAST_IS_VIEW check, and should be preferred to allow + * extending cast-levels if necessary. + * It is not valid for one of the arguments to be -1 to indicate an error. + * + * @param casting1 + * @param casting2 + * @return The minimal casting error (can be -1). + */ +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2) +{ + if (casting1 < 0 || casting2 < 0) { + return -1; + } + NPY_CASTING view = casting1 & casting2 & _NPY_CAST_IS_VIEW; + casting1 = casting1 & ~_NPY_CAST_IS_VIEW; + casting2 = casting2 & ~_NPY_CAST_IS_VIEW; + /* larger casting values are less safe */ + if (casting1 > casting2) { + return casting1 | view; + } + return casting2 | view; +} + + /*NUMPY_API * For backward compatibility * @@ -132,170 +314,6 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) return NULL; } -/* - * Legacy function to find the correct dtype when casting from any built-in - * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic - * units. - * - * This function returns a dtype based on flex_dtype and the values in - * data_dtype. It also calls Py_DECREF on the flex_dtype. If the - * flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. 
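
``PyArray_MinCastSafety`` above treats larger enum values as less safe and keeps the ``_NPY_CAST_IS_VIEW`` bit only when both inputs carry it; for example (illustrative assertions only)::

    /* The least safe (largest) level wins; the view bit survives only if shared: */
    assert(PyArray_MinCastSafety(NPY_SAFE_CASTING,
                                 NPY_NO_CASTING | _NPY_CAST_IS_VIEW)
           == NPY_SAFE_CASTING);
    assert(PyArray_MinCastSafety(NPY_NO_CASTING | _NPY_CAST_IS_VIEW,
                                 NPY_NO_CASTING | _NPY_CAST_IS_VIEW)
           == (NPY_NO_CASTING | _NPY_CAST_IS_VIEW));
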
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) -{ - PyArray_DatetimeMetaData *meta; - PyArray_Descr *retval = NULL; - int flex_type_num; - - if (flex_dtype == NULL) { - return retval; - } - - flex_type_num = flex_dtype->type_num; - - /* Flexible types with expandable size */ - if (PyDataType_ISUNSIZED(flex_dtype)) { - /* First replace the flex_dtype */ - retval = PyArray_DescrNew(flex_dtype); - Py_DECREF(flex_dtype); - if (retval == NULL) { - return retval; - } - - if (data_dtype->type_num == flex_type_num || - flex_type_num == NPY_VOID) { - (retval)->elsize = data_dtype->elsize; - } - else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { - npy_intp size = 8; - - /* - * Get a string-size estimate of the input. These - * are generallly the size needed, rounded up to - * a multiple of eight. - */ - switch (data_dtype->type_num) { - case NPY_BOOL: - case NPY_UBYTE: - case NPY_BYTE: - case NPY_USHORT: - case NPY_SHORT: - case NPY_UINT: - case NPY_INT: - case NPY_ULONG: - case NPY_LONG: - case NPY_ULONGLONG: - case NPY_LONGLONG: - if (data_dtype->kind == 'b') { - /* 5 chars needed for cast to 'True' or 'False' */ - size = 5; - } - else if (data_dtype->elsize > 8 || - data_dtype->elsize < 0) { - /* - * Element size should never be greater than 8 or - * less than 0 for integer type, but just in case... - */ - break; - } - else if (data_dtype->kind == 'u') { - size = REQUIRED_STR_LEN[data_dtype->elsize]; - } - else if (data_dtype->kind == 'i') { - /* Add character for sign symbol */ - size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; - } - break; - case NPY_HALF: - case NPY_FLOAT: - case NPY_DOUBLE: - size = 32; - break; - case NPY_LONGDOUBLE: - size = 48; - break; - case NPY_CFLOAT: - case NPY_CDOUBLE: - size = 2 * 32; - break; - case NPY_CLONGDOUBLE: - size = 2 * 48; - break; - case NPY_OBJECT: - size = 64; - break; - case NPY_STRING: - case NPY_VOID: - size = data_dtype->elsize; - break; - case NPY_UNICODE: - size = data_dtype->elsize / 4; - break; - case NPY_DATETIME: - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - Py_DECREF(retval); - return NULL; - } - size = get_datetime_iso_8601_strlen(0, meta->base); - break; - case NPY_TIMEDELTA: - size = 21; - break; - } - - if (flex_type_num == NPY_STRING) { - retval->elsize = size; - } - else if (flex_type_num == NPY_UNICODE) { - retval->elsize = size * 4; - } - } - else { - /* - * We should never get here, but just in case someone adds - * a new flex dtype... - */ - PyErr_SetString(PyExc_TypeError, - "don't know how to adapt flex dtype"); - Py_DECREF(retval); - return NULL; - } - } - /* Flexible type with generic time unit that adapts */ - else if (flex_type_num == NPY_DATETIME || - flex_type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(flex_dtype); - retval = flex_dtype; - if (meta == NULL) { - return NULL; - } - - if (meta->base == NPY_FR_GENERIC) { - if (data_dtype->type_num == NPY_DATETIME || - data_dtype->type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - return NULL; - } - - retval = create_datetime_dtype(flex_type_num, meta); - Py_DECREF(flex_dtype); - } - } - } - else { - retval = flex_dtype; - } - return retval; -} /* * Must be broadcastable. @@ -325,42 +343,122 @@ PyArray_CastAnyTo(PyArrayObject *out, PyArrayObject *mp) return PyArray_CopyAnyInto(out, mp); } + +/** + * Given two dtype instances, find the correct casting safety. 
+ * + * Note that in many cases, it may be preferable to fetch the casting + * implementations fully to have them available for doing the actual cast + * later. + * + * @param from + * @param to The descriptor to cast to (may be NULL) + * @param to_dtype If `to` is NULL, must pass the to_dtype (otherwise this + * is ignored). + * @return NPY_CASTING or -1 on error or if the cast is not possible. + */ +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype) +{ + NPY_CASTING casting; + if (to != NULL) { + to_dtype = NPY_DTYPE(to); + } + PyObject *meth = PyArray_GetCastingImpl(NPY_DTYPE(from), to_dtype); + if (meth == NULL) { + return -1; + } + if (meth == Py_None) { + Py_DECREF(Py_None); + return -1; + } + + PyArrayMethodObject *castingimpl = (PyArrayMethodObject *)meth; + + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(from), to_dtype}; + PyArray_Descr *descrs[2] = {from, to}; + PyArray_Descr *out_descrs[2]; + PyArrayMethod_Context context = { + .caller = NULL, + .nin = 1, .nout = 1, + .method = castingimpl, + .dtypes = dtypes, + .descriptors = NULL, + }; + + casting = castingimpl->resolve_descriptors(&context, descrs, out_descrs); + Py_DECREF(meth); + if (casting < 0) { + return -1; + } + /* The returned descriptors may not match, requiring a second check */ + if (out_descrs[0] != descrs[0]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[0], out_descrs[0], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + if (descrs[1] != NULL && out_descrs[1] != descrs[1]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[1], out_descrs[1], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + + finish: + Py_DECREF(out_descrs[0]); + Py_DECREF(out_descrs[1]); + /* NPY_NO_CASTING has to be used for (NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW) */ + assert(casting != (NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW)); + return casting; +} + + /*NUMPY_API *Check the type coercion rules. */ NPY_NO_EXPORT int PyArray_CanCastSafely(int fromtype, int totype) { - PyArray_Descr *from; - - /* Fast table lookup for small type numbers */ - if ((unsigned int)fromtype < NPY_NTYPES && - (unsigned int)totype < NPY_NTYPES) { - return _npy_can_cast_safely_table[fromtype][totype]; +#if NPY_USE_NEW_CASTINGIMPL + PyArray_DTypeMeta *from = PyArray_DTypeFromTypeNum(fromtype); + if (from == NULL) { + PyErr_WriteUnraisable(Py_None); + return 0; } - - /* Identity */ - if (fromtype == totype) { - return 1; + PyArray_DTypeMeta *to = PyArray_DTypeFromTypeNum(totype); + if (to == NULL) { + PyErr_WriteUnraisable(Py_None); + return 0; } + PyObject *castingimpl = PyArray_GetCastingImpl(from, to); + Py_DECREF(from); + Py_DECREF(to); - from = PyArray_DescrFromType(fromtype); - /* - * cancastto is a NPY_NOTYPE terminated C-int-array of types that - * the data-type can be cast to safely. 
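
The new ``PyArray_CanCastTypeTo`` further down shows how this is meant to be consumed: resolve the safety once, then compare it with the requested level through ``PyArray_MinCastSafety``. A condensed sketch (the descriptor variables are placeholders)::

    NPY_CASTING safety = PyArray_GetCastSafety(from_descr, to_descr, NULL);
    if (safety < 0) {
        /* an error is set, or the cast is simply not possible */
        PyErr_Clear();
        return 0;
    }
    /* Matches when the resolved safety is at least as safe as the request: */
    return PyArray_MinCastSafety(safety, NPY_SAME_KIND_CASTING)
            == NPY_SAME_KIND_CASTING;
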
- */ - if (from->f->cancastto) { - int *curtype = from->f->cancastto; - - while (*curtype != NPY_NOTYPE) { - if (*curtype++ == totype) { - return 1; - } - } + if (castingimpl == NULL) { + PyErr_WriteUnraisable(Py_None); + return 0; } - return 0; + else if (castingimpl == Py_None) { + Py_DECREF(Py_None); + return 0; + } + NPY_CASTING safety = ((PyArrayMethodObject *)castingimpl)->casting; + int res = PyArray_MinCastSafety(safety, NPY_SAFE_CASTING) == NPY_SAFE_CASTING; + Py_DECREF(castingimpl); + return res; +#else + return PyArray_LegacyCanCastSafely(fromtype, totype); +#endif } + + /*NUMPY_API * leaves reference count alone --- cannot be NULL * @@ -370,117 +468,16 @@ PyArray_CanCastSafely(int fromtype, int totype) NPY_NO_EXPORT npy_bool PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) { - int from_type_num = from->type_num; - int to_type_num = to->type_num; - npy_bool ret; - - ret = (npy_bool) PyArray_CanCastSafely(from_type_num, to_type_num); - if (ret) { - /* Check String and Unicode more closely */ - if (from_type_num == NPY_STRING) { - if (to_type_num == NPY_STRING) { - ret = (from->elsize <= to->elsize); - } - else if (to_type_num == NPY_UNICODE) { - ret = (from->elsize << 2 <= to->elsize); - } - } - else if (from_type_num == NPY_UNICODE) { - if (to_type_num == NPY_UNICODE) { - ret = (from->elsize <= to->elsize); - } - } - /* - * For datetime/timedelta, only treat casts moving towards - * more precision as safe. - */ - else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_datetime64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - else if (from_type_num == NPY_TIMEDELTA && - to_type_num == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_timedelta64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - /* - * If to_type_num is STRING or unicode - * see if the length is long enough to hold the - * stringified value of the object. - */ - else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { - /* - * Boolean value cast to string type is 5 characters max - * for string 'False'. - */ - int char_size = 1; - if (to_type_num == NPY_UNICODE) { - char_size = 4; - } - - ret = 0; - if (PyDataType_ISUNSIZED(to)) { - ret = 1; - } - /* - * Need at least 5 characters to convert from boolean - * to 'True' or 'False'. 
- */ - else if (from->kind == 'b' && to->elsize >= 5 * char_size) { - ret = 1; - } - else if (from->kind == 'u') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - else if (to->elsize >= - REQUIRED_STR_LEN[from->elsize] * char_size) { - ret = 1; - } - } - else if (from->kind == 'i') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - /* Extra character needed for sign */ - else if (to->elsize >= - (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { - ret = 1; - } - } - } - } - return ret; +#if NPY_USE_NEW_CASTINGIMPL + return PyArray_CanCastTypeTo(from, to, NPY_SAFE_CASTING); +#else + return PyArray_LegacyCanCastTo(from, to); +#endif } + /* Provides an ordering for the dtype 'kind' character codes */ -static int +NPY_NO_EXPORT int dtype_kind_to_ordering(char kind) { switch (kind) { @@ -541,51 +538,6 @@ type_num_unsigned_to_signed(int type_num) } } -/* - * Compare two field dictionaries for castability. - * - * Return 1 if 'field1' can be cast to 'field2' according to the rule - * 'casting', 0 if not. - * - * Castabiliy of field dictionaries is defined recursively: 'field1' and - * 'field2' must have the same field names (possibly in different - * orders), and the corresponding field types must be castable according - * to the given casting rule. - */ -static int -can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) -{ - Py_ssize_t ppos; - PyObject *key; - PyObject *tuple1, *tuple2; - - if (field1 == field2) { - return 1; - } - if (field1 == NULL || field2 == NULL) { - return 0; - } - if (PyDict_Size(field1) != PyDict_Size(field2)) { - return 0; - } - - /* Iterate over all the fields and compare for castability */ - ppos = 0; - while (PyDict_Next(field1, &ppos, &key, &tuple1)) { - if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { - return 0; - } - /* Compare the dtype of the field for castability */ - if (!PyArray_CanCastTypeTo( - (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), - (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), - casting)) { - return 0; - } - } - - return 1; -} /*NUMPY_API * Returns true if data of type 'from' may be cast to data of type @@ -593,224 +545,41 @@ can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) */ NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, - NPY_CASTING casting) + NPY_CASTING casting) { +#if NPY_USE_NEW_CASTINGIMPL /* - * Fast paths for equality and for basic types. + * NOTE: This code supports U and S, this is identical to the code + * in `ctors.c` which does not allow these dtypes to be attached + * to an array. Unlike the code for `np.array(..., dtype=)` + * which uses `PyArray_ExtractDTypeAndDescriptor` it rejects "m8" + * as a flexible dtype instance representing a DType. */ - if (from == to || - ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || - PyDataType_ISOBJECT(from)) && - NPY_LIKELY(from->type_num == to->type_num) && - NPY_LIKELY(from->byteorder == to->byteorder))) { - return 1; - } - /* - * Cases with subarrays and fields need special treatment. - */ - if (PyDataType_HASFIELDS(from)) { - /* - * If from is a structured data type, then it can be cast to a simple - * non-object one only for unsafe casting *and* if it has a single - * field; recurse just in case the single field is itself structured. 
- */ - if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { - if (casting == NPY_UNSAFE_CASTING && - PyDict_Size(from->fields) == 1) { - Py_ssize_t ppos = 0; - PyObject *tuple; - PyArray_Descr *field; - PyDict_Next(from->fields, &ppos, NULL, &tuple); - field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); - /* - * For a subarray, we need to get the underlying type; - * since we already are casting unsafely, we can ignore - * the shape. - */ - if (PyDataType_HASSUBARRAY(field)) { - field = field->subarray->base; - } - return PyArray_CanCastTypeTo(field, to, casting); - } - else { - return 0; - } - } - /* - * Casting from one structured data type to another depends on the fields; - * we pass that case on to the EquivTypenums case below. - * - * TODO: move that part up here? Need to check whether equivalent type - * numbers is an addition constraint that is needed. - * - * TODO/FIXME: For now, always allow structured to structured for unsafe - * casting; this is not correct, but needed since the treatment in can_cast - * below got out of sync with astype; see gh-13667. - */ - if (casting == NPY_UNSAFE_CASTING) { - return 1; - } - } - else if (PyDataType_HASFIELDS(to)) { - /* - * If "from" is a simple data type and "to" has fields, then only - * unsafe casting works (and that works always, even to multiple fields). - */ - return casting == NPY_UNSAFE_CASTING; - } /* - * Everything else we consider castable for unsafe for now. - * FIXME: ensure what we do here is consistent with "astype", - * i.e., deal more correctly with subarrays and user-defined dtype. + * TODO: We should grow support for `np.can_cast("d", "S")` being + * different from `np.can_cast("d", "S0")` here, at least for + * the python side API. */ - else if (casting == NPY_UNSAFE_CASTING) { - return 1; + NPY_CASTING safety; + if (PyDataType_ISUNSIZED(to) && to->subarray == NULL) { + safety = PyArray_GetCastSafety(from, NULL, NPY_DTYPE(to)); } - /* - * Equivalent simple types can be cast with any value of 'casting', but - * we need to be careful about structured to structured. - */ - if (PyArray_EquivTypenums(from->type_num, to->type_num)) { - /* For complicated case, use EquivTypes (for now) */ - if (PyTypeNum_ISUSERDEF(from->type_num) || - from->subarray != NULL) { - int ret; - - /* Only NPY_NO_CASTING prevents byte order conversion */ - if ((casting != NPY_NO_CASTING) && - (!PyArray_ISNBO(from->byteorder) || - !PyArray_ISNBO(to->byteorder))) { - PyArray_Descr *nbo_from, *nbo_to; - - nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); - nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); - if (nbo_from == NULL || nbo_to == NULL) { - Py_XDECREF(nbo_from); - Py_XDECREF(nbo_to); - PyErr_Clear(); - return 0; - } - ret = PyArray_EquivTypes(nbo_from, nbo_to); - Py_DECREF(nbo_from); - Py_DECREF(nbo_to); - } - else { - ret = PyArray_EquivTypes(from, to); - } - return ret; - } - - if (PyDataType_HASFIELDS(from)) { - switch (casting) { - case NPY_EQUIV_CASTING: - case NPY_SAFE_CASTING: - case NPY_SAME_KIND_CASTING: - /* - * `from' and `to' must have the same fields, and - * corresponding fields must be (recursively) castable. 
- */ - return can_cast_fields(from->fields, to->fields, casting); - - case NPY_NO_CASTING: - default: - return PyArray_EquivTypes(from, to); - } - } - - switch (from->type_num) { - case NPY_DATETIME: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_datetime64_metadata(meta1, meta2, casting); - } - else { - return can_cast_datetime64_metadata(meta1, meta2, casting); - } - } - case NPY_TIMEDELTA: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_timedelta64_metadata(meta1, meta2, casting); - } - else { - return can_cast_timedelta64_metadata(meta1, meta2, casting); - } - } - default: - switch (casting) { - case NPY_NO_CASTING: - return PyArray_EquivTypes(from, to); - case NPY_EQUIV_CASTING: - return (from->elsize == to->elsize); - case NPY_SAFE_CASTING: - return (from->elsize <= to->elsize); - default: - return 1; - } - break; - } + else { + safety = PyArray_GetCastSafety(from, to, NPY_DTYPE(to)); } - /* If safe or same-kind casts are allowed */ - else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { - if (PyArray_CanCastTo(from, to)) { - return 1; - } - else if(casting == NPY_SAME_KIND_CASTING) { - /* - * Also allow casting from lower to higher kinds, according - * to the ordering provided by dtype_kind_to_ordering. - * Some kinds, like datetime, don't fit in the hierarchy, - * and are special cased as -1. - */ - int from_order, to_order; - - from_order = dtype_kind_to_ordering(from->kind); - to_order = dtype_kind_to_ordering(to->kind); - - if (to->kind == 'm') { - /* both types being timedelta is already handled before. 
*/ - int integer_order = dtype_kind_to_ordering('i'); - return (from_order != -1) && (from_order <= integer_order); - } - return (from_order != -1) && (from_order <= to_order); - } - else { - return 0; - } - } - /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ - else { + if (safety < 0) { + PyErr_Clear(); return 0; } + /* If casting is the smaller (or equal) safety we match */ + return PyArray_MinCastSafety(safety, casting) == casting; +#else + return PyArray_LegacyCanCastTypeTo(from, to, casting); +#endif } + /* CanCastArrayTo needs this function */ static int min_scalar_type_num(char *valueptr, int type_num, int *is_small_unsigned); @@ -1067,6 +836,41 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) return descr; } +#if NPY_USE_NEW_CASTINGIMPL + PyObject *tmp = PyArray_GetCastingImpl(NPY_DTYPE(descr), given_DType); + if (tmp == NULL || tmp == Py_None) { + Py_XDECREF(tmp); + goto error; + } + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(descr), given_DType}; + PyArrayMethod_Context context = { + .caller = NULL, + .method = (PyArrayMethodObject *)tmp, + .dtypes = dtypes, + .nin = 1, + .nout = 1, + .descriptors = NULL, + }; + PyArray_Descr *given_descrs[2] = {descr, NULL}; + PyArray_Descr *loop_descrs[2]; + NPY_CASTING casting = context.method->resolve_descriptors( + &context, given_descrs, loop_descrs); + Py_DECREF(context.method); + if (casting < 0) { + goto error; + } + Py_DECREF(loop_descrs[0]); + return loop_descrs[1]; + + error:; /* (; due to compiler limitations) */ + PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL; + PyErr_Fetch(&err_type, &err_value, &err_traceback); + PyErr_Format(PyExc_ValueError, + "cannot cast dtype %S to %S.", descr, given_DType); + npy_PyErr_ChainExceptions(err_type, err_value, err_traceback); + return NULL; + +#else /* NPY_USE_NEW_CASTS */ if (!given_DType->legacy) { PyErr_SetString(PyExc_NotImplementedError, "Must use casting to find the correct DType for a parametric " @@ -1077,6 +881,7 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) PyArray_Descr *flex_dtype = PyArray_DescrNew(given_DType->singleton); return PyArray_AdaptFlexibleDType(descr, flex_dtype); +#endif /* NPY_USE_NEW_CASTS */ } @@ -2007,3 +1812,1101 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn) PyDataMem_FREE(mps); return NULL; } + + +/** + * Private function to add a casting implementation by unwrapping a bound + * array method. + * + * @param meth + * @return 0 on success -1 on failure. + */ +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth) +{ + if (meth->nin != 1 || meth->nout != 1) { + PyErr_SetString(PyExc_TypeError, + "A cast must have one input and one output."); + return -1; + } + if (meth->dtypes[0] == meth->dtypes[1]) { + if (!(meth->method->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must currently support unaligned data. (method: %s)", + meth->method->name); + return -1; + } + if ((meth->method->casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must signal `no-casting`. (method: %s)", + meth->method->name); + return -1; + } + } + if (PyDict_Contains(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1])) { + PyErr_Format(PyExc_RuntimeError, + "A cast was already added for %S -> %S. 
(method: %s)", + meth->dtypes[0], meth->dtypes[1], meth->method->name); + return -1; + } + if (PyDict_SetItem(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1], (PyObject *)meth->method) < 0) { + return -1; + } + return 0; +} + +/** + * Add a new casting implementation using a PyArrayMethod_Spec. + * + * @param spec + * @param private If private, allow slots not publically exposed. + * @return 0 on success -1 on failure + */ +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private) +{ + /* Create a bound method, unbind and store it */ + PyBoundArrayMethodObject *meth = PyArrayMethod_FromSpec_int(spec, private); + if (meth == NULL) { + return -1; + } + int res = PyArray_AddCastingImplmentation(meth); + Py_DECREF(meth); + if (res < 0) { + return -1; + } + return 0; +} + + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethod_Context *NPY_UNUSED(context), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + /* this function only makes sense for non-flexible legacy dtypes: */ + assert(loop_descrs[0]->elsize == loop_descrs[1]->elsize); + + /* + * Legacy dtypes (except datetime) only have byte-order and elsize as + * storage parameters. + */ + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +/* + * Simple dtype resolver for casting between two different (non-parametric) + * (legacy) dtypes. 
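+ *
+ * As a rough usage sketch (all variable names here are placeholders), a
+ * resolver of this kind is handed the requested descriptors and fills in
+ * the ones the actual cast loop will see:
+ *
+ *     PyArray_Descr *given[2] = {input_descr, NULL};  /* output may be NULL */
+ *     PyArray_Descr *loop_descrs[2];
+ *     NPY_CASTING safety = meth->resolve_descriptors(
+ *             &context, given, loop_descrs);
+ *
+ * On success `loop_descrs` holds native-byte-order descriptors (or the
+ * output DType's default when none was given) and `safety` reports the
+ * casting level, possibly OR'd with _NPY_CAST_IS_VIEW; a negative return
+ * signals an error.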
+ */ +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + assert(context->dtypes[0]->legacy && context->dtypes[1]->legacy); + + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] != NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + loop_descrs[1] = context->dtypes[1]->default_descr(context->dtypes[1]); + } + + if (context->method->casting != NPY_NO_CASTING) { + return context->method->casting; + } + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +static int +add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyType_Slot slots[6]; + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + .name = "numeric_cast", + .nin = 1, + .nout = 1, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + + npy_intp from_itemsize = dtypes[0]->singleton->elsize; + npy_intp to_itemsize = dtypes[1]->singleton->elsize; + + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &simple_cast_resolve_descriptors; + /* Fetch the optimized loops (2<<10 is a non-contiguous stride) */ + slots[1].slot = NPY_DTMETH_strided_loop; + slots[1].pfunc = PyArray_GetStridedNumericCastFn( + 1, 2<<10, 2<<10, from->type_num, to->type_num); + slots[2].slot = NPY_DTMETH_contiguous_loop; + slots[2].pfunc = PyArray_GetStridedNumericCastFn( + 1, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[3].slot = NPY_DTMETH_unaligned_strided_loop; + slots[3].pfunc = PyArray_GetStridedNumericCastFn( + 0, 2<<10, 2<<10, from->type_num, to->type_num); + slots[4].slot = NPY_DTMETH_unaligned_contiguous_loop; + slots[4].pfunc = PyArray_GetStridedNumericCastFn( + 0, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[5].slot = 0; + slots[5].pfunc = NULL; + + assert(slots[1].pfunc && slots[2].pfunc && slots[3].pfunc && slots[4].pfunc); + + /* Find the correct casting level, and special case no-cast */ + if (dtypes[0]->kind == dtypes[1]->kind && from_itemsize == to_itemsize) { + spec.casting = NPY_NO_CASTING; + + /* When there is no casting (equivalent C-types) use byteswap loops */ + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &legacy_same_dtype_resolve_descriptors; + slots[1].slot = NPY_DTMETH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "numeric_copy_or_byteswap"; + spec.flags |= NPY_METH_NO_FLOATINGPOINT_ERRORS; + } + else if (_npy_can_cast_safely_table[from->type_num][to->type_num]) { + spec.casting = NPY_SAFE_CASTING; + } + else if (dtype_kind_to_ordering(dtypes[0]->kind) <= + dtype_kind_to_ordering(dtypes[1]->kind)) { + spec.casting = NPY_SAME_KIND_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + /* Create a bound method, unbind and store it */ + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +/* + * This registers the castingimpl for all casts between numeric types. + * Eventually, this function should likely be defined as part of a .c.src + * file to remove `PyArray_GetStridedNumericCastFn` entirely. 
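+ *
+ * For a single pair of DTypes the registration performed by
+ * `add_numeric_cast` above boils down to the following sketch (the two
+ * dtype variables and the slot list are abridged):
+ *
+ *     PyArray_DTypeMeta *dtypes[2] = {from_dt, to_dt};
+ *     PyType_Slot slots[] = {
+ *         {NPY_DTMETH_resolve_descriptors, &simple_cast_resolve_descriptors},
+ *         {0, NULL}};
+ *     PyArrayMethod_Spec spec = {
+ *         .name = "numeric_cast", .nin = 1, .nout = 1,
+ *         .casting = NPY_SAFE_CASTING,
+ *         .flags = NPY_METH_SUPPORTS_UNALIGNED,
+ *         .dtypes = dtypes, .slots = slots,
+ *     };
+ *     if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) {
+ *         return -1;
+ *     }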
+ */ +static int +PyArray_InitializeNumericCasts() +{ + for (int from = 0; from < NPY_NTYPES; from++) { + if (!PyTypeNum_ISNUMBER(from) && from != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *from_dt = PyArray_DTypeFromTypeNum(from); + + for (int to = 0; to < NPY_NTYPES; to++) { + if (!PyTypeNum_ISNUMBER(to) && to != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *to_dt = PyArray_DTypeFromTypeNum(to); + int res = add_numeric_cast(from_dt, to_dt); + Py_DECREF(to_dt); + if (res < 0) { + Py_DECREF(from_dt); + return -1; + } + } + } + return 0; +} + + +static int +cast_to_string_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + /* + * NOTE: The following code used to be part of PyArray_AdaptFlexibleDType + * + * Get a string-size estimate of the input. These + * are generallly the size needed, rounded up to + * a multiple of eight. + */ + npy_intp size = -1; + switch (context->dtypes[0]->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + assert(context->dtypes[0]->singleton->elsize <= 8); + assert(context->dtypes[0]->singleton->elsize > 0); + if (context->dtypes[0]->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (context->dtypes[0]->kind == 'u') { + size = REQUIRED_STR_LEN[context->dtypes[0]->singleton->elsize]; + } + else if (context->dtypes[0]->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[context->dtypes[0]->singleton->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_STRING: + case NPY_VOID: + size = given_descrs[0]->elsize; + break; + case NPY_UNICODE: + size = given_descrs[0]->elsize / 4; + break; + default: + PyErr_SetString(PyExc_SystemError, + "Impossible cast to string path requested."); + return -1; + } + if (context->dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + + if (given_descrs[1] == NULL) { + loop_descrs[1] = PyArray_DescrNewFromType(context->dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = size; + } + else { + /* The legacy loop can handle mismatching itemsizes */ + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + + /* Set the input one as well (late for easier error management) */ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + + if (context->method->casting == NPY_UNSAFE_CASTING) { + assert(context->dtypes[0]->type_num == NPY_UNICODE && + context->dtypes[1]->type_num == NPY_STRING); + return NPY_UNSAFE_CASTING; + } + assert(context->method->casting == NPY_SAFE_CASTING); + + if (loop_descrs[1]->elsize >= size) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +static int +add_other_to_and_from_string_cast( + PyArray_DTypeMeta *string, PyArray_DTypeMeta *other) +{ + if (string == other) { + return 0; + } + + /* Casting from string, is always a simple legacy-style cast */ + if (other->type_num != NPY_STRING && other->type_num != NPY_UNICODE) { + if (PyArray_AddLegacyWrapping_CastingImpl( + string, other, NPY_UNSAFE_CASTING) < 0) { + return -1; + } + } 
+ /* + * Casting to strings, is almost the same, but requires a custom resolver + * to define the correct string length. Right now we use a generic function + * for this. + */ + PyArray_DTypeMeta *dtypes[2] = {other, string}; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {NPY_DTMETH_resolve_descriptors, &cast_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "legacy_cast_to_string", + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI, + .dtypes = dtypes, + .slots = slots, + }; + /* Almost everything can be safely cast to string (except unicode) */ + if (other->type_num != NPY_UNICODE) { + spec.casting = NPY_SAFE_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +NPY_NO_EXPORT NPY_CASTING +string_to_string_resolve_descriptors( + PyArrayMethod_Context *NPY_UNUSED(context), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + if (loop_descrs[0]->elsize == loop_descrs[1]->elsize) { + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else { + return NPY_EQUIV_CASTING; + } + } + else if (loop_descrs[0]->elsize <= loop_descrs[1]->elsize) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +/* + * Add string casts. Right now all string casts are just legacy-wrapped ones + * (except string<->string and unicode<->unicode), but they do require + * custom type resolution for the string length. + * + * A bit like `object`, it could make sense to define a simpler protocol for + * string casts, however, we also need to remember that the itemsize of the + * output has to be found. 
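+ *
+ * For the string<->string case `string_to_string_resolve_descriptors`
+ * above reports, roughly (dtype strings only for illustration):
+ *
+ *     "S4" -> "S4"    NPY_NO_CASTING | _NPY_CAST_IS_VIEW
+ *     "S4" -> "S8"    NPY_SAFE_CASTING
+ *     "S8" -> "S4"    NPY_SAME_KIND_CASTING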
+ */ +static int +PyArray_InitializeStringCasts() +{ + int result = -1; + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *other_dt = NULL; + + /* Add most casts as legacy ones */ + for (int other = 0; other < NPY_NTYPES; other++) { + if (PyTypeNum_ISDATETIME(other) || other == NPY_VOID || + other == NPY_OBJECT) { + continue; + } + other_dt = PyArray_DTypeFromTypeNum(other); + + /* The functions skip string == other_dt or unicode == other_dt */ + if (add_other_to_and_from_string_cast(string, other_dt) < 0) { + goto finish; + } + if (add_other_to_and_from_string_cast(unicode, other_dt) < 0) { + goto finish; + } + + Py_SETREF(other_dt, NULL); + } + + /* string<->string and unicode<->unicode have their own specialized casts */ + PyArray_DTypeMeta *dtypes[2]; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {NPY_DTMETH_resolve_descriptors, &string_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "string_to_string_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = (NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS | + NPY_METH_SUPPORTS_UNALIGNED), + .dtypes = dtypes, + .slots = slots, + }; + + dtypes[0] = string; + dtypes[1] = string; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + dtypes[0] = unicode; + dtypes[1] = unicode; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + result = 0; + finish: + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(other_dt); + return result; +} + + +/* + * Small helper function to handle the case of `arr.astype(dtype="V")`. + * When the output descriptor is not passed, we always use `V<itemsize>` + * of the other dtype. + */ +static NPY_CASTING +cast_to_void_dtype_class( + PyArray_Descr **given_descrs, PyArray_Descr **loop_descrs) +{ + /* `dtype="V"` means unstructured currently (compare final path) */ + loop_descrs[1] = PyArray_DescrNewFromType(NPY_VOID); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = given_descrs[0]->elsize; + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; +} + + +static NPY_CASTING +nonstructured_to_structured_resolve_descriptors( + PyArrayMethod_Context *NPY_UNUSED(context), + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[1]->subarray != NULL) { + /* + * We currently consider this at most a safe cast. It would be + * possible to allow a view if the field has exactly one element. + */ + casting = NPY_SAFE_CASTING; + /* Subarray dtype */ + NPY_CASTING base_casting = PyArray_GetCastSafety( + given_descrs[0], given_descrs[1]->subarray->base, NULL); + if (base_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, base_casting); + } + else if (given_descrs[1]->names != NULL) { + /* Structured dtype */ + if (PyTuple_Size(given_descrs[1]->names) == 0) { + /* TODO: This retained behaviour, but likely should be changed. 
*/ + casting = NPY_UNSAFE_CASTING; + } + else { + /* Considered at most unsafe casting (but this could be changed) */ + casting = NPY_UNSAFE_CASTING; + if (PyTuple_Size(given_descrs[1]->names) == 1) { + /* A view may be acceptable */ + casting |= _NPY_CAST_IS_VIEW; + } + + Py_ssize_t pos = 0; + PyObject *key, *tuple; + while (PyDict_Next(given_descrs[1]->fields, &pos, &key, &tuple)) { + PyArray_Descr *field_descr = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0], field_descr, NULL); + casting = PyArray_MinCastSafety(casting, field_casting); + if (casting < 0) { + return -1; + } + } + } + } + else { + /* Plain void type. This behaves much like a "view" */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize && + !PyDataType_REFCHK(given_descrs[0])) { + /* + * A simple view, at the moment considered "safe" (the refcheck is + * probably not necessary, but more future proof + */ + casting = NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize <= given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + /* Void dtypes always do the full cast. */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +int give_bad_field_error(PyObject *key) +{ + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_RuntimeError, + "Invalid or missing field %R, this should be impossible " + "and indicates a NumPy bug.", key); + } + return -1; +} + + +static PyObject * +PyArray_GetGenericToVoidCastingImpl() +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "any_to_void_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &nonstructured_to_structured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static NPY_CASTING +structured_to_nonstructured_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + PyArray_Descr *base_descr; + + if (given_descrs[0]->subarray != NULL) { + base_descr = given_descrs[0]->subarray->base; + } + else if (given_descrs[0]->names != NULL) { + if (PyTuple_Size(given_descrs[0]->names) != 1) { + /* Only allow casting a single field */ + return -1; + } + PyObject *key = PyTuple_GetItem(given_descrs[0]->names, 0); + PyObject *base_tup = PyDict_GetItem(given_descrs[0]->fields, key); + base_descr = (PyArray_Descr *)PyTuple_GET_ITEM(base_tup, 0); + } + else { + /* + * unstructured voids are considered unsafe casts and defined, albeit, + * at this time they go back to legacy behaviour using getitem/setitem. + */ + base_descr = NULL; + } + + /* + * The cast is always considered unsafe, so the PyArray_GetCastSafety + * result currently does not matter. + */ + if (base_descr != NULL && PyArray_GetCastSafety( + base_descr, given_descrs[1], context->dtypes[1]) < 0) { + return -1; + } + + /* Void dtypes always do the full cast. */ + if (given_descrs[1] == NULL) { + loop_descrs[1] = context->dtypes[1]->default_descr(context->dtypes[1]); + /* + * Special case strings here, this is probably unnecessary and + * should be useless (i.e. 
it is necessary to use empty arrays to + * trigger this path.). + */ + if (context->dtypes[1]->type_num == NPY_STRING) { + loop_descrs[1]->elsize = given_descrs[0]->elsize; + } + else if (context->dtypes[1]->type_num == NPY_UNICODE) { + loop_descrs[1]->elsize = given_descrs[0]->elsize * 4; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + return NPY_UNSAFE_CASTING; +} + + +static PyObject * +PyArray_GetVoidToGenericCastingImpl() +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "void_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &structured_to_nonstructured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +/* + * Find the correct field casting safety. See the TODO note below, including + * in 1.20 (and later) this was based on field names rather than field order + * which it should be using. + * + * NOTE: In theory it would be possible to cache the all the field casting + * implementations on the dtype, to avoid duplicate work. + */ +static NPY_CASTING +can_cast_fields_safety(PyArray_Descr *from, PyArray_Descr *to) +{ + NPY_CASTING casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + + Py_ssize_t field_count = PyTuple_Size(from->names); + if (field_count != PyTuple_Size(to->names)) { + /* TODO: This should be rejected! */ + return NPY_UNSAFE_CASTING; + } + for (Py_ssize_t i = 0; i < field_count; i++) { + PyObject *from_key = PyTuple_GET_ITEM(from->names, i); + PyObject *from_tup = PyDict_GetItemWithError(from->fields, from_key); + if (from_tup == NULL) { + return give_bad_field_error(from_key); + } + PyArray_Descr *from_base = (PyArray_Descr*)PyTuple_GET_ITEM(from_tup, 0); + + /* + * TODO: This should use to_key (order), compare gh-15509 by + * by Allan Haldane. And raise an error on failure. + * (Fixing that may also requires fixing/changing promotion.) + */ + PyObject *to_tup = PyDict_GetItem(to->fields, from_key); + if (to_tup == NULL) { + return NPY_UNSAFE_CASTING; + } + PyArray_Descr *to_base = (PyArray_Descr*)PyTuple_GET_ITEM(to_tup, 0); + + NPY_CASTING field_casting = PyArray_GetCastSafety(from_base, to_base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + if (!(casting & _NPY_CAST_IS_VIEW)) { + assert((casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING); + return casting; + } + + /* + * If the itemsize (includes padding at the end), fields, or names + * do not match, this cannot be a view and also not a "no" cast + * (identical dtypes). + * It may be possible that this can be relaxed in some cases. + */ + if (from->elsize != to->elsize) { + /* + * The itemsize may mismatch even if all fields and formats match + * (due to additional padding). 
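+         * A concrete example (Python notation, purely for illustration):
+         * `np.dtype({"names": ["a"], "formats": ["<i4"]})` and
+         * `np.dtype({"names": ["a"], "formats": ["<i4"], "itemsize": 8})`
+         * have identical fields but itemsizes 4 and 8, so the cast is
+         * downgraded to at most equivalent here.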
+ */ + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + + int cmp = PyObject_RichCompareBool(from->fields, to->fields, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + cmp = PyObject_RichCompareBool(from->names, to->names, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + return casting; +} + + +static NPY_CASTING +void_to_void_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + /* This is weird, since it doesn't return the original descr, but... */ + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[0]->names != NULL && given_descrs[1]->names != NULL) { + /* From structured to structured, need to check fields */ + casting = can_cast_fields_safety(given_descrs[0], given_descrs[1]); + } + else if (given_descrs[0]->names != NULL) { + return structured_to_nonstructured_resolve_descriptors( + context, given_descrs, loop_descrs); + } + else if (given_descrs[1]->names != NULL) { + return nonstructured_to_structured_resolve_descriptors( + context, given_descrs, loop_descrs); + } + else if (given_descrs[0]->subarray == NULL && + given_descrs[1]->subarray == NULL) { + /* Both are plain void dtypes */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize) { + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize < given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_SAME_KIND_CASTING; + } + } + else { + /* + * At this point, one of the dtypes must be a subarray dtype, the + * other is definitely not a structured one. + */ + PyArray_ArrayDescr *from_sub = given_descrs[0]->subarray; + PyArray_ArrayDescr *to_sub = given_descrs[1]->subarray; + assert(from_sub || to_sub); + + /* If the shapes do not match, this is at most an unsafe cast */ + casting = NPY_UNSAFE_CASTING; + if (from_sub && to_sub) { + int res = PyObject_RichCompareBool(from_sub->shape, to_sub->shape, Py_EQ); + if (res < 0) { + return -1; + } + else if (res) { + /* Both are subarrays and the shape matches */ + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + } + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0]->subarray->base, given_descrs[1]->subarray->base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + + /* Void dtypes always do the full cast. */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +/* + * This initializes the void to void cast. Voids include structured dtypes, + * which means that they can cast from and to any other dtype and, in that + * sense, are special (similar to Object). 
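+ *
+ * For plain (unstructured) void dtypes `void_to_void_resolve_descriptors`
+ * above reports, roughly (dtype strings only for illustration):
+ *
+ *     "V8" -> "V8"    NPY_NO_CASTING | _NPY_CAST_IS_VIEW
+ *     "V4" -> "V8"    NPY_SAFE_CASTING
+ *     "V8" -> "V4"    NPY_SAME_KIND_CASTING
+ *
+ * while structured operands are compared field by field via
+ * `can_cast_fields_safety`.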
+ */ +static int +PyArray_InitializeVoidToVoidCast() +{ + PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID); + PyArray_DTypeMeta *dtypes[2] = {Void, Void}; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {NPY_DTMETH_resolve_descriptors, &void_to_void_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "void_to_void_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Void); + return res; +} + + +/* + * Implement object to any casting implementation. Casting from object may + * require inspecting of all array elements (for parametric dtypes), and + * the resolver will thus reject all parametric dtypes if the out dtype + * is not provided. + */ +static NPY_CASTING +object_to_any_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + /* + * This should not really be called, since object -> parametric casts + * require inspecting the object array. Allow legacy ones, the path + * here is that e.g. "M8" input is considered to be the DType class, + * and by allowing it here, we go back to the "M8" instance. + */ + if (context->dtypes[1]->parametric && !context->dtypes[1]->legacy) { + PyErr_Format(PyExc_TypeError, + "casting from object to the parametric DType %S requires " + "the specified output dtype instance. " + "This may be a NumPy issue, since the correct instance " + "should be discovered automatically, however.", + context->dtypes[1]); + return -1; + } + loop_descrs[1] = context->dtypes[1]->default_descr(context->dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + if (context->dtypes[1]->type_num == NPY_VOID) { + /* NOTE: This appears to be behaviour as of 1.19 (void is not + * adjusted) */ + loop_descrs[1]->elsize = sizeof(PyObject *); + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_UNSAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. + */ +static PyObject * +PyArray_GetObjectToGenericCastingImpl() +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "object_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &object_to_any_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + + +/* Any object object is simple (could even use the default) */ +static NPY_CASTING +any_to_object_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + loop_descrs[1] = context->dtypes[1]->default_descr(context->dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. 
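+ *
+ * Because `any_to_object_resolve_descriptors` above always reports
+ * NPY_SAFE_CASTING, a check like `np.can_cast(dt, object)` (Python
+ * notation, for illustration) should keep succeeding for every input
+ * dtype, matching the legacy behaviour.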
+ */ +static PyObject * +PyArray_GetGenericToObjectCastingImpl() +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "any_to_object_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &any_to_object_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static int +PyArray_InitializeObjectToObjectCast() +{ + /* + * The object dtype does not support byte order changes, so its cast + * is always a direct view. + */ + PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT); + PyArray_DTypeMeta *dtypes[2] = {Object, Object}; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "object_to_object_cast", + .casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Object); + return res; +} + + +NPY_NO_EXPORT int +PyArray_InitializeCasts() +{ + if (PyArray_InitializeNumericCasts() < 0) { + return -1; + } + if (PyArray_InitializeStringCasts() < 0) { + return -1; + } + if (PyArray_InitializeVoidToVoidCast() < 0) { + return -1; + } + if (PyArray_InitializeObjectToObjectCast() < 0) { + return -1; + } + /* Datetime casts are defined in datetime.c */ + if (PyArray_InitializeDatetimeCasts() < 0) { + return -1; + } + return 0; +} diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index a2b36b497..351365701 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ b/numpy/core/src/multiarray/convert_datatype.h @@ -1,6 +1,13 @@ #ifndef _NPY_ARRAY_CONVERT_DATATYPE_H_ #define _NPY_ARRAY_CONVERT_DATATYPE_H_ +#include "array_method.h" + +extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[]; + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args); + NPY_NO_EXPORT PyArray_VectorUnaryFunc * PyArray_GetCastFunc(PyArray_Descr *descr, int type_num); @@ -16,6 +23,9 @@ PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2); NPY_NO_EXPORT int PyArray_ValidType(int type); +NPY_NO_EXPORT int +dtype_kind_to_ordering(char kind); + /* Like PyArray_CanCastArrayTo */ NPY_NO_EXPORT npy_bool can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, @@ -36,26 +46,35 @@ npy_set_invalid_cast_error( PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, NPY_CASTING casting, npy_bool scalar); -/* - * This function calls Py_DECREF on flex_dtype, and replaces it with - * a new dtype that has been adapted based on the values in data_dtype - * and data_obj. If the flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * The data_obj may be NULL if just a dtype is known for the source. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. - * - * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, - * and NPY_DATETIME with generic units. 
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); - NPY_NO_EXPORT PyArray_Descr * PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType); +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth); + +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private); + +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2); + +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype); + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethod_Context *NPY_UNUSED(context), + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **NPY_UNUSED(input_descrs), + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT int +PyArray_InitializeCasts(); + #endif diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 4afc45fb6..d41c190e1 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -25,6 +25,9 @@ #include "_datetime.h" #include "datetime_strings.h" #include "convert_datatype.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "usertypes.h" /* * Computes the python `ret, d = divmod(d, unit)`. @@ -3725,3 +3728,371 @@ find_object_datetime_type(PyObject *obj, int type_num) return NULL; } } + + + + +/* + * Describes casting within datetimes or timedelta + */ +static NPY_CASTING +time_to_time_resolve_descriptors( + PyArrayMethod_Context *NPY_UNUSED(context), + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + /* This is a within-dtype cast, which currently must handle byteswapping */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[0]); + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + int is_timedelta = given_descrs[0]->type_num == NPY_TIMEDELTA; + + if (given_descrs[0] == given_descrs[1]) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + + NPY_CASTING byteorder_may_allow_view = 0; + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + byteorder_may_allow_view = _NPY_CAST_IS_VIEW; + } + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(loop_descrs[0]); + assert(meta1 != NULL); + meta2 = get_datetime_metadata_from_dtype(loop_descrs[1]); + assert(meta2 != NULL); + + if (meta1->base == meta2->base && meta1->num == meta2->num) { + if (byteorder_may_allow_view) { + return NPY_NO_CASTING | byteorder_may_allow_view; + } + return NPY_EQUIV_CASTING; + } + else if (meta1->base == NPY_FR_GENERIC) { + return NPY_SAFE_CASTING | byteorder_may_allow_view; + } + else if (meta2->base == NPY_FR_GENERIC) { + /* TODO: This is actually an invalid cast (casting will error) */ + return NPY_UNSAFE_CASTING; + } + else if (is_timedelta && ( + /* jump between time units and date units is unsafe for timedelta */ + (meta1->base <= NPY_FR_M && meta2->base > NPY_FR_M) || + (meta1->base > NPY_FR_M && meta2->base <= NPY_FR_M))) { + return NPY_UNSAFE_CASTING; + } + else if (meta1->base <= meta2->base) { + /* Casting to a more precise unit is currently considered safe */ + if (datetime_metadata_divides(meta1, meta2, 
is_timedelta)) { + /* If it divides, we consider it to be a safe cast */ + return NPY_SAFE_CASTING; + } + else { + return NPY_SAME_KIND_CASTING; + } + } + return NPY_SAME_KIND_CASTING; +} + + +/* Handles datetime<->timedelta type resolution (both directions) */ +static NPY_CASTING +datetime_to_timedelta_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] == NULL) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + loop_descrs[1] = create_datetime_dtype(context->dtypes[1]->type_num, meta); + } + else { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + } + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + /* + * Mostly NPY_UNSAFE_CASTING is not true, the cast will fail. + * TODO: Once ufuncs use dtype specific promotion rules, + * this is likely unnecessary + */ + return NPY_UNSAFE_CASTING; +} + + +/* In the current setup both strings and unicode casts support all outputs */ +static NPY_CASTING +time_to_string_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] != NULL) { + /* + * At the time of writing, NumPy does not check the length here, + * but will error if filling fails. + */ + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + else { + /* Find the correct string length, possibly based on the unit */ + int size; + if (given_descrs[0]->type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + size = get_datetime_iso_8601_strlen(0, meta->base); + } + else { + size = 21; + } + if (context->dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + loop_descrs[1] = PyArray_DescrNewFromType(context->dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + loop_descrs[1]->elsize = size; + } + assert(context->method->casting == NPY_UNSAFE_CASTING); + return NPY_UNSAFE_CASTING; +} + + +static NPY_CASTING +string_to_datetime_cast_resolve_descriptors( + PyArrayMethod_Context *context, + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* We currently support byte-swapping, so any (unicode) string is OK */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + /* NOTE: This doesn't actually work, and will error during the cast */ + loop_descrs[1] = context->dtypes[1]->default_descr(context->dtypes[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + return NPY_UNSAFE_CASTING; +} + + +/* + * This registers the castingimpl for all datetime related casts. 
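+ *
+ * The unit based safety reported by `time_to_time_resolve_descriptors`
+ * above is, roughly (datetime strings only for illustration):
+ *
+ *     "M8[s]"  -> "M8[ms]"   NPY_SAFE_CASTING      (unit divides evenly)
+ *     "M8[ms]" -> "M8[s]"    NPY_SAME_KIND_CASTING
+ *     "M8"     -> "M8[s]"    NPY_SAFE_CASTING      (generic unit input)
+ *     "M8[s]"  -> "M8"       NPY_UNSAFE_CASTING    (the cast itself errors)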
+ */ +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts() +{ + int result = -1; + + PyType_Slot slots[3]; + PyArray_DTypeMeta *dtypes[2]; + PyArrayMethod_Spec spec = { + .name = "datetime_casts", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &time_to_time_resolve_descriptors; + slots[1].slot = NPY_DTMETH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + PyArray_DTypeMeta *datetime = PyArray_DTypeFromTypeNum(NPY_DATETIME); + PyArray_DTypeMeta *timedelta = PyArray_DTypeFromTypeNum(NPY_TIMEDELTA); + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *tmp = NULL; + + dtypes[0] = datetime; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + dtypes[0] = timedelta; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Casting between timedelta and datetime uses legacy casting loops, but + * custom dtype resolution (to handle copying of the time unit). + */ + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &datetime_to_timedelta_resolve_descriptors; + slots[1].slot = NPY_DTMETH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "timedelta_and_datetime_cast"; + dtypes[0] = timedelta; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + spec.name = "datetime_to_timedelta_cast"; + dtypes[0] = datetime; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Cast from numeric types to times. These use the cast functions + * as stored on the datatype, which should be replaced at some point. + * Some of these casts can fail (casting to unitless datetime), but these + * are rather special. + */ + for (int num = 0; num < NPY_NTYPES; num++) { + if (!PyTypeNum_ISNUMBER(num) && num != NPY_BOOL) { + continue; + } + + Py_XSETREF(tmp, PyArray_DTypeFromTypeNum(num)); + + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, datetime, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + datetime, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + NPY_CASTING to_timedelta_casting = NPY_UNSAFE_CASTING; + if (PyTypeNum_ISINTEGER(num) || num == NPY_BOOL) { + /* timedelta casts like int64 right now... */ + if (PyTypeNum_ISUNSIGNED(num) && tmp->singleton->elsize == 8) { + to_timedelta_casting = NPY_SAME_KIND_CASTING; + } + else { + to_timedelta_casting = NPY_SAFE_CASTING; + } + } + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, timedelta, to_timedelta_casting) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + timedelta, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + } + + /* + * Cast times to string and unicode + */ + spec.casting = NPY_UNSAFE_CASTING; + /* + * Casts can error and need API (unicodes needs it for string->unicode). + * Unicode handling is currently implemented via a legacy cast. 
+ */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &time_to_string_resolve_descriptors; + slots[1].slot = NPY_DTMETH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + for (int num = NPY_DATETIME; num <= NPY_TIMEDELTA; num++) { + for (int str = NPY_STRING; str <= NPY_UNICODE; str++) { + dtypes[0] = PyArray_DTypeFromTypeNum(num); + dtypes[1] = PyArray_DTypeFromTypeNum(str); + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_SETREF(dtypes[0], NULL); + Py_SETREF(dtypes[1], NULL); + if (res < 0) { + return -1; + } + } + } + + /* + * Cast strings to timedelta are currently only legacy casts + */ + if (PyArray_AddLegacyWrapping_CastingImpl( + string, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + unicode, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + /* + * Cast strings to datetime + */ + dtypes[1] = datetime; + spec.casting = NPY_UNSAFE_CASTING; + + /* The default type resolution should work fine. */ + slots[0].slot = NPY_DTMETH_resolve_descriptors; + slots[0].pfunc = &string_to_datetime_cast_resolve_descriptors; + slots[1].slot = NPY_DTMETH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + dtypes[0] = string; + spec.flags = NPY_METH_SUPPORTS_UNALIGNED; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + dtypes[0] = unicode; + /* + * Unicode handling is currently implemented via a legacy cast, which + * requires the Python API. + */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + result = 0; + fail: + Py_DECREF(datetime); + Py_DECREF(timedelta); + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(tmp); + return result; +} + diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index af4e6c22e..630bd76f3 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1006,9 +1006,8 @@ _strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, /* * Assumes src_dtype and dst_dtype are both datetimes or both timedeltas */ -static int +NPY_NO_EXPORT int get_nbo_cast_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) @@ -1082,12 +1081,10 @@ get_nbo_cast_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_datetime_to_string_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_datetime_to_string_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *src_meta; _strided_datetime_cast_data *data; @@ -1127,7 +1124,7 @@ get_nbo_datetime_to_string_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_datetime_to_unicode_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1140,8 +1137,8 @@ 
get_datetime_to_unicode_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(dst_dtype, str_dtype); + str_dtype = PyArray_DescrNewFromType(NPY_STRING); + str_dtype->elsize = dst_dtype->elsize / 4; if (str_dtype == NULL) { return NPY_FAIL; } @@ -1156,10 +1153,9 @@ get_datetime_to_unicode_transfer_function(int aligned, } /* Get the NBO datetime to string aligned contig function */ - if (get_nbo_datetime_to_string_transfer_function(1, - src_dtype->elsize, str_dtype->elsize, - src_dtype, str_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_datetime_to_string_transfer_function( + src_dtype, str_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1198,12 +1194,10 @@ get_datetime_to_unicode_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_string_to_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_string_to_datetime_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *dst_meta; _strided_datetime_cast_data *data; @@ -1250,7 +1244,7 @@ get_nbo_string_to_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_unicode_to_datetime_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1263,11 +1257,12 @@ get_unicode_to_datetime_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(src_dtype, str_dtype); + str_dtype = PyArray_DescrNewFromType(NPY_STRING); if (str_dtype == NULL) { return NPY_FAIL; } + assert(src_dtype->type_num == NPY_UNICODE); + str_dtype->elsize = src_dtype->elsize / 4; /* Get the cast operation from src */ if (PyArray_GetDTypeTransferFunction(aligned, @@ -1281,10 +1276,9 @@ get_unicode_to_datetime_transfer_function(int aligned, } /* Get the string to NBO datetime aligned contig function */ - if (get_nbo_string_to_datetime_transfer_function(1, - str_dtype->elsize, dst_dtype->elsize, - str_dtype, dst_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_string_to_datetime_transfer_function( + str_dtype, dst_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1323,7 +1317,7 @@ get_unicode_to_datetime_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int get_legacy_dtype_cast_function( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1502,7 +1496,6 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_cast_datetime_transfer_function(aligned, - src_stride, dst_stride, src_dtype, dst_dtype, out_stransfer, out_transferdata); } @@ -1518,10 +1511,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); return 
get_nbo_datetime_to_string_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_datetime_to_unicode_transfer_function( @@ -1538,10 +1529,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_string_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_unicode_to_datetime_transfer_function( @@ -1561,7 +1550,7 @@ get_nbo_cast_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int wrap_aligned_contig_transfer_function_with_copyswapn( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1570,7 +1559,7 @@ wrap_aligned_contig_transfer_function_with_copyswapn( PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata) { NpyAuxData *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *tobuffer, *frombuffer; + PyArray_StridedUnaryOp *tobuffer = NULL, *frombuffer = NULL; npy_intp src_itemsize = src_dtype->elsize; npy_intp dst_itemsize = dst_dtype->elsize; @@ -3768,6 +3757,53 @@ PyArray_GetDTypeTransferFunction(int aligned, out_needs_api); } + +/* + * Basic version of PyArray_GetDTypeTransferFunction for legacy dtype + * support. + * It supports only wrapping the copyswapn functions and the legacy + * cast functions registered with `PyArray_RegisterCastFunc`. + * This function takes the easy way out: It does not wrap + */ +NPY_NO_EXPORT int +PyArray_GetLegacyDTypeTransferFunction(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) +{ + /* Note: We ignore `needs_wrap`; needs-wrap is handled by another cast */ + int needs_wrap = 0; + + if (src_dtype->type_num == dst_dtype->type_num) { + /* + * This is a cast within the same dtype. For legacy user-dtypes, + * it is always valid to handle this using the copy swap function. 
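+         * For example a pure byte-swap "cast" such as `>i4` -> `<i4` (same
+         * type number, different byte order) is served entirely by
+         * `wrap_copy_swap_function` below; no registered cast function is
+         * involved.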
+ */ + return wrap_copy_swap_function(aligned, + src_stride, dst_stride, + src_dtype, + PyArray_ISNBO(src_dtype->byteorder) != + PyArray_ISNBO(dst_dtype->byteorder), + out_stransfer, out_transferdata); + } + + if (get_legacy_dtype_cast_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + out_stransfer, + out_transferdata, + out_needs_api, + &needs_wrap) != NPY_SUCCEED) { + return NPY_FAIL; + } + return NPY_SUCCEED; +} + + NPY_NO_EXPORT int PyArray_GetMaskedDTypeTransferFunction(int aligned, npy_intp src_stride, diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index e63a60738..4c11723e7 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -27,6 +27,7 @@ dtypemeta_dealloc(PyArray_DTypeMeta *self) { Py_XDECREF(self->scalar_type); Py_XDECREF(self->singleton); + Py_XDECREF(self->castingimpls); PyType_Type.tp_dealloc((PyObject *) self); } @@ -565,6 +566,12 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) /* Let python finish the initialization (probably unnecessary) */ if (PyType_Ready((PyTypeObject *)dtype_class) < 0) { + Py_DECREF(dtype_class); + return -1; + } + dtype_class->castingimpls = PyDict_New(); + if (dtype_class->castingimpls == NULL) { + Py_DECREF(dtype_class); return -1; } diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.c b/numpy/core/src/multiarray/legacy_dtype_implementation.c new file mode 100644 index 000000000..3ce4710fd --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.c @@ -0,0 +1,716 @@ +/* + * This file hosts legacy implementations of certain functions for + * which alternatives exists, but the old functions are still required + * in certain code paths, or until the code transition is finalized. + * + * This code should typically not require modification, and if modified + * similar changes may be necessary in the new version. + */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "scalartypes.h" +#include "_datetime.h" +#include "datetime_strings.h" +#include "convert_datatype.h" + +#include "legacy_dtype_implementation.h" + + +/* + * Compare the field dictionaries for two types. + * + * Return 1 if the field types and field names of the two descrs are equal and + * in the same order, 0 if not. + */ +static int +_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { + + int val; + + if (type1->fields == type2->fields && type1->names == type2->names) { + return 1; + } + if (type1->fields == NULL || type2->fields == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return 1; +} + +/* + * Compare the subarray data for two types. + * Return 1 if they are the same, 0 if not. 
+ */ +static int +_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) +{ + int val; + + if (sub1 == sub2) { + return 1; + + } + if (sub1 == NULL || sub2 == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return PyArray_EquivTypes(sub1->base, sub2->base); +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) +{ + int type_num1, type_num2, size1, size2; + + if (type1 == type2) { + return NPY_TRUE; + } + + type_num1 = type1->type_num; + type_num2 = type2->type_num; + size1 = type1->elsize; + size2 = type2->elsize; + + if (size1 != size2) { + return NPY_FALSE; + } + if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { + return NPY_FALSE; + } + if (type1->subarray || type2->subarray) { + return ((type_num1 == type_num2) + && _equivalent_subarrays(type1->subarray, type2->subarray)); + } + if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { + return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); + } + if (type_num1 == NPY_DATETIME + || type_num1 == NPY_TIMEDELTA + || type_num2 == NPY_DATETIME + || type_num2 == NPY_TIMEDELTA) { + return ((type_num1 == type_num2) + && has_equivalent_datetime_metadata(type1, type2)); + } + return type1->kind == type2->kind; +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2) +{ + PyArray_Descr *d1, *d2; + npy_bool ret; + + if (typenum1 == typenum2) { + return NPY_SUCCEED; + } + + d1 = PyArray_DescrFromType(typenum1); + d2 = PyArray_DescrFromType(typenum2); + ret = PyArray_LegacyEquivTypes(d1, d2); + Py_DECREF(d1); + Py_DECREF(d2); + return ret; +} + + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype) +{ + PyArray_Descr *from; + + /* Fast table lookup for small type numbers */ + if ((unsigned int)fromtype < NPY_NTYPES && + (unsigned int)totype < NPY_NTYPES) { + return _npy_can_cast_safely_table[fromtype][totype]; + } + + /* Identity */ + if (fromtype == totype) { + return 1; + } + + from = PyArray_DescrFromType(fromtype); + /* + * cancastto is a NPY_NOTYPE terminated C-int-array of types that + * the data-type can be cast to safely. + */ + if (from->f->cancastto) { + int *curtype = from->f->cancastto; + + while (*curtype != NPY_NOTYPE) { + if (*curtype++ == totype) { + return 1; + } + } + } + return 0; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to) +{ + int from_type_num = from->type_num; + int to_type_num = to->type_num; + npy_bool ret; + + ret = (npy_bool) PyArray_LegacyCanCastSafely(from_type_num, to_type_num); + if (ret) { + /* Check String and Unicode more closely */ + if (from_type_num == NPY_STRING) { + if (to_type_num == NPY_STRING) { + ret = (from->elsize <= to->elsize); + } + else if (to_type_num == NPY_UNICODE) { + ret = (from->elsize << 2 <= to->elsize); + } + } + else if (from_type_num == NPY_UNICODE) { + if (to_type_num == NPY_UNICODE) { + ret = (from->elsize <= to->elsize); + } + } + /* + * For datetime/timedelta, only treat casts moving towards + * more precision as safe. 
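+         * For example `m8[m]` -> `m8[s]` is accepted as safe here, while
+         * the reverse `m8[s]` -> `m8[m]` is not (it would only pass a
+         * same-kind check).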
+ */ + else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_datetime64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + else if (from_type_num == NPY_TIMEDELTA && + to_type_num == NPY_TIMEDELTA) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_timedelta64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + /* + * If to_type_num is STRING or unicode + * see if the length is long enough to hold the + * stringified value of the object. + */ + else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { + /* + * Boolean value cast to string type is 5 characters max + * for string 'False'. + */ + int char_size = 1; + if (to_type_num == NPY_UNICODE) { + char_size = 4; + } + + ret = 0; + if (PyDataType_ISUNSIZED(to)) { + ret = 1; + } + /* + * Need at least 5 characters to convert from boolean + * to 'True' or 'False'. + */ + else if (from->kind == 'b' && to->elsize >= 5 * char_size) { + ret = 1; + } + else if (from->kind == 'u') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + else if (to->elsize >= + REQUIRED_STR_LEN[from->elsize] * char_size) { + ret = 1; + } + } + else if (from->kind == 'i') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + /* Extra character needed for sign */ + else if (to->elsize >= + (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { + ret = 1; + } + } + } + } + return ret; +} + + +/* + * Compare two field dictionaries for castability. + * + * Return 1 if 'field1' can be cast to 'field2' according to the rule + * 'casting', 0 if not. + * + * Castabiliy of field dictionaries is defined recursively: 'field1' and + * 'field2' must have the same field names (possibly in different + * orders), and the corresponding field types must be castable according + * to the given casting rule. + */ +static int +can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) +{ + Py_ssize_t ppos; + PyObject *key; + PyObject *tuple1, *tuple2; + + if (field1 == field2) { + return 1; + } + if (field1 == NULL || field2 == NULL) { + return 0; + } + if (PyDict_Size(field1) != PyDict_Size(field2)) { + return 0; + } + + /* Iterate over all the fields and compare for castability */ + ppos = 0; + while (PyDict_Next(field1, &ppos, &key, &tuple1)) { + if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { + return 0; + } + /* Compare the dtype of the field for castability */ + if (!PyArray_CanCastTypeTo( + (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), + (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), + casting)) { + return 0; + } + } + + return 1; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting) +{ + /* + * Fast paths for equality and for basic types. 
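
The string-length rules encoded above surface directly in ``np.can_cast`` (which defaults to ``"safe"``); a small sketch using the ``REQUIRED_STR_LEN`` sizes::

    import numpy as np

    assert np.can_cast(np.bool_, "S5")        # 'False' needs five characters
    assert not np.can_cast(np.bool_, "S4")
    assert np.can_cast(np.uint64, "S20")      # at most 20 digits
    assert np.can_cast(np.int64, "S21")       # one extra character for the sign
    assert not np.can_cast(np.int64, "S20")
    assert np.can_cast(np.int64, "U21")       # unicode: same count, wider storage
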
+ */ + if (from == to || + ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || + PyDataType_ISOBJECT(from)) && + NPY_LIKELY(from->type_num == to->type_num) && + NPY_LIKELY(from->byteorder == to->byteorder))) { + return 1; + } + /* + * Cases with subarrays and fields need special treatment. + */ + if (PyDataType_HASFIELDS(from)) { + /* + * If from is a structured data type, then it can be cast to a simple + * non-object one only for unsafe casting *and* if it has a single + * field; recurse just in case the single field is itself structured. + */ + if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { + if (casting == NPY_UNSAFE_CASTING && + PyDict_Size(from->fields) == 1) { + Py_ssize_t ppos = 0; + PyObject *tuple; + PyArray_Descr *field; + PyDict_Next(from->fields, &ppos, NULL, &tuple); + field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + /* + * For a subarray, we need to get the underlying type; + * since we already are casting unsafely, we can ignore + * the shape. + */ + if (PyDataType_HASSUBARRAY(field)) { + field = field->subarray->base; + } + return PyArray_LegacyCanCastTypeTo(field, to, casting); + } + else { + return 0; + } + } + /* + * Casting from one structured data type to another depends on the fields; + * we pass that case on to the EquivTypenums case below. + * + * TODO: move that part up here? Need to check whether equivalent type + * numbers is an addition constraint that is needed. + * + * TODO/FIXME: For now, always allow structured to structured for unsafe + * casting; this is not correct, but needed since the treatment in can_cast + * below got out of sync with astype; see gh-13667. + */ + if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + } + else if (PyDataType_HASFIELDS(to)) { + /* + * If "from" is a simple data type and "to" has fields, then only + * unsafe casting works (and that works always, even to multiple fields). + */ + return casting == NPY_UNSAFE_CASTING; + } + /* + * Everything else we consider castable for unsafe for now. + * FIXME: ensure what we do here is consistent with "astype", + * i.e., deal more correctly with subarrays and user-defined dtype. + */ + else if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + /* + * Equivalent simple types can be cast with any value of 'casting', but + * we need to be careful about structured to structured. + */ + if (PyArray_LegacyEquivTypenums(from->type_num, to->type_num)) { + /* For complicated case, use EquivTypes (for now) */ + if (PyTypeNum_ISUSERDEF(from->type_num) || + from->subarray != NULL) { + int ret; + + /* Only NPY_NO_CASTING prevents byte order conversion */ + if ((casting != NPY_NO_CASTING) && + (!PyArray_ISNBO(from->byteorder) || + !PyArray_ISNBO(to->byteorder))) { + PyArray_Descr *nbo_from, *nbo_to; + + nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); + nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); + if (nbo_from == NULL || nbo_to == NULL) { + Py_XDECREF(nbo_from); + Py_XDECREF(nbo_to); + PyErr_Clear(); + return 0; + } + ret = PyArray_LegacyEquivTypes(nbo_from, nbo_to); + Py_DECREF(nbo_from); + Py_DECREF(nbo_to); + } + else { + ret = PyArray_LegacyEquivTypes(from, to); + } + return ret; + } + + if (PyDataType_HASFIELDS(from)) { + switch (casting) { + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + /* + * `from' and `to' must have the same fields, and + * corresponding fields must be (recursively) castable. 
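
These structured-dtype rules can be checked from Python; a short sketch of the legacy behaviour described above::

    import numpy as np

    # Structured to plain is never a "safe" cast ...
    assert not np.can_cast("i4,i4", "i8")
    # ... and (with a single field) is allowed only as an unsafe cast.
    assert np.can_cast([("a", "f8")], "f8", casting="unsafe")
    # Plain to structured likewise requires casting="unsafe".
    assert np.can_cast("f8", "f8,f8", casting="unsafe")
    assert not np.can_cast("f8", "f8,f8", casting="same_kind")
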
+ */ + return can_cast_fields(from->fields, to->fields, casting); + + case NPY_NO_CASTING: + default: + return PyArray_LegacyEquivTypes(from, to); + } + } + + switch (from->type_num) { + case NPY_DATETIME: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_datetime64_metadata(meta1, meta2, casting); + } + else { + return can_cast_datetime64_metadata(meta1, meta2, casting); + } + } + case NPY_TIMEDELTA: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_timedelta64_metadata(meta1, meta2, casting); + } + else { + return can_cast_timedelta64_metadata(meta1, meta2, casting); + } + } + default: + switch (casting) { + case NPY_NO_CASTING: + return PyArray_LegacyEquivTypes(from, to); + case NPY_EQUIV_CASTING: + return (from->elsize == to->elsize); + case NPY_SAFE_CASTING: + return (from->elsize <= to->elsize); + default: + return 1; + } + break; + } + } + /* If safe or same-kind casts are allowed */ + else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { + if (PyArray_LegacyCanCastTo(from, to)) { + return 1; + } + else if(casting == NPY_SAME_KIND_CASTING) { + /* + * Also allow casting from lower to higher kinds, according + * to the ordering provided by dtype_kind_to_ordering. + * Some kinds, like datetime, don't fit in the hierarchy, + * and are special cased as -1. + */ + int from_order, to_order; + + from_order = dtype_kind_to_ordering(from->kind); + to_order = dtype_kind_to_ordering(to->kind); + + if (to->kind == 'm') { + /* both types being timedelta is already handled before. */ + int integer_order = dtype_kind_to_ordering('i'); + return (from_order != -1) && (from_order <= integer_order); + } + + return (from_order != -1) && (from_order <= to_order); + } + else { + return 0; + } + } + /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ + else { + return 0; + } +} + + +/* + * Legacy function to find the correct dtype when casting from any built-in + * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic + * units. + * + * This function returns a dtype based on flex_dtype and the values in + * data_dtype. It also calls Py_DECREF on the flex_dtype. If the + * flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. 
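
A sketch of the datetime/timedelta rules at the Python level (the unsigned 64-bit case follows the cast table added in the new unit tests further below)::

    import numpy as np

    # Safe only towards more precision:
    assert np.can_cast("M8[s]", "M8[ms]")
    assert not np.can_cast("M8[ms]", "M8[s]")
    assert np.can_cast("M8[ms]", "M8[s]", casting="same_kind")

    # Kinds ordered at or below signed integers may become timedelta
    # under "same_kind"; uint64 is only a same-kind cast here.
    assert not np.can_cast("u8", "m8[s]")
    assert np.can_cast("u8", "m8[s]", casting="same_kind")
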
+ */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) +{ + PyArray_DatetimeMetaData *meta; + PyArray_Descr *retval = NULL; + int flex_type_num; + + if (flex_dtype == NULL) { + return retval; + } + + flex_type_num = flex_dtype->type_num; + + /* Flexible types with expandable size */ + if (PyDataType_ISUNSIZED(flex_dtype)) { + /* First replace the flex_dtype */ + retval = PyArray_DescrNew(flex_dtype); + Py_DECREF(flex_dtype); + if (retval == NULL) { + return retval; + } + + if (data_dtype->type_num == flex_type_num || + flex_type_num == NPY_VOID) { + (retval)->elsize = data_dtype->elsize; + } + else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { + npy_intp size = 8; + + /* + * Get a string-size estimate of the input. These + * are generallly the size needed, rounded up to + * a multiple of eight. + */ + switch (data_dtype->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + if (data_dtype->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (data_dtype->elsize > 8 || + data_dtype->elsize < 0) { + /* + * Element size should never be greater than 8 or + * less than 0 for integer type, but just in case... + */ + break; + } + else if (data_dtype->kind == 'u') { + size = REQUIRED_STR_LEN[data_dtype->elsize]; + } + else if (data_dtype->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_OBJECT: + size = 64; + break; + case NPY_STRING: + case NPY_VOID: + size = data_dtype->elsize; + break; + case NPY_UNICODE: + size = data_dtype->elsize / 4; + break; + case NPY_DATETIME: + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + Py_DECREF(retval); + return NULL; + } + size = get_datetime_iso_8601_strlen(0, meta->base); + break; + case NPY_TIMEDELTA: + size = 21; + break; + } + + if (flex_type_num == NPY_STRING) { + retval->elsize = size; + } + else if (flex_type_num == NPY_UNICODE) { + retval->elsize = size * 4; + } + } + else { + /* + * We should never get here, but just in case someone adds + * a new flex dtype... 
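
The size estimates computed above are what an unsized string request resolves to in practice; a few examples matching the existing promotion tests::

    import numpy as np

    assert np.promote_types("u8", "S1") == np.dtype("S20")
    assert np.promote_types("u8", "S30") == np.dtype("S30")   # never shrinks
    assert np.array([True]).astype("S").dtype == np.dtype("S5")
    assert np.array([1], dtype="i8").astype("S").dtype == np.dtype("S21")
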
+ */ + PyErr_SetString(PyExc_TypeError, + "don't know how to adapt flex dtype"); + Py_DECREF(retval); + return NULL; + } + } + /* Flexible type with generic time unit that adapts */ + else if (flex_type_num == NPY_DATETIME || + flex_type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(flex_dtype); + retval = flex_dtype; + if (meta == NULL) { + return NULL; + } + + if (meta->base == NPY_FR_GENERIC) { + if (data_dtype->type_num == NPY_DATETIME || + data_dtype->type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + return NULL; + } + + retval = create_datetime_dtype(flex_type_num, meta); + Py_DECREF(flex_dtype); + } + } + } + else { + retval = flex_dtype; + } + return retval; +} diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.h b/numpy/core/src/multiarray/legacy_dtype_implementation.h new file mode 100644 index 000000000..ca171d773 --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.h @@ -0,0 +1,40 @@ +#ifndef _NPY_LEGACY_DTYPE_IMPLEMENTATION_H +#define _NPY_LEGACY_DTYPE_IMPLEMENTATION_H + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2); + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2); + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting); + +/* + * This function calls Py_DECREF on flex_dtype, and replaces it with + * a new dtype that has been adapted based on the values in data_dtype + * and data_obj. If the flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. + * + * The data_obj may be NULL if just a dtype is known for the source. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. + * + * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, + * and NPY_DATETIME with generic units. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); + +#endif /*_NPY_LEGACY_DTYPE_IMPLEMENTATION_H*/ diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 1aad70dc6..60b965845 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -30,6 +30,8 @@ #include "npy_config.h" #include "npy_pycompat.h" #include "npy_import.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; @@ -1480,65 +1482,6 @@ array_putmask(PyObject *NPY_UNUSED(module), PyObject *args, PyObject *kwds) return PyArray_PutMask((PyArrayObject *)array, values, mask); } -/* - * Compare the field dictionaries for two types. - * - * Return 1 if the field types and field names of the two descrs are equal and - * in the same order, 0 if not. 
- */ -static int -_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { - - int val; - - if (type1->fields == type2->fields && type1->names == type2->names) { - return 1; - } - if (type1->fields == NULL || type2->fields == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return 1; -} - -/* - * Compare the subarray data for two types. - * Return 1 if they are the same, 0 if not. - */ -static int -_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) -{ - int val; - - if (sub1 == sub2) { - return 1; - - } - if (sub1 == NULL || sub2 == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return PyArray_EquivTypes(sub1->base, sub2->base); -} - /*NUMPY_API * @@ -1548,40 +1491,24 @@ _equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) NPY_NO_EXPORT unsigned char PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) { - int type_num1, type_num2, size1, size2; - - if (type1 == type2) { - return NPY_TRUE; - } - - type_num1 = type1->type_num; - type_num2 = type2->type_num; - size1 = type1->elsize; - size2 = type2->elsize; - - if (size1 != size2) { - return NPY_FALSE; - } - if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { - return NPY_FALSE; - } - if (type1->subarray || type2->subarray) { - return ((type_num1 == type_num2) - && _equivalent_subarrays(type1->subarray, type2->subarray)); - } - if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { - return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); - } - if (type_num1 == NPY_DATETIME - || type_num1 == NPY_TIMEDELTA - || type_num2 == NPY_DATETIME - || type_num2 == NPY_TIMEDELTA) { - return ((type_num1 == type_num2) - && has_equivalent_datetime_metadata(type1, type2)); +#if NPY_USE_NEW_CASTINGIMPL + /* + * Do not use PyArray_CanCastTypeTo because it supports legacy flexible + * dtypes as input. + */ + NPY_CASTING safety = PyArray_GetCastSafety(type1, type2, NULL); + if (safety < 0) { + PyErr_Clear(); + return 0; } - return type1->kind == type2->kind; + /* If casting is "no casting" this dtypes are considered equivalent. 
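
In Python terms, the new check treats two descriptors as equivalent exactly when casting between them needs no work at all (a view), which is stricter than an ``"equiv"`` cast; roughly::

    import numpy as np

    # "equiv" still allows a byte swap, equality/equivalence does not:
    assert np.can_cast("<i4", ">i4", casting="equiv")
    assert not np.can_cast("<i4", ">i4", casting="no")
    assert np.dtype("<i4") != np.dtype(">i4")
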
*/ + return PyArray_MinCastSafety(safety, NPY_NO_CASTING) == NPY_NO_CASTING; +#else + return PyArray_LegacyEquivTypes(type1, type2); +#endif } + /*NUMPY_API*/ NPY_NO_EXPORT unsigned char PyArray_EquivTypenums(int typenum1, int typenum2) @@ -4299,6 +4226,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS, NULL}, {"_discover_array_parameters", (PyCFunction)_discover_array_parameters, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_get_castingimpl", (PyCFunction)_get_castingimpl, + METH_VARARGS | METH_KEYWORDS, NULL}, /* from umath */ {"frompyfunc", (PyCFunction) ufunc_frompyfunc, @@ -4317,6 +4246,7 @@ static struct PyMethodDef array_module_methods[] = { }; #include "__multiarray_api.c" +#include "array_method.h" /* Establish scalar-type hierarchy * @@ -4767,9 +4697,20 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { if (set_typeinfo(d) != 0) { goto err; } + if (PyType_Ready(&PyArrayMethod_Type) < 0) { + goto err; + } + if (PyType_Ready(&PyBoundArrayMethod_Type) < 0) { + goto err; + } if (initialize_and_map_pytypes_to_dtypes() < 0) { goto err; } + + if (PyArray_InitializeCasts() < 0) { + goto err; + } + if (initumath(m) != 0) { goto err; } diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index 1404c9b68..de880eb1c 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -39,6 +39,10 @@ maintainer email: oliphant.travis@ieee.org #include "usertypes.h" #include "dtypemeta.h" #include "scalartypes.h" +#include "array_method.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" + NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL; @@ -488,3 +492,65 @@ legacy_userdtype_common_dtype_function( Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; } + + +/** + * This function wraps a legacy cast into an array-method. This is mostly + * used for legacy user-dtypes, but for example numeric to/from datetime + * casts were only defined that way as well. + * + * @param from + * @param to + * @param casting If `NPY_NO_CASTING` will check the legacy registered cast, + * otherwise uses the provided cast. + */ +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting) +{ + if (casting < 0) { + if (from == to) { + casting = NPY_NO_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAFE_CASTING)) { + casting = NPY_SAFE_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAME_KIND_CASTING)) { + casting = NPY_SAME_KIND_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + /* Name is not actually used, but allows identifying these. 
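
The ``_get_castingimpl`` helper registered here is private and intended only for the unit tests, but it shows how a cast is looked up and resolved; a sketch assuming a build where the new machinery is available::

    import numpy as np
    from numpy.core._multiarray_umath import _get_castingimpl

    cast = _get_castingimpl(type(np.dtype("i8")), type(np.dtype("f8")))
    casting, (from_dt, to_dt) = cast._resolve_descriptors((np.dtype("i8"), None))
    # `casting` is the NPY_CASTING level ("safe" for int64 -> float64), and
    # the unset output descriptor was resolved to the default float64.
    assert to_dt == np.dtype("f8")
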
*/ + .name = "legacy_cast", + .nin = 1, + .nout = 1, + .casting = casting, + .dtypes = dtypes, + }; + + if (from == to) { + spec.flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {NPY_DTMETH_resolve_descriptors, &legacy_same_dtype_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } + else { + spec.flags = NPY_METH_REQUIRES_PYAPI; + PyType_Slot slots[] = { + {NPY_DTMETH_get_loop, NULL}, + {NPY_DTMETH_resolve_descriptors, &simple_cast_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } +} diff --git a/numpy/core/src/multiarray/usertypes.h b/numpy/core/src/multiarray/usertypes.h index 1b323d458..8b2fc80e6 100644 --- a/numpy/core/src/multiarray/usertypes.h +++ b/numpy/core/src/multiarray/usertypes.h @@ -1,6 +1,8 @@ #ifndef _NPY_PRIVATE_USERTYPES_H_ #define _NPY_PRIVATE_USERTYPES_H_ +#include "array_method.h" + extern NPY_NO_EXPORT PyArray_Descr **userdescrs; NPY_NO_EXPORT void @@ -21,4 +23,8 @@ NPY_NO_EXPORT PyArray_DTypeMeta * legacy_userdtype_common_dtype_function( PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other); +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting); + #endif diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 24730f969..edc549a94 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -281,6 +281,14 @@ def test_array_astype(): a = np.array(1000, dtype='i4') assert_raises(TypeError, a.astype, 'U1', casting='safe') + +@pytest.mark.parametrize("dt", ["d", "f", "S13", "U32", "O"]) +def test_array_astype_to_void(dt): + dt = np.dtype(dt) + arr = np.array([], dtype=dt) + assert arr.astype("V").dtype.itemsize == dt.itemsize + + @pytest.mark.parametrize("t", np.sctypes['uint'] + np.sctypes['int'] + np.sctypes['float'] ) diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py new file mode 100644 index 000000000..2f82d718b --- /dev/null +++ b/numpy/core/tests/test_casting_unittests.py @@ -0,0 +1,284 @@ +""" +The tests exercise the casting machinery in a more low-level manner. +The reason is mostly to test a new implementation of the casting machinery. + +Unlike most tests in NumPy, these are closer to unit-tests rather +than integration tests. +""" + +import pytest +import textwrap +import enum + +import numpy as np + +from numpy.core._multiarray_umath import ( + _get_castingimpl as get_castingimpl) +from numpy.core._multiarray_tests import uses_new_casts + + +# Simple skips object, parametric and long double (unsupported by struct) +simple_dtypes = "?bhilqBHILQefdFD" +if np.dtype("l").itemsize != np.dtype("q").itemsize: + # Remove l and L, the table was generated with 64bit linux in mind. + # TODO: Should have two tables or no a different solution. + simple_dtypes = simple_dtypes.replace("l", "").replace("L", "") +simple_dtypes = [type(np.dtype(c)) for c in simple_dtypes] + + +def simple_dtype_instances(): + for dtype_class in simple_dtypes: + dt = dtype_class() + yield pytest.param(dt, id=str(dt)) + if dt.byteorder != "|": + dt = dt.newbyteorder() + yield pytest.param(dt, id=str(dt)) + + +def get_expected_stringlength(dtype): + """Returns the string length when casting the basic dtypes to strings. 
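
One detail the instance generator above relies on: single-byte dtypes report byte order ``"|"`` and therefore have no swapped variant; for example::

    import numpy as np

    assert np.dtype("b").byteorder == "|"        # not applicable, never swapped
    assert np.dtype("i4").byteorder == "="       # native
    assert np.dtype("i4").newbyteorder().byteorder in "<>"
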
+ """ + if dtype == np.bool_: + return 5 + if dtype.kind in "iu": + if dtype.itemsize == 1: + length = 3 + elif dtype.itemsize == 2: + length = 5 + elif dtype.itemsize == 4: + length = 10 + elif dtype.itemsize == 8: + length = 20 + else: + raise AssertionError(f"did not find expected length for {dtype}") + + if dtype.kind == "i": + length += 1 # adds one character for the sign + + return length + + # Note: Can't do dtype comparison for longdouble on windows + if dtype.char == "g": + return 48 + elif dtype.char == "G": + return 48 * 2 + elif dtype.kind == "f": + return 32 # also for half apparently. + elif dtype.kind == "c": + return 32 * 2 + + raise AssertionError(f"did not find expected length for {dtype}") + + +class Casting(enum.IntEnum): + no = 0 + equiv = 1 + safe = 2 + same_kind = 3 + unsafe = 4 + cast_is_view = 1 << 16 + + +def _get_cancast_table(): + table = textwrap.dedent(""" + X ? b h i l q B H I L Q e f d g F D G S U V O M m + ? # = = = = = = = = = = = = = = = = = = = = = . = + b . # = = = = . . . . . = = = = = = = = = = = . = + h . ~ # = = = . . . . . ~ = = = = = = = = = = . = + i . ~ ~ # = = . . . . . ~ ~ = = ~ = = = = = = . = + l . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + q . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + B . ~ = = = = # = = = = = = = = = = = = = = = . = + H . ~ ~ = = = ~ # = = = ~ = = = = = = = = = = . = + I . ~ ~ ~ = = ~ ~ # = = ~ ~ = = ~ = = = = = = . = + L . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + Q . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + e . . . . . . . . . . . # = = = = = = = = = = . . + f . . . . . . . . . . . ~ # = = = = = = = = = . . + d . . . . . . . . . . . ~ ~ # = ~ = = = = = = . . + g . . . . . . . . . . . ~ ~ ~ # ~ ~ = = = = = . . + F . . . . . . . . . . . . . . . # = = = = = = . . + D . . . . . . . . . . . . . . . ~ # = = = = = . . + G . . . . . . . . . . . . . . . ~ ~ # = = = = . . + S . . . . . . . . . . . . . . . . . . # = = = . . + U . . . . . . . . . . . . . . . . . . . # = = . . + V . . . . . . . . . . . . . . . . . . . . # = . . + O . . . . . . . . . . . . . . . . . . . . = # . . + M . . . . . . . . . . . . . . . . . . . . = = # . + m . . . . . . . . . . . . . . . . . . . . = = . 
# + """).strip().split("\n") + dtypes = [type(np.dtype(c)) for c in table[0][2::2]] + + convert_cast = {".": Casting.unsafe, "~": Casting.same_kind, + "=": Casting.safe, "#": Casting.equiv, + " ": -1} + + cancast = {} + for from_dt, row in zip(dtypes, table[1:]): + cancast[from_dt] = {} + for to_dt, c in zip(dtypes, row[2::2]): + cancast[from_dt][to_dt] = convert_cast[c] + + return cancast + +CAST_TABLE = _get_cancast_table() + + +class TestChanges: + """ + These test cases excercise some behaviour changes + """ + @pytest.mark.parametrize("string", ["S", "U"]) + @pytest.mark.parametrize("floating", ["e", "f", "d", "g"]) + def test_float_to_string(self, floating, string): + assert np.can_cast(floating, string) + # 100 is long enough to hold any formatted floating + if uses_new_casts(): + assert np.can_cast(floating, f"{string}100") + else: + assert not np.can_cast(floating, f"{string}100") + assert np.can_cast(floating, f"{string}100", casting="same_kind") + + def test_to_void(self): + # But in general, we do consider these safe: + assert np.can_cast("d", "V") + assert np.can_cast("S20", "V") + + # Do not consider it a safe cast if the void is too smaller: + if uses_new_casts(): + assert not np.can_cast("d", "V1") + assert not np.can_cast("S20", "V1") + assert not np.can_cast("U1", "V1") + # Structured to unstructured is just like any other: + assert np.can_cast("d,i", "V", casting="same_kind") + else: + assert np.can_cast("d", "V1") + assert np.can_cast("S20", "V1") + assert np.can_cast("U1", "V1") + assert not np.can_cast("d,i", "V", casting="same_kind") + + +class TestCasting: + @pytest.mark.parametrize("from_Dt", simple_dtypes) + def test_simple_cancast(self, from_Dt): + for to_Dt in simple_dtypes: + cast = get_castingimpl(from_Dt, to_Dt) + + for from_dt in [from_Dt(), from_Dt().newbyteorder()]: + default = cast._resolve_descriptors((from_dt, None))[1][1] + assert default == to_Dt() + del default + + for to_dt in [to_Dt(), to_Dt().newbyteorder()]: + casting, (from_res, to_res) = cast._resolve_descriptors( + (from_dt, to_dt)) + assert(type(from_res) == from_Dt) + assert(type(to_res) == to_Dt) + if casting & Casting.cast_is_view: + # If a view is acceptable, this is "no" casting + # and byte order must be matching. 
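
The behaviour changes covered by ``TestChanges`` can be reproduced with plain ``np.can_cast``; a sketch (the commented lines flip once the new implementation is enabled via ``NPY_USE_NEW_CASTINGIMPL``)::

    import numpy as np

    # Casting to an unsized void is safe under both implementations:
    assert np.can_cast("d", "V")
    assert np.can_cast("S20", "V")

    # Under the legacy rules a too-small fixed-size void is still accepted;
    # the new implementation rejects it:
    # np.can_cast("d", "V1")    -> False with the new casts, True otherwise
    # np.can_cast("f8", "S100") -> True with the new casts, False otherwise
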
+ assert casting == Casting.no | Casting.cast_is_view + # The above table lists this as "equivalent" + assert Casting.equiv == CAST_TABLE[from_Dt][to_Dt] + # Note that to_res may not be the same as from_dt + assert from_res.isnative == to_res.isnative + else: + if from_Dt == to_Dt: + # Note that to_res may not be the same as from_dt + assert from_res.isnative != to_res.isnative + assert casting == CAST_TABLE[from_Dt][to_Dt] + + if from_Dt is to_Dt: + assert(from_dt is from_res) + assert(to_dt is to_res) + + + def string_with_modified_length(self, dtype, change_length): + fact = 1 if dtype.char == "S" else 4 + length = dtype.itemsize // fact + change_length + return np.dtype(f"{dtype.byteorder}{dtype.char}{length}") + + @pytest.mark.parametrize("other_DT", simple_dtypes) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_cancast(self, other_DT, string_char): + fact = 1 if string_char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(other_DT, string_DT) + + other_dt = other_DT() + expected_length = get_expected_stringlength(other_dt) + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert safety == Casting.safe # we consider to string casts "safe" + assert isinstance(res_dt, string_DT) + + # These casts currently implement changing the string length, so + # check the cast-safety for too long/fixed string lengths: + for change_length in [-1, 0, 1]: + if change_length >= 0: + expected_safety = Casting.safe + else: + expected_safety = Casting.same_kind + + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + assert res_dt is to_dt + assert safety == expected_safety + + # The opposite direction is always considered unsafe: + cast = get_castingimpl(string_DT, other_DT) + + safety, _ = cast._resolve_descriptors((string_dt, other_dt)) + assert safety == Casting.unsafe + + cast = get_castingimpl(string_DT, other_DT) + safety, (_, res_dt) = cast._resolve_descriptors((string_dt, None)) + assert safety == Casting.unsafe + assert other_dt is res_dt # returns the singleton for simple dtypes + + @pytest.mark.parametrize("other_dt", ["S8", "<U8", ">U8"]) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_to_string_cancast(self, other_dt, string_char): + other_dt = np.dtype(other_dt) + + fact = 1 if string_char == "S" else 4 + div = 1 if other_dt.char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(type(other_dt), string_DT) + + expected_length = other_dt.itemsize // div + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert isinstance(res_dt, string_DT) + + if other_dt.char == string_char: + if other_dt.isnative: + expected_safety = Casting.no | Casting.cast_is_view + else: + expected_safety = Casting.equiv + elif string_char == "U": + expected_safety = Casting.safe + else: + expected_safety = Casting.unsafe + + assert expected_safety == safety + + for change_length in [-1, 0, 1]: + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + + assert res_dt is to_dt + if expected_safety == Casting.unsafe: + assert safety == expected_safety + elif 
change_length < 0: + assert safety == Casting.same_kind + elif change_length == 0: + assert safety == expected_safety + elif change_length > 0: + assert safety == Casting.safe diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 1b2b85cc1..0ebcc72da 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -153,6 +153,9 @@ class TestBuiltin: 'formats': ['f4', 'i4'], 'offsets': [4, 0]}) assert_equal(x == y, False) + # But it is currently an equivalent cast: + assert np.can_cast(x, y, casting="equiv") + class TestRecord: def test_equivalent_record(self): diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index f5428f98c..866a96e31 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -922,6 +922,25 @@ class TestTypes: assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) assert_equal(np.promote_types('u8', 'S30'), np.dtype('S30')) + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V6"), np.dtype("V10")], + [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])], + [np.dtype("i8,i8"), np.dtype("i4,i4")], + ]) + def test_invalid_void_promotion(self, dtype1, dtype2): + # Mainly test structured void promotion, which currently allows + # byte-swapping, but nothing else: + with pytest.raises(TypeError): + np.promote_types(dtype1, dtype2) + + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V10"), np.dtype("V10")], + [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])], + [np.dtype("i8,i8"), np.dtype("i8,>i8")], + ]) + def test_valid_void_promotion(self, dtype1, dtype2): + assert np.promote_types(dtype1, dtype2) is dtype1 + @pytest.mark.parametrize("dtype", list(np.typecodes["All"]) + ["i,i", "S3", "S100", "U3", "U100", rational]) diff --git a/numpy/testing/print_coercion_tables.py b/numpy/testing/print_coercion_tables.py index 8024df128..3a447cd2d 100755 --- a/numpy/testing/print_coercion_tables.py +++ b/numpy/testing/print_coercion_tables.py @@ -3,6 +3,7 @@ """ import numpy as np +from collections import namedtuple # Generic object that can be added, but doesn't do anything else class GenericObject: @@ -25,7 +26,17 @@ def print_cancast_table(ntypes): for row in ntypes: print(row, end=' ') for col in ntypes: - print(int(np.can_cast(row, col)), end=' ') + if np.can_cast(row, col, "equiv"): + cast = "#" + elif np.can_cast(row, col, "safe"): + cast = "=" + elif np.can_cast(row, col, "same_kind"): + cast = "~" + elif np.can_cast(row, col, "unsafe"): + cast = "." + else: + cast = " " + print(cast, end=' ') print() def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, use_promote_types=False): @@ -69,6 +80,101 @@ def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, print() +def print_new_cast_table(*, can_cast=True, legacy=False, flags=False): + """Prints new casts, the values given are default "can-cast" values, not + actual ones. 
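
The updated table printer classifies each dtype pair by the strictest casting level that still succeeds; a small equivalent helper (``cast_symbol`` is an illustrative name, not part of the patch)::

    import numpy as np

    def cast_symbol(from_dt, to_dt):
        """Return the symbol print_cancast_table would use for a dtype pair."""
        for casting, symbol in [("equiv", "#"), ("safe", "="),
                                ("same_kind", "~"), ("unsafe", ".")]:
            if np.can_cast(from_dt, to_dt, casting):
                return symbol
        return " "

    assert cast_symbol("i8", "i8") == "#"     # equivalent
    assert cast_symbol("i4", "i8") == "="     # safe
    assert cast_symbol("f8", "i8") == "~"     # same kind
    assert cast_symbol("f8", "M8[s]") == "."  # unsafe only
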
+ """ + from numpy.core._multiarray_tests import get_all_cast_information + + cast_table = { + 0 : "#", # No cast (classify as equivalent here) + 1 : "#", # equivalent casting + 2 : "=", # safe casting + 3 : "~", # same-kind casting + 4 : ".", # unsafe casting + } + flags_table = { + 0 : "▗", 7: "█", + 1: "▚", 2: "▐", 4: "▄", + 3: "▜", 5: "▙", + 6: "▟", + } + + cast_info = namedtuple("cast_info", ["can_cast", "legacy", "flags"]) + no_cast_info = cast_info(" ", " ", " ") + + casts = get_all_cast_information() + table = {} + dtypes = set() + for cast in casts: + dtypes.add(cast["from"]) + dtypes.add(cast["to"]) + + if cast["from"] not in table: + table[cast["from"]] = {} + to_dict = table[cast["from"]] + + can_cast = cast_table[cast["casting"]] + legacy = "L" if cast["legacy"] else "." + flags = 0 + if cast["requires_pyapi"]: + flags |= 1 + if cast["supports_unaligned"]: + flags |= 2 + if cast["no_floatingpoint_errors"]: + flags |= 4 + + flags = flags_table[flags] + to_dict[cast["to"]] = cast_info(can_cast=can_cast, legacy=legacy, flags=flags) + + # The np.dtype(x.type) is a bit strange, because dtype classes do + # not expose much yet. + types = np.typecodes["All"] + def sorter(x): + # This is a bit weird hack, to get a table as close as possible to + # the one printing all typecodes (but expecting user-dtypes). + dtype = np.dtype(x.type) + try: + indx = types.index(dtype.char) + except ValueError: + indx = np.inf + return (indx, dtype.char) + + dtypes = sorted(dtypes, key=sorter) + + def print_table(field="can_cast"): + print('X', end=' ') + for dt in dtypes: + print(np.dtype(dt.type).char, end=' ') + print() + for from_dt in dtypes: + print(np.dtype(from_dt.type).char, end=' ') + row = table.get(from_dt, {}) + for to_dt in dtypes: + print(getattr(row.get(to_dt, no_cast_info), field), end=' ') + print() + + if can_cast: + # Print the actual table: + print() + print("Casting: # is equivalent, = is safe, ~ is same-kind, and . is unsafe") + print() + print_table("can_cast") + + if legacy: + print() + print("L denotes a legacy cast . a non-legacy one.") + print() + print_table("legacy") + + if flags: + print() + print(f"{flags_table[0]}: no flags, {flags_table[1]}: PyAPI, " + f"{flags_table[2]}: supports unaligned, {flags_table[4]}: no-float-errors") + print() + print_table("flags") + + if __name__ == '__main__': print("can cast") print_cancast_table(np.typecodes['All']) @@ -89,3 +195,5 @@ if __name__ == '__main__': print() print("promote_types") print_coercion_table(np.typecodes['All'], 0, 0, False, True) + print("New casting type promotion:") + print_new_cast_table(can_cast=True, legacy=True, flags=True) |