diff options
Diffstat (limited to 'numpy')
53 files changed, 4776 insertions, 992 deletions
diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py index 33fee9a14..813e069a4 100644 --- a/numpy/_pytesttester.py +++ b/numpy/_pytesttester.py @@ -6,7 +6,7 @@ boiler plate for doing that is to put the following in the module ``__init__.py`` file:: from numpy._pytesttester import PytestTester - test = PytestTester(__name__).test + test = PytestTester(__name__) del PytestTester diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index 1868610f4..2d3a65391 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -50,8 +50,9 @@ # Version 13 (NumPy 1.17) No change. # Version 13 (NumPy 1.18) No change. # Version 13 (NumPy 1.19) No change. -# Version 13 (NumPy 1.20) No change. 0x0000000d = 5b0e8bbded00b166125974fc71e80a33 -# Version 14 (NumPy 1.19) DType related API additions +# Version 14 (NumPy 1.20) +# DType related API additions. +# A new field was added to the end of PyArrayObject_fields. 0x0000000e = 17a0f366e55ec05e5c5c149123478452 diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index 856db0410..ca6a22828 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -26,6 +26,7 @@ API_FILES = [join('multiarray', 'alloc.c'), join('multiarray', 'array_assign_array.c'), join('multiarray', 'array_assign_scalar.c'), join('multiarray', 'array_coercion.c'), + join('multiarray', 'array_method.c'), join('multiarray', 'arrayobject.c'), join('multiarray', 'arraytypes.c.src'), join('multiarray', 'buffer.c'), diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index d65e26827..efb052bc2 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -1375,7 +1375,7 @@ def resize(a, new_shape): reshaped_array : ndarray The new array is formed from the data in the old array, repeated if necessary to fill out the required number of elements. 
The - data are repeated in the order that they are stored in memory. + data are repeated iterating over the array in C-order. See Also -------- @@ -1392,11 +1392,11 @@ def resize(a, new_shape): Warning: This functionality does **not** consider axes separately, i.e. it does not apply interpolation/extrapolation. - It fills the return array with the required number of elements, taken - from `a` as they are laid out in memory, disregarding strides and axes. - (This is in case the new shape is smaller. For larger, see above.) - This functionality is therefore not suitable to resize images, - or data where each axis represents a separate and distinct entity. + It fills the return array with the required number of elements, iterating + over `a` in C-order, disregarding axes (and cycling back from the start if + the new shape is larger). This functionality is therefore not suitable to + resize images, or data where each axis represents a separate and distinct + entity. Examples -------- diff --git a/numpy/core/include/numpy/arrayscalars.h b/numpy/core/include/numpy/arrayscalars.h index b282a2cd4..14a31988f 100644 --- a/numpy/core/include/numpy/arrayscalars.h +++ b/numpy/core/include/numpy/arrayscalars.h @@ -149,6 +149,7 @@ typedef struct { PyArray_Descr *descr; int flags; PyObject *base; + void *_buffer_info; /* private buffer info, tagged to allow warning */ } PyVoidScalarObject; /* Macros diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 6bf54938f..63e8bf974 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -210,6 +210,7 @@ typedef enum { /* For specifying allowed casting in operations which support it */ typedef enum { + _NPY_ERROR_OCCURRED_IN_CAST = -1, /* Only allow identical types */ NPY_NO_CASTING=0, /* Allow identical and byte swapped types */ @@ -219,7 +220,14 @@ typedef enum { /* Allow safe casts or casts within the same kind */ NPY_SAME_KIND_CASTING=3, /* 
Allow any casts */ - NPY_UNSAFE_CASTING=4 + NPY_UNSAFE_CASTING=4, + /* + * Flag to allow signalling that a cast is a view, this flag is not + * valid when requesting a cast of specific safety. + * _NPY_CAST_IS_VIEW|NPY_EQUIV_CASTING means the same as NPY_NO_CASTING. + */ + // TODO-DTYPES: Needs to be documented. + _NPY_CAST_IS_VIEW = 1 << 16, } NPY_CASTING; typedef enum { @@ -701,6 +709,7 @@ typedef struct tagPyArrayObject_fields { int flags; /* For weak references */ PyObject *weakreflist; + void *_buffer_info; /* private buffer info, tagged to allow warning */ } PyArrayObject_fields; /* @@ -720,7 +729,18 @@ typedef struct tagPyArrayObject { } PyArrayObject; #endif -#define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields)) +/* + * Removed 2020-Nov-25, NumPy 1.20 + * #define NPY_SIZEOF_PYARRAYOBJECT (sizeof(PyArrayObject_fields)) + * + * The above macro was removed as it gave a false sense of a stable ABI + * with respect to the structures size. If you require a runtime constant, + * you can use `PyArray_Type.tp_basicsize` instead. Otherwise, please + * see the PyArrayObject documentation or ask the NumPy developers for + * information on how to correctly replace the macro in a way that is + * compatible with multiple NumPy versions. + */ + /* Array Flags Object */ typedef struct PyArrayFlagsObject { @@ -1900,6 +1920,12 @@ typedef void (PyDataMem_EventHookFunc)(void *inp, void *outp, size_t size, default_descr_function *default_descr; common_dtype_function *common_dtype; common_instance_function *common_instance; + /* + * Dictionary of ArrayMethods representing most possible casts + * (structured and object are exceptions). + * This should potentially become a weak mapping in the future. 
+ */ + PyObject *castingimpls; }; #endif /* NPY_INTERNAL_BUILD */ diff --git a/numpy/core/include/numpy/numpyconfig.h b/numpy/core/include/numpy/numpyconfig.h index 8eaf446b7..a1b1de0ef 100644 --- a/numpy/core/include/numpy/numpyconfig.h +++ b/numpy/core/include/numpy/numpyconfig.h @@ -41,6 +41,7 @@ #define NPY_1_17_API_VERSION 0x00000008 #define NPY_1_18_API_VERSION 0x00000008 #define NPY_1_19_API_VERSION 0x00000008 -#define NPY_1_20_API_VERSION 0x00000008 +#define NPY_1_20_API_VERSION 0x0000000e +#define NPY_1_21_API_VERSION 0x0000000e #endif diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 6ada03f73..2ec5e1a64 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -23,6 +23,11 @@ NPY_RELAXED_STRIDES_CHECKING = (os.environ.get('NPY_RELAXED_STRIDES_CHECKING', " NPY_RELAXED_STRIDES_DEBUG = (os.environ.get('NPY_RELAXED_STRIDES_DEBUG', "0") != "0") NPY_RELAXED_STRIDES_DEBUG = NPY_RELAXED_STRIDES_DEBUG and NPY_RELAXED_STRIDES_CHECKING +# Set to True to use the new casting implementation as much as implemented. +# Allows running the full test suit to exercise the new machinery until +# it is used as default and the old version is eventually deleted. +NPY_USE_NEW_CASTINGIMPL = os.environ.get('NPY_USE_NEW_CASTINGIMPL', "0") != "0" + # XXX: ugly, we use a class to avoid calling twice some expensive functions in # config.h/numpyconfig.h. 
I don't see a better way because distutils force # config.h generation inside an Extension class, and as such sharing @@ -468,6 +473,10 @@ def configuration(parent_package='',top_path=None): if NPY_RELAXED_STRIDES_DEBUG: moredefs.append(('NPY_RELAXED_STRIDES_DEBUG', 1)) + # Use the new experimental casting implementation in NumPy 1.20: + if NPY_USE_NEW_CASTINGIMPL: + moredefs.append(('NPY_USE_NEW_CASTINGIMPL', 1)) + # Get long double representation rep = check_long_double_representation(config_cmd) moredefs.append(('HAVE_LDOUBLE_%s' % rep, 1)) @@ -769,6 +778,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arraytypes.h'), join('src', 'multiarray', 'arrayfunction_override.h'), join('src', 'multiarray', 'array_coercion.h'), + join('src', 'multiarray', 'array_method.h'), join('src', 'multiarray', 'npy_buffer.h'), join('src', 'multiarray', 'calculation.h'), join('src', 'multiarray', 'common.h'), @@ -784,6 +794,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'getset.h'), join('src', 'multiarray', 'hashdescr.h'), join('src', 'multiarray', 'iterators.h'), + join('src', 'multiarray', 'legacy_dtype_implementation.h'), join('src', 'multiarray', 'mapping.h'), join('src', 'multiarray', 'methods.h'), join('src', 'multiarray', 'multiarraymodule.h'), @@ -824,6 +835,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'arrayobject.c'), join('src', 'multiarray', 'arraytypes.c.src'), join('src', 'multiarray', 'array_coercion.c'), + join('src', 'multiarray', 'array_method.c'), join('src', 'multiarray', 'array_assign_scalar.c'), join('src', 'multiarray', 'array_assign_array.c'), join('src', 'multiarray', 'arrayfunction_override.c'), @@ -850,6 +862,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'hashdescr.c'), join('src', 'multiarray', 'item_selection.c'), join('src', 'multiarray', 'iterators.c'), + join('src', 'multiarray', 
'legacy_dtype_implementation.c'), join('src', 'multiarray', 'lowlevel_strided_loops.c.src'), join('src', 'multiarray', 'mapping.c'), join('src', 'multiarray', 'methods.c'), diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index f15425c87..ba3e215b3 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -40,7 +40,8 @@ C_ABI_VERSION = 0x01000009 # 0x0000000c - 1.14.x # 0x0000000c - 1.15.x # 0x0000000d - 1.16.x -# 0x0000000e - 1.19.x +# 0x0000000d - 1.19.x +# 0x0000000e - 1.20.x C_API_VERSION = 0x0000000e class MismatchCAPIWarning(Warning): diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h index 27328aa73..61cc3c7f1 100644 --- a/numpy/core/src/common/npy_config.h +++ b/numpy/core/src/common/npy_config.h @@ -19,6 +19,15 @@ #endif +/* Disable broken functions on z/OS */ +#if defined (__MVS__) + +#undef HAVE_POWF +#undef HAVE_EXPF +#undef HAVE___THREAD + +#endif + /* Disable broken MS math functions */ #if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(__MINGW32_VERSION) diff --git a/numpy/core/src/multiarray/_datetime.h b/numpy/core/src/multiarray/_datetime.h index 421b03f93..c0d2f1967 100644 --- a/numpy/core/src/multiarray/_datetime.h +++ b/numpy/core/src/multiarray/_datetime.h @@ -373,4 +373,7 @@ datetime_arange(PyObject *start, PyObject *stop, PyObject *step, NPY_NO_EXPORT PyArray_Descr * find_object_datetime_type(PyObject *obj, int type_num); +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts(void); + #endif diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 5b6b6dc78..3811e87a8 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -9,6 +9,7 @@ #include "common.h" #include "mem_overlap.h" #include "npy_extint128.h" +#include "array_method.h" #if defined(MS_WIN32) || defined(__CYGWIN__) #define EXPORT(x) __declspec(dllexport) x @@ -36,6 +37,7 @@ 
IsPythonScalar(PyObject * dummy, PyObject *args) #include "npy_pycompat.h" + /** Function to test calling via ctypes */ EXPORT(void*) forward_pointer(void *x) { @@ -684,6 +686,39 @@ create_custom_field_dtype(PyObject *NPY_UNUSED(mod), PyObject *args) } +PyObject * +corrupt_or_fix_bufferinfo(PyObject *dummy, PyObject *obj) +{ + void **buffer_info_ptr; + if (PyArray_Check(obj)) { + buffer_info_ptr = &((PyArrayObject_fields *)obj)->_buffer_info; + } + else if (PyArray_IsScalar(obj, Void)) { + buffer_info_ptr = &((PyVoidScalarObject *)obj)->_buffer_info; + } + else { + PyErr_SetString(PyExc_TypeError, + "argument must be an array or void scalar"); + return NULL; + } + if (*buffer_info_ptr == NULL) { + /* set to an invalid value (as a subclass might accidentally) */ + *buffer_info_ptr = obj; + assert(((uintptr_t)obj & 7) == 0); + } + else if (*buffer_info_ptr == obj) { + /* Reset to a NULL (good value) */ + *buffer_info_ptr = NULL; + } + else { + PyErr_SetString(PyExc_TypeError, + "buffer was already exported, this test doesn't support that"); + return NULL; + } + Py_RETURN_NONE; +} + + /* check no elison for avoided increfs */ static PyObject * incref_elide(PyObject *dummy, PyObject *args) @@ -977,6 +1012,79 @@ get_c_wrapping_array(PyObject* NPY_UNUSED(self), PyObject* arg) } +static PyObject * +get_all_cast_information(PyObject *NPY_UNUSED(mod), PyObject *NPY_UNUSED(args)) +{ + PyObject *result = PyList_New(0); + if (result == NULL) { + return NULL; + } + PyObject *classes = PyObject_CallMethod( + (PyObject *)&PyArrayDescr_Type, "__subclasses__", ""); + if (classes == NULL) { + return NULL; + } + Py_SETREF(classes, PySequence_Fast(classes, NULL)); + if (classes == NULL) { + goto fail; + } + + Py_ssize_t nclass = PySequence_Length(classes); + for (Py_ssize_t i = 0; i < nclass; i++) { + PyArray_DTypeMeta *from_dtype = ( + (PyArray_DTypeMeta *)PySequence_Fast_GET_ITEM(classes, i)); + if (from_dtype->abstract) { + /* + * TODO: In principle probably needs to recursively 
check this, + * also we may allow casts to abstract dtypes at some point. + */ + continue; + } + + PyObject *to_dtype, *cast_obj; + Py_ssize_t pos = 0; + + while (PyDict_Next(from_dtype->castingimpls, &pos, &to_dtype, &cast_obj)) { + if (cast_obj == Py_None) { + continue; + } + PyArrayMethodObject *cast = (PyArrayMethodObject *)cast_obj; + + /* Pass some information about this cast out! */ + PyObject *cast_info = Py_BuildValue("{sOsOsisisisisisssi}", + "from", from_dtype, + "to", to_dtype, + "legacy", (cast->name != NULL && + strncmp(cast->name, "legacy_", 7) == 0), + "casting", cast->casting & ~_NPY_CAST_IS_VIEW, + "requires_pyapi", cast->flags & NPY_METH_REQUIRES_PYAPI, + "supports_unaligned", + cast->flags & NPY_METH_SUPPORTS_UNALIGNED, + "no_floatingpoint_errors", + cast->flags & NPY_METH_NO_FLOATINGPOINT_ERRORS, + "name", cast->name, + "cast_is_view", + cast->casting & _NPY_CAST_IS_VIEW); + if (cast_info == NULL) { + goto fail; + } + int res = PyList_Append(result, cast_info); + Py_DECREF(cast_info); + if (res < 0) { + goto fail; + } + } + } + Py_DECREF(classes); + return result; + + fail: + Py_XDECREF(classes); + Py_XDECREF(result); + return NULL; +} + + /* * Test C-api level item getting. 
*/ @@ -2010,6 +2118,18 @@ getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) return ret; } + +static PyObject * +uses_new_casts(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ +#if NPY_USE_NEW_CASTINGIMPL + Py_RETURN_TRUE; +#else + Py_RETURN_FALSE; +#endif +} + + static PyObject * run_byteorder_converter(PyObject* NPY_UNUSED(self), PyObject *args) { @@ -2113,8 +2233,8 @@ run_casting_converter(PyObject* NPY_UNUSED(self), PyObject *args) case NPY_SAFE_CASTING: return PyUnicode_FromString("NPY_SAFE_CASTING"); case NPY_SAME_KIND_CASTING: return PyUnicode_FromString("NPY_SAME_KIND_CASTING"); case NPY_UNSAFE_CASTING: return PyUnicode_FromString("NPY_UNSAFE_CASTING"); + default: return PyLong_FromLong(casting); } - return PyLong_FromLong(casting); } static PyObject * @@ -2158,6 +2278,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"create_custom_field_dtype", create_custom_field_dtype, METH_VARARGS, NULL}, + {"corrupt_or_fix_bufferinfo", + corrupt_or_fix_bufferinfo, + METH_O, NULL}, {"incref_elide", incref_elide, METH_VARARGS, NULL}, @@ -2194,6 +2317,12 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"get_c_wrapping_array", get_c_wrapping_array, METH_O, NULL}, + {"get_all_cast_information", + get_all_cast_information, + METH_NOARGS, + "Return a list with info on all available casts. 
Some of the info" + "may differ for an actual cast if it uses value-based casting " + "(flexible types)."}, {"array_indexing", array_indexing, METH_VARARGS, NULL}, @@ -2254,6 +2383,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"getset_numericops", getset_numericops, METH_NOARGS, NULL}, + {"uses_new_casts", + uses_new_casts, + METH_NOARGS, NULL}, /**begin repeat * #name = cabs, carg# */ diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c new file mode 100644 index 000000000..cae452454 --- /dev/null +++ b/numpy/core/src/multiarray/array_method.c @@ -0,0 +1,614 @@ +/* + * This file implements an abstraction layer for "Array methods", which + * work with a specific DType class input and provide low-level C function + * pointers to do fast operations on the given input functions. + * It thus adds an abstraction layer around individual ufunc loops. + * + * Unlike methods, a ArrayMethod can have multiple inputs and outputs. + * This has some serious implication for garbage collection, and as far + * as I (@seberg) understands, it is not possible to always guarantee correct + * cyclic garbage collection of dynamically created DTypes with methods. + * The keyword (or rather the solution) for this seems to be an "ephemeron" + * which I believe should allow correct garbage collection but seems + * not implemented in Python at this time. + * The vast majority of use-cases will not require correct garbage collection. + * Some use cases may require the user to be careful. + * + * Generally there are two main ways to solve this issue: + * + * 1. A method with a single input (or inputs of all the same DTypes) can + * be "owned" by that DType (it becomes unusable when the DType is deleted). + * This holds especially for all casts, which must have a defined output + * DType and must hold on to it strongly. + * 2. A method which can infer the output DType(s) from the input types does + * not need to keep the output type alive. 
(It can use NULL for the type, + * or an abstract base class which is known to be persistent.) + * It is then sufficient for a ufunc (or other owner) to only hold a + * weak reference to the input DTypes. + */ + + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include <npy_pycompat.h> +#include "arrayobject.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "convert_datatype.h" + + +/* + * The default descriptor resolution function. The logic is as follows: + * + * 1. The output is ensured to be canonical (currently native byte order), + * if it is of the correct DType. + * 2. If any DType is was not defined, it is replaced by the common DType + * of all inputs. (If that common DType is parametric, this is an error.) + * + * We could allow setting the output descriptors specifically to simplify + * this step. + */ +static NPY_CASTING +default_resolve_descriptors( + PyArrayMethodObject *method, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **input_descrs, + PyArray_Descr **output_descrs) +{ + int nin = method->nin; + int nout = method->nout; + int all_defined = 1; + + for (int i = 0; i < nin + nout; i++) { + PyArray_DTypeMeta *dtype = dtypes[i]; + if (dtype == NULL) { + output_descrs[i] = NULL; + all_defined = 0; + continue; + } + if (NPY_DTYPE(input_descrs[i]) == dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = dtype->default_descr(dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + if (all_defined) { + return method->casting; + } + + if (NPY_UNLIKELY(nin == 0 || dtypes[0] == NULL)) { + /* Registration should reject this, so this would be indicates a bug */ + PyErr_SetString(PyExc_RuntimeError, + "Invalid use of default resolver without inputs or with " + "input or output DType incorrectly missing."); + goto fail; + } + /* We find the common dtype of all inputs, and use it for the unknowns */ + PyArray_DTypeMeta *common_dtype = dtypes[0]; + 
assert(common_dtype != NULL); + for (int i = 1; i < nin; i++) { + Py_SETREF(common_dtype, PyArray_CommonDType(common_dtype, dtypes[i])); + if (common_dtype == NULL) { + goto fail; + } + } + for (int i = nin; i < nin + nout; i++) { + if (output_descrs[i] != NULL) { + continue; + } + if (NPY_DTYPE(input_descrs[i]) == common_dtype) { + output_descrs[i] = ensure_dtype_nbo(input_descrs[i]); + } + else { + output_descrs[i] = common_dtype->default_descr(common_dtype); + } + if (NPY_UNLIKELY(output_descrs[i] == NULL)) { + goto fail; + } + } + + return method->casting; + + fail: + for (int i = 0; i < nin + nout; i++) { + Py_XDECREF(output_descrs[i]); + } + return -1; +} + + +/** + * The default method to fetch the correct loop for a cast or ufunc + * (at the time of writing only casts). + * The default version can return loops explicitly registered during method + * creation. It does specialize contiguous loops, although has to check + * all descriptors itemsizes for this. + * + * @param context + * @param aligned + * @param move_references UNUSED. + * @param strides + * @param descriptors + * @param out_loop + * @param out_transferdata + * @param flags + * @return 0 on success -1 on failure. + */ +static int +default_get_strided_loop( + PyArrayMethod_Context *NPY_UNUSED(context), + int NPY_UNUSED(aligned), int NPY_UNUSED(move_references), + npy_intp *NPY_UNUSED(strides), + PyArray_StridedUnaryOp **NPY_UNUSED(out_loop), + NpyAuxData **NPY_UNUSED(out_transferdata), + NPY_ARRAYMETHOD_FLAGS *NPY_UNUSED(flags)) +{ + PyErr_SetString(PyExc_NotImplementedError, + "default loop getter is not implemented"); + return -1; +} + + +/** + * Validate that the input is usable to create a new ArrayMethod. + * + * @param spec + * @return 0 on success -1 on error. 
+ */ +static int +validate_spec(PyArrayMethod_Spec *spec) +{ + int nargs = spec->nin + spec->nout; + /* Check the passed spec for invalid fields/values */ + if (spec->nin < 0 || spec->nout < 0 || nargs > NPY_MAXARGS) { + PyErr_Format(PyExc_ValueError, + "ArrayMethod inputs and outputs must be greater zero and" + "not exceed %d. (method: %s)", NPY_MAXARGS, spec->name); + return -1; + } + switch (spec->casting & ~_NPY_CAST_IS_VIEW) { + case NPY_NO_CASTING: + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + case NPY_UNSAFE_CASTING: + break; + default: + PyErr_Format(PyExc_TypeError, + "ArrayMethod has invalid casting `%d`. (method: %s)", + spec->casting, spec->name); + return -1; + } + + for (int i = 0; i < nargs; i++) { + if (spec->dtypes[i] == NULL && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod must have well defined input DTypes. " + "(method: %s)", spec->name); + return -1; + } + if (!PyObject_TypeCheck(spec->dtypes[i], &PyArrayDTypeMeta_Type)) { + PyErr_Format(PyExc_TypeError, + "ArrayMethod provided object %R is not a DType." + "(method: %s)", spec->dtypes[i], spec->name); + return -1; + } + if (spec->dtypes[i]->abstract && i < spec->nin) { + PyErr_Format(PyExc_TypeError, + "abstract DType %S are currently not allowed for inputs." + "(method: %s defined at %s)", spec->dtypes[i], spec->name); + return -1; + } + } + return 0; +} + + +/** + * Initialize a new BoundArrayMethodObject from slots. Slots which are + * not provided may be filled with defaults. + * + * @param res The new PyBoundArrayMethodObject to be filled. + * @param spec The specification list passed by the user. + * @param private Private flag to limit certain slots to use in NumPy. 
+ * @return -1 on error 0 on success + */ +static int +fill_arraymethod_from_slots( + PyBoundArrayMethodObject *res, PyArrayMethod_Spec *spec, + int private) +{ + PyArrayMethodObject *meth = res->method; + + /* Set the defaults */ + meth->get_strided_loop = &default_get_strided_loop; + meth->resolve_descriptors = &default_resolve_descriptors; + + /* Fill in the slots passed by the user */ + /* + * TODO: This is reasonable for now, but it would be nice to find a + * shorter solution, and add some additional error checking (e.g. + * the same slot used twice). Python uses an array of slot offsets. + */ + for (PyType_Slot *slot = &spec->slots[0]; slot->slot != 0; slot++) { + switch (slot->slot) { + case NPY_METH_resolve_descriptors: + meth->resolve_descriptors = slot->pfunc; + continue; + case NPY_METH_get_loop: + if (private) { + /* Only allow override for private functions initially */ + meth->get_strided_loop = slot->pfunc; + continue; + } + break; + case NPY_METH_strided_loop: + meth->strided_loop = slot->pfunc; + continue; + case NPY_METH_contiguous_loop: + meth->contiguous_loop = slot->pfunc; + continue; + case NPY_METH_unaligned_strided_loop: + meth->unaligned_strided_loop = slot->pfunc; + continue; + case NPY_METH_unaligned_contiguous_loop: + meth->unaligned_contiguous_loop = slot->pfunc; + continue; + default: + break; + } + PyErr_Format(PyExc_RuntimeError, + "invalid slot number %d to ArrayMethod: %s", + slot->slot, spec->name); + return -1; + } + + /* Check whether the slots are valid: */ + if (meth->resolve_descriptors == &default_resolve_descriptors) { + for (int i = 0; i < meth->nin + meth->nout; i++) { + if (res->dtypes[i] == NULL) { + if (i < meth->nin) { + PyErr_Format(PyExc_TypeError, + "All input DTypes must be specified when using " + "the default `resolve_descriptors` function. 
" + "(method: %s)", spec->name); + return -1; + } + else if (meth->nin == 0) { + PyErr_Format(PyExc_TypeError, + "Must specify output DTypes or use custom " + "`resolve_descriptors` when there are no inputs. " + "(method: %s defined at %s)", spec->name); + return -1; + } + } + if (i >= meth->nin && res->dtypes[i]->parametric) { + PyErr_Format(PyExc_TypeError, + "must provide a `resolve_descriptors` function if any " + "output DType is parametric. (method: %s)", + spec->name); + return -1; + } + } + } + if (meth->get_strided_loop != &default_get_strided_loop) { + /* Do not check the actual loop fields. */ + return 0; + } + + /* Check whether the provided loops make sense. */ + if (meth->strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide a strided inner loop function. (method: %s)", + spec->name); + return -1; + } + if (meth->contiguous_loop == NULL) { + meth->contiguous_loop = meth->strided_loop; + } + if (meth->unaligned_contiguous_loop != NULL && + meth->unaligned_strided_loop == NULL) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + if ((meth->unaligned_strided_loop == NULL) != + !(meth->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "Must provide unaligned strided inner loop when providing " + "a contiguous version. (method: %s)", spec->name); + return -1; + } + + return 0; +} + + +/** + * Create a new ArrayMethod (internal version). + * + * @param name A name for the individual method, may be NULL. + * @param spec A filled context object to pass generic information about + * the method (such as usually needing the API, and the DTypes). + * Unused fields must be NULL. + * @param slots Slots with the correct pair of IDs and (function) pointers. + * @param private Some slots are currently considered private, if not true, + * these will be rejected. + * + * @returns A new (bound) ArrayMethod object. 
+ */ +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private) +{ + int nargs = spec->nin + spec->nout; + + if (spec->name == NULL) { + spec->name = "<unknown>"; + } + + if (validate_spec(spec) < 0) { + return NULL; + } + + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->method = NULL; + + res->dtypes = PyMem_Malloc(sizeof(PyArray_DTypeMeta *) * nargs); + if (res->dtypes == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + for (int i = 0; i < nargs ; i++) { + Py_XINCREF(spec->dtypes[i]); + res->dtypes[i] = spec->dtypes[i]; + } + + res->method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (res->method == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + memset((char *)(res->method) + sizeof(PyObject), 0, + sizeof(PyArrayMethodObject) - sizeof(PyObject)); + + res->method->nin = spec->nin; + res->method->nout = spec->nout; + res->method->flags = spec->flags; + res->method->casting = spec->casting; + if (fill_arraymethod_from_slots(res, spec, private) < 0) { + Py_DECREF(res); + return NULL; + } + + ssize_t length = strlen(spec->name); + res->method->name = PyMem_Malloc(length + 1); + if (res->method->name == NULL) { + Py_DECREF(res); + PyErr_NoMemory(); + return NULL; + } + strcpy(res->method->name, spec->name); + + return res; +} + + +static void +arraymethod_dealloc(PyObject *self) +{ + PyArrayMethodObject *meth; + meth = ((PyArrayMethodObject *)self); + + PyMem_Free(meth->name); + + Py_TYPE(self)->tp_free(self); +} + + +NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._ArrayMethod", + .tp_basicsize = sizeof(PyArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_dealloc = arraymethod_dealloc, +}; + + + +static PyObject * +boundarraymethod_repr(PyBoundArrayMethodObject *self) +{ + int nargs = 
self->method->nin + self->method->nout; + PyObject *dtypes = PyTuple_New(nargs); + if (dtypes == NULL) { + return NULL; + } + for (int i = 0; i < nargs; i++) { + Py_INCREF(self->dtypes[i]); + PyTuple_SET_ITEM(dtypes, i, (PyObject *)self->dtypes[i]); + } + return PyUnicode_FromFormat( + "<np._BoundArrayMethod `%s` for dtypes %S>", + self->method->name, dtypes); +} + + +static void +boundarraymethod_dealloc(PyObject *self) +{ + PyBoundArrayMethodObject *meth; + meth = ((PyBoundArrayMethodObject *)self); + int nargs = meth->method->nin + meth->method->nout; + + for (int i = 0; i < nargs; i++) { + Py_XDECREF(meth->dtypes[i]); + } + PyMem_Free(meth->dtypes); + + Py_XDECREF(meth->method); + + Py_TYPE(self)->tp_free(self); +} + + +/* + * Calls resolve_descriptors() and returns the casting level and the resolved + * descriptors as a tuple. If the operation is impossible returns (-1, None). + * May raise an error, but usually should not. + * The function validates the casting attribute compared to the returned + * casting level. 
+ */ +static PyObject * +boundarraymethod__resolve_descripors( + PyBoundArrayMethodObject *self, PyObject *descr_tuple) +{ + int nin = self->method->nin; + int nout = self->method->nout; + + PyArray_Descr *given_descrs[NPY_MAXARGS]; + PyArray_Descr *loop_descrs[NPY_MAXARGS]; + + if (!PyTuple_CheckExact(descr_tuple) || + PyTuple_Size(descr_tuple) != nin + nout) { + PyErr_Format(PyExc_ValueError, + "_resolve_descriptors() takes exactly one tuple with as many " + "elements as the method takes arguments (%d+%d).", nin, nout); + return NULL; + } + + for (int i = 0; i < nin + nout; i++) { + PyObject *tmp = PyTuple_GetItem(descr_tuple, i); + if (tmp == NULL) { + return NULL; + } + else if (tmp == Py_None) { + if (i < nin) { + PyErr_SetString(PyExc_ValueError, + "only output dtypes may be omitted (set to None)."); + return NULL; + } + given_descrs[i] = NULL; + } + else if (PyArray_DescrCheck(tmp)) { + if (Py_TYPE(tmp) != (PyTypeObject *)self->dtypes[i]) { + PyErr_Format(PyExc_ValueError, + "input dtype %S was not an exact instance of the bound " + "DType class %S.", tmp, self->dtypes[i]); + return NULL; + } + given_descrs[i] = (PyArray_Descr *)tmp; + } + else { + PyErr_SetString(PyExc_TypeError, + "dtype tuple can only contain dtype instances or None."); + return NULL; + } + } + + NPY_CASTING casting = self->method->resolve_descriptors( + self->method, self->dtypes, given_descrs, loop_descrs); + + if (casting < 0 && PyErr_Occurred()) { + return NULL; + } + else if (casting < 0) { + return Py_BuildValue("iO", casting, Py_None); + } + + PyObject *result_tuple = PyTuple_New(nin + nout); + if (result_tuple == NULL) { + return NULL; + } + for (int i = 0; i < nin + nout; i++) { + /* transfer ownership to the tuple. */ + PyTuple_SET_ITEM(result_tuple, i, (PyObject *)loop_descrs[i]); + } + + /* + * The casting flags should be the most generic casting level (except the + * cast-is-view flag. If no input is parametric, it must match exactly. 
+ */ + int parametric = 0; + for (int i = 0; i < nin + nout; i++) { + if (self->dtypes[i]->parametric) { + parametric = 1; + break; + } + } + if (!parametric) { + /* + * Non-parametric can only mismatch if it switches from no to equiv + * (e.g. due to byteorder changes). + */ + if (self->method->casting != (casting & ~_NPY_CAST_IS_VIEW) && + !(self->method->casting == NPY_NO_CASTING && + casting == NPY_EQUIV_CASTING)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + else { + NPY_CASTING cast = casting & ~_NPY_CAST_IS_VIEW; + if (cast != PyArray_MinCastSafety(cast, self->method->casting)) { + PyErr_Format(PyExc_RuntimeError, + "resolve_descriptors cast level did not match stored one " + "(expected %d, got %d) for method %s", + self->method->casting, (casting & ~_NPY_CAST_IS_VIEW), + self->method->name); + Py_DECREF(result_tuple); + return NULL; + } + } + + return Py_BuildValue("iN", casting, result_tuple); +} + + +PyMethodDef boundarraymethod_methods[] = { + {"_resolve_descriptors", (PyCFunction)boundarraymethod__resolve_descripors, + METH_O, "Resolve the given dtypes."}, + {NULL, 0, 0, NULL}, +}; + + +static PyObject * +boundarraymethod__supports_unaligned(PyBoundArrayMethodObject *self) +{ + return PyBool_FromLong(self->method->flags & NPY_METH_SUPPORTS_UNALIGNED); +} + + +PyGetSetDef boundarraymethods_getters[] = { + {"_supports_unaligned", + (getter)boundarraymethod__supports_unaligned, NULL, + "whether the method supports unaligned inputs/outputs.", NULL}, + {NULL, NULL, NULL, NULL, NULL}, +}; + + +NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "numpy._BoundArrayMethod", + .tp_basicsize = sizeof(PyBoundArrayMethodObject), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_repr = 
(reprfunc)boundarraymethod_repr, + .tp_dealloc = boundarraymethod_dealloc, + .tp_methods = boundarraymethod_methods, + .tp_getset = boundarraymethods_getters, +}; diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h new file mode 100644 index 000000000..15ea948ce --- /dev/null +++ b/numpy/core/src/multiarray/array_method.h @@ -0,0 +1,150 @@ +#ifndef _NPY_ARRAY_METHOD_H +#define _NPY_ARRAY_METHOD_H + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#include <Python.h> +#include <numpy/ndarraytypes.h> +#include <lowlevel_strided_loops.h> + + +typedef enum { + /* Flag for whether the GIL is required */ + NPY_METH_REQUIRES_PYAPI = 1 << 1, + /* + * Some functions cannot set floating point error flags, this flag + * gives us the option (not requirement) to skip floating point error + * setup/check. No function should set error flags and ignore them + * since it would interfere with chaining operations (e.g. casting). + */ + NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, + /* Whether the method supports unaligned access (not runtime) */ + NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + + /* All flags which can change at runtime */ + NPY_METH_RUNTIME_FLAGS = ( + NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS), +} NPY_ARRAYMETHOD_FLAGS; + + +struct PyArrayMethodObject_tag; + +/* + * This struct is specific to an individual (possibly repeated) call of + * the ArrayMethods strided operator, and as such is passed into the various + * methods of the ArrayMethod object (the resolve_descriptors function, + * the get_loop function and the individual lowlevel strided operator calls). + * It thus has to be persistent for one end-user call, and then be discarded. + * + * TODO: Before making this public, we should review which information should + * be stored on the Context/BoundArrayMethod vs. the ArrayMethod. + */ +typedef struct { + PyObject *caller; /* E.g. 
the original ufunc, may be NULL */ + struct PyArrayMethodObject_tag *method; + + /* Operand descriptors, filled in by resolve_descriptors */ + PyArray_Descr **descriptors; +} PyArrayMethod_Context; + + +typedef NPY_CASTING (resolve_descriptors_function)( + struct PyArrayMethodObject_tag *method, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + + +typedef int (get_loop_function)( + PyArrayMethod_Context *context, + int aligned, int move_references, + npy_intp *strides, + PyArray_StridedUnaryOp **out_loop, + NpyAuxData **out_transferdata, + NPY_ARRAYMETHOD_FLAGS *flags); + + +/* + * This struct will be public and necessary for creating a new ArrayMethod + * object (casting and ufuncs). + * We could version the struct, although since we allow passing arbitrary + * data using the slots, and have flags, that may be enough? + * (See also PyBoundArrayMethodObject.) + */ +typedef struct { + const char *name; + int nin, nout; + NPY_CASTING casting; + NPY_ARRAYMETHOD_FLAGS flags; + PyArray_DTypeMeta **dtypes; + PyType_Slot *slots; +} PyArrayMethod_Spec; + + +/* + * Structure of the ArrayMethod. This structure should probably not be made + * public. If necessary, we can make certain operations on it public + * (e.g. to allow users indirect access to `get_strided_loop`). + * + * NOTE: In some cases, it may not be clear whether information should be + * stored here or on the bound version. E.g. `nin` and `nout` (and in the + * future the gufunc `signature`) is already stored on the ufunc so that + * storing these here duplicates the information. + */ +typedef struct PyArrayMethodObject_tag { + PyObject_HEAD + char *name; + int nin, nout; + /* Casting is normally "safe" for functions, but is important for casts */ + NPY_CASTING casting; + /* default flags. 
The get_strided_loop function can override these */ + NPY_ARRAYMETHOD_FLAGS flags; + resolve_descriptors_function *resolve_descriptors; + get_loop_function *get_strided_loop; + /* Typical loop functions (contiguous ones are used in current casts) */ + PyArray_StridedUnaryOp *strided_loop; + PyArray_StridedUnaryOp *contiguous_loop; + PyArray_StridedUnaryOp *unaligned_strided_loop; + PyArray_StridedUnaryOp *unaligned_contiguous_loop; +} PyArrayMethodObject; + + +/* + * We will sometimes have to create a ArrayMethod and allow passing it around, + * similar to `instance.method` returning a bound method, e.g. a function like + * `ufunc.resolve()` can return a bound object. + * The current main purpose of the BoundArrayMethod is that it holds on to the + * `dtypes` (the classes), so that the `ArrayMethod` (e.g. for casts) will + * not create references cycles. In principle, it could hold any information + * which is also stored on the ufunc (and thus does not need to be repeated + * on the `ArrayMethod` itself. + */ +typedef struct { + PyObject_HEAD + PyArray_DTypeMeta **dtypes; + PyArrayMethodObject *method; +} PyBoundArrayMethodObject; + + +extern NPY_NO_EXPORT PyTypeObject PyArrayMethod_Type; +extern NPY_NO_EXPORT PyTypeObject PyBoundArrayMethod_Type; + +/* + * SLOTS IDs For the ArrayMethod creation, one public, the IDs are fixed. + * TODO: Before making it public, consider adding a large constant to private + * slots. 
+ */ +#define NPY_METH_resolve_descriptors 1 +#define NPY_METH_get_loop 2 +#define NPY_METH_strided_loop 3 +#define NPY_METH_contiguous_loop 4 +#define NPY_METH_unaligned_strided_loop 5 +#define NPY_METH_unaligned_contiguous_loop 6 + + +NPY_NO_EXPORT PyBoundArrayMethodObject * +PyArrayMethod_FromSpec_int(PyArrayMethod_Spec *spec, int private); + +#endif /*_NPY_ARRAY_METHOD_H*/ diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 5da1b5f29..a2474d79f 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -434,7 +434,9 @@ array_dealloc(PyArrayObject *self) { PyArrayObject_fields *fa = (PyArrayObject_fields *)self; - _dealloc_cached_buffer_info((PyObject*)self); + if (_buffer_info_free(fa->_buffer_info, (PyObject *)self) < 0) { + PyErr_WriteUnraisable(NULL); + } if (fa->weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *)self); @@ -1745,7 +1747,7 @@ array_free(PyObject * v) NPY_NO_EXPORT PyTypeObject PyArray_Type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "numpy.ndarray", - .tp_basicsize = NPY_SIZEOF_PYARRAYOBJECT, + .tp_basicsize = sizeof(PyArrayObject_fields), /* methods */ .tp_dealloc = (destructor)array_dealloc, .tp_repr = (reprfunc)array_repr, diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index 1f4f676ba..813850224 100644 --- a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -428,31 +428,23 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, /* - * Global information about all active buffers + * Information about all active buffers is stored as a linked list on + * the ndarray. The initial pointer is currently tagged to have a chance of + * detecting incompatible subclasses. * * Note: because for backward compatibility we cannot define bf_releasebuffer, * we must manually keep track of the additional data required by the buffers. 
*/ /* Additional per-array data required for providing the buffer interface */ -typedef struct { +typedef struct _buffer_info_t_tag { char *format; int ndim; Py_ssize_t *strides; Py_ssize_t *shape; + struct _buffer_info_t_tag *next; } _buffer_info_t; -/* - * { id(array): [list of pointers to _buffer_info_t, the last one is latest] } - * - * Because shape, strides, and format can be different for different buffers, - * we may need to keep track of multiple buffer infos for each array. - * - * However, when none of them has changed, the same buffer info may be reused. - * - * Thread-safety is provided by GIL. - */ -static PyObject *_buffer_info_cache = NULL; /* Fill in the info structure */ static _buffer_info_t* @@ -564,6 +556,7 @@ _buffer_info_new(PyObject *obj, int flags) Py_DECREF(descr); info->format = NULL; } + info->next = NULL; return info; fail: @@ -596,145 +589,161 @@ _buffer_info_cmp(_buffer_info_t *a, _buffer_info_t *b) return 0; } -static void -_buffer_info_free(_buffer_info_t *info) -{ - if (info->format) { - PyObject_Free(info->format); - } - PyObject_Free(info); -} -/* Get buffer info from the global dictionary */ -static _buffer_info_t* -_buffer_get_info(PyObject *obj, int flags) +/* + * Tag the buffer info pointer by adding 2 (unless it is NULL to simplify + * object initialization). + * The linked list of buffer-infos was appended to the array struct in + * NumPy 1.20. Tagging the pointer gives us a chance to raise/print + * a useful error message instead of crashing hard if a C-subclass uses + * the same field. 
+ */ +static NPY_INLINE void * +buffer_info_tag(void *buffer_info) { - PyObject *key = NULL, *item_list = NULL, *item = NULL; - _buffer_info_t *info = NULL, *old_info = NULL; - - if (_buffer_info_cache == NULL) { - _buffer_info_cache = PyDict_New(); - if (_buffer_info_cache == NULL) { - return NULL; - } + if (buffer_info == NULL) { + return buffer_info; } - - /* Compute information */ - info = _buffer_info_new(obj, flags); - if (info == NULL) { - return NULL; + else { + return (void *)((uintptr_t)buffer_info + 3); } +} - /* Check if it is identical with an old one; reuse old one, if yes */ - key = PyLong_FromVoidPtr((void*)obj); - if (key == NULL) { - goto fail; - } - item_list = PyDict_GetItem(_buffer_info_cache, key); - - if (item_list != NULL) { - Py_ssize_t item_list_length = PyList_GET_SIZE(item_list); - Py_INCREF(item_list); - if (item_list_length > 0) { - item = PyList_GetItem(item_list, item_list_length - 1); - old_info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - if (_buffer_info_cmp(info, old_info) != 0) { - old_info = NULL; /* Can't use this one, but possibly next */ - - if (item_list_length > 1 && info->ndim > 1) { - /* - * Some arrays are C- and F-contiguous and if they have more - * than one dimension, the buffer-info may differ between - * the two due to RELAXED_STRIDES_CHECKING. - * If we export both buffers, the first stored one may be - * the one for the other contiguity, so check both. - * This is generally very unlikely in all other cases, since - * in all other cases the first one will match unless array - * metadata was modified in-place (which is discouraged). - */ - item = PyList_GetItem(item_list, item_list_length - 2); - old_info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - if (_buffer_info_cmp(info, old_info) != 0) { - old_info = NULL; - } - } - } - if (old_info != NULL) { - /* - * The two info->format are considered equal if one of them - * has no format set (meaning the format is arbitrary and can - * be modified). 
If the new info has a format, but we reuse - * the old one, this transfers the ownership to the old one. - */ - if (old_info->format == NULL) { - old_info->format = info->format; - info->format = NULL; - } - _buffer_info_free(info); - info = old_info; - } - } +static NPY_INLINE int +_buffer_info_untag( + void *tagged_buffer_info, _buffer_info_t **buffer_info, PyObject *obj) +{ + if (tagged_buffer_info == NULL) { + *buffer_info = NULL; + return 0; } - else { - item_list = PyList_New(0); - if (item_list == NULL) { - goto fail; - } - if (PyDict_SetItem(_buffer_info_cache, key, item_list) != 0) { - goto fail; - } + if (NPY_UNLIKELY(((uintptr_t)tagged_buffer_info & 0x7) != 3)) { + PyErr_Format(PyExc_RuntimeError, + "Object of type %S appears to be C subclassed NumPy array, " + "void scalar, or allocated in a non-standard way." + "NumPy reserves the right to change the size of these " + "structures. Projects are required to take this into account " + "by either recompiling against a specific NumPy version or " + "padding the struct and enforcing a maximum NumPy version.", + Py_TYPE(obj)); + return -1; } + *buffer_info = (void *)((uintptr_t)tagged_buffer_info - 3); + return 0; +} - if (info != old_info) { - /* Needs insertion */ - item = PyLong_FromVoidPtr((void*)info); - if (item == NULL) { - goto fail; + +/* + * NOTE: for backward compatibility (esp. with PyArg_ParseTuple("s#", ...)) + * we do *not* define bf_releasebuffer at all. + * + * Instead, any extra data allocated with the buffer is released only in + * array_dealloc. + * + * Ensuring that the buffer stays in place is taken care by refcounting; + * ndarrays do not reallocate if there are references to them, and a buffer + * view holds one reference. + * + * This is stored in the array's _buffer_info slot (currently as a void *). 
+ */ +static void +_buffer_info_free_untagged(void *_buffer_info) +{ + _buffer_info_t *next = _buffer_info; + while (next != NULL) { + _buffer_info_t *curr = next; + next = curr->next; + if (curr->format) { + PyObject_Free(curr->format); } - PyList_Append(item_list, item); - Py_DECREF(item); + /* Shape is allocated as part of info */ + PyObject_Free(curr); } +} - Py_DECREF(item_list); - Py_DECREF(key); - return info; -fail: - if (info != NULL && info != old_info) { - _buffer_info_free(info); +/* + * Checks whether the pointer is tagged, and then frees the cache list. + * (The tag check is only for transition due to changed structure size in 1.20) + */ +NPY_NO_EXPORT int +_buffer_info_free(void *buffer_info, PyObject *obj) +{ + _buffer_info_t *untagged_buffer_info; + if (_buffer_info_untag(buffer_info, &untagged_buffer_info, obj) < 0) { + return -1; } - Py_XDECREF(item_list); - Py_XDECREF(key); - return NULL; + _buffer_info_free_untagged(untagged_buffer_info); + return 0; } -/* Clear buffer info from the global dictionary */ -static void -_buffer_clear_info(PyObject *arr) + +/* + * Get the buffer info returning either the old one (passed in) or a new + * buffer info which adds holds on to (and thus replaces) the old one. 
+ */ +static _buffer_info_t* +_buffer_get_info(void **buffer_info_cache_ptr, PyObject *obj, int flags) { - PyObject *key, *item_list, *item; - _buffer_info_t *info; - int k; + _buffer_info_t *info = NULL; + _buffer_info_t *stored_info; /* First currently stored buffer info */ + + if (_buffer_info_untag(*buffer_info_cache_ptr, &stored_info, obj) < 0) { + return NULL; + } + _buffer_info_t *old_info = stored_info; - if (_buffer_info_cache == NULL) { - return; + /* Compute information (it would be nice to skip this in simple cases) */ + info = _buffer_info_new(obj, flags); + if (info == NULL) { + return NULL; } - key = PyLong_FromVoidPtr((void*)arr); - item_list = PyDict_GetItem(_buffer_info_cache, key); - if (item_list != NULL) { - for (k = 0; k < PyList_GET_SIZE(item_list); ++k) { - item = PyList_GET_ITEM(item_list, k); - info = (_buffer_info_t*)PyLong_AsVoidPtr(item); - _buffer_info_free(info); + if (old_info != NULL && _buffer_info_cmp(info, old_info) != 0) { + _buffer_info_t *next_info = old_info->next; + old_info = NULL; /* Can't use this one, but possibly next */ + + if (info->ndim > 1 && next_info != NULL) { + /* + * Some arrays are C- and F-contiguous and if they have more + * than one dimension, the buffer-info may differ between + * the two due to RELAXED_STRIDES_CHECKING. + * If we export both buffers, the first stored one may be + * the one for the other contiguity, so check both. + * This is generally very unlikely in all other cases, since + * in all other cases the first one will match unless array + * metadata was modified in-place (which is discouraged). + */ + if (_buffer_info_cmp(info, next_info) == 0) { + old_info = next_info; + } + } + } + if (old_info != NULL) { + /* + * The two info->format are considered equal if one of them + * has no format set (meaning the format is arbitrary and can + * be modified). If the new info has a format, but we reuse + * the old one, this transfers the ownership to the old one. 
+ */ + if (old_info->format == NULL) { + old_info->format = info->format; + info->format = NULL; } - PyDict_DelItem(_buffer_info_cache, key); + _buffer_info_free_untagged(info); + info = old_info; + } + else { + /* Insert new info as first item in the linked buffer-info list. */ + info->next = stored_info; + *buffer_info_cache_ptr = buffer_info_tag(info); } - Py_DECREF(key); + return info; } + /* * Retrieving buffers for ndarray */ @@ -779,8 +788,9 @@ array_getbuffer(PyObject *obj, Py_buffer *view, int flags) goto fail; } - /* Fill in information */ - info = _buffer_get_info(obj, flags); + /* Fill in information (and add it to _buffer_info if necessary) */ + info = _buffer_get_info( + &((PyArrayObject_fields *)self)->_buffer_info, obj, flags); if (info == NULL) { goto fail; } @@ -830,90 +840,48 @@ fail: } /* - * Retrieving buffers for scalars + * Retrieving buffers for void scalar (which can contain any complex types), + * defined in buffer.c since it requires the complex format building logic. 
*/ -int +NPY_NO_EXPORT int void_getbuffer(PyObject *self, Py_buffer *view, int flags) { - _buffer_info_t *info = NULL; - PyArray_Descr *descr = NULL; - int elsize; + PyVoidScalarObject *scalar = (PyVoidScalarObject *)self; if (flags & PyBUF_WRITABLE) { PyErr_SetString(PyExc_BufferError, "scalar buffer is readonly"); - goto fail; - } - - /* Fill in information */ - info = _buffer_get_info(self, flags); - if (info == NULL) { - goto fail; - } - - view->ndim = info->ndim; - view->shape = info->shape; - view->strides = info->strides; - - if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) { - view->format = info->format; - } else { - view->format = NULL; - } - - descr = PyArray_DescrFromScalar(self); - view->buf = (void *)scalar_value(self, descr); - elsize = descr->elsize; - view->len = elsize; - if (PyArray_IsScalar(self, Datetime) || PyArray_IsScalar(self, Timedelta)) { - elsize = 1; /* descr->elsize,char is 8,'M', but we return 1,'B' */ + return -1; } - view->itemsize = elsize; - - Py_DECREF(descr); + view->ndim = 0; + view->shape = NULL; + view->strides = NULL; + view->suboffsets = NULL; + view->len = scalar->descr->elsize; + view->itemsize = scalar->descr->elsize; view->readonly = 1; view->suboffsets = NULL; - view->obj = self; Py_INCREF(self); - return 0; - -fail: - view->obj = NULL; - return -1; -} - -/* - * NOTE: for backward compatibility (esp. with PyArg_ParseTuple("s#", ...)) - * we do *not* define bf_releasebuffer at all. - * - * Instead, any extra data allocated with the buffer is released only in - * array_dealloc. - * - * Ensuring that the buffer stays in place is taken care by refcounting; - * ndarrays do not reallocate if there are references to them, and a buffer - * view holds one reference. 
- */ - -NPY_NO_EXPORT void -_dealloc_cached_buffer_info(PyObject *self) -{ - int reset_error_state = 0; - PyObject *ptype, *pvalue, *ptraceback; - - /* This function may be called when processing an exception -- - * we need to stash the error state to avoid confusing PyDict - */ + view->obj = self; + view->buf = scalar->obval; - if (PyErr_Occurred()) { - reset_error_state = 1; - PyErr_Fetch(&ptype, &pvalue, &ptraceback); + if (((flags & PyBUF_FORMAT) != PyBUF_FORMAT)) { + /* It is unnecessary to find the correct format */ + view->format = NULL; + return 0; } - _buffer_clear_info(self); - - if (reset_error_state) { - PyErr_Restore(ptype, pvalue, ptraceback); + /* + * If a format is being exported, we need to use _buffer_get_info + * to find the correct format. This format must also be stored, since + * at least in theory it can change (in practice it should never change). + */ + _buffer_info_t *info = _buffer_get_info(&scalar->_buffer_info, self, flags); + if (info == NULL) { + return -1; } + view->format = info->format; + return 0; } diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 061db2250..8ab592015 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1420,7 +1420,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) { PyObject *obj; PyObject *str; - #if (PY_VERSION_HEX >= 0x030700A2) + #if PY_VERSION_HEX >= 0x030700A2 && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300) const char *docstr; #else char *docstr; diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index f700bdc99..f9dd35a73 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -18,10 +18,13 @@ #include "dtypemeta.h" #include "scalartypes.h" #include "mapping.h" +#include "legacy_dtype_implementation.h" #include "convert_datatype.h" #include "_datetime.h" 
#include "datetime_strings.h" +#include "array_method.h" +#include "usertypes.h" /* @@ -35,6 +38,183 @@ */ NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[] = {0, 3, 5, 10, 10, 20, 20, 20, 20}; + +static PyObject * +PyArray_GetGenericToVoidCastingImpl(void); + +static PyObject * +PyArray_GetVoidToGenericCastingImpl(void); + +static PyObject * +PyArray_GetGenericToObjectCastingImpl(void); + +static PyObject * +PyArray_GetObjectToGenericCastingImpl(void); + + +/** + * Fetch the casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A castingimpl (PyArrayDTypeMethod *), None or NULL with an + * error set. + */ +static PyObject * +PyArray_GetCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *res = PyDict_GetItem(from->castingimpls, (PyObject *)to); + if (res != NULL || PyErr_Occurred()) { + Py_XINCREF(res); + return res; + } + /* + * The following code looks up CastingImpl based on the fact that anything + * can be cast to and from objects or structured (void) dtypes. + * + * The last part adds casts dynamically based on legacy definition + */ + if (from->type_num == NPY_OBJECT) { + res = PyArray_GetObjectToGenericCastingImpl(); + } + else if (to->type_num == NPY_OBJECT) { + res = PyArray_GetGenericToObjectCastingImpl(); + } + else if (from->type_num == NPY_VOID) { + res = PyArray_GetVoidToGenericCastingImpl(); + } + else if (to->type_num == NPY_VOID) { + res = PyArray_GetGenericToVoidCastingImpl(); + } + else if (from->type_num < NPY_NTYPES && to->type_num < NPY_NTYPES) { + /* All builtin dtypes have their casts explicitly defined. 
*/ + PyErr_Format(PyExc_RuntimeError, + "builtin cast from %S to %s not found, this should not " + "be possible.", from, to); + return NULL; + } + else { + if (from->parametric || to->parametric) { + Py_RETURN_NONE; + } + /* Reject non-legacy dtypes (they need to use the new API) */ + if (!from->legacy || !to->legacy) { + Py_RETURN_NONE; + } + if (from != to) { + /* A cast function must have been registered */ + PyArray_VectorUnaryFunc *castfunc = PyArray_GetCastFunc( + from->singleton, to->type_num); + if (castfunc == NULL) { + PyErr_Clear(); + /* Remember that this cast is not possible */ + if (PyDict_SetItem(from->castingimpls, (PyObject *) to, Py_None) < 0) { + return NULL; + } + Py_RETURN_NONE; + } + } + + /* PyArray_AddLegacyWrapping_CastingImpl find the correct casting level: */ + /* + * TODO: Possibly move this to the cast registration time. But if we do + * that, we have to also update the cast when the casting safety + * is registered. + */ + if (PyArray_AddLegacyWrapping_CastingImpl(from, to, -1) < 0) { + return NULL; + } + return PyArray_GetCastingImpl(from, to); + } + + if (res == NULL) { + return NULL; + } + if (PyDict_SetItem(from->castingimpls, (PyObject *)to, res) < 0) { + Py_DECREF(res); + return NULL; + } + return res; +} + + +/** + * Fetch the (bound) casting implementation from one DType to another. + * + * @params from + * @params to + * + * @returns A bound casting implementation or None (or NULL for error). 
+ */ +static PyObject * +PyArray_GetBoundCastingImpl(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyObject *method = PyArray_GetCastingImpl(from, to); + if (method == NULL || method == Py_None) { + return method; + } + + /* TODO: Create better way to wrap method into bound method */ + PyBoundArrayMethodObject *res; + res = PyObject_New(PyBoundArrayMethodObject, &PyBoundArrayMethod_Type); + if (res == NULL) { + return NULL; + } + res->method = (PyArrayMethodObject *)method; + res->dtypes = PyMem_Malloc(2 * sizeof(PyArray_DTypeMeta *)); + if (res->dtypes == NULL) { + Py_DECREF(res); + return NULL; + } + Py_INCREF(from); + res->dtypes[0] = from; + Py_INCREF(to); + res->dtypes[1] = to; + + return (PyObject *)res; +} + + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args) +{ + PyArray_DTypeMeta *from, *to; + if (!PyArg_ParseTuple(args, "O!O!:_get_castingimpl", + &PyArrayDTypeMeta_Type, &from, &PyArrayDTypeMeta_Type, &to)) { + return NULL; + } + return PyArray_GetBoundCastingImpl(from, to); +} + + +/** + * Find the minimal cast safety level given two cast-levels as input. + * Supports the NPY_CAST_IS_VIEW check, and should be preferred to allow + * extending cast-levels if necessary. + * It is not valid for one of the arguments to be -1 to indicate an error. + * + * @param casting1 + * @param casting2 + * @return The minimal casting error (can be -1). 
+ */ +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2) +{ + if (casting1 < 0 || casting2 < 0) { + return -1; + } + NPY_CASTING view = casting1 & casting2 & _NPY_CAST_IS_VIEW; + casting1 = casting1 & ~_NPY_CAST_IS_VIEW; + casting2 = casting2 & ~_NPY_CAST_IS_VIEW; + /* larger casting values are less safe */ + if (casting1 > casting2) { + return casting1 | view; + } + return casting2 | view; +} + + /*NUMPY_API * For backward compatibility * @@ -132,170 +312,6 @@ PyArray_GetCastFunc(PyArray_Descr *descr, int type_num) return NULL; } -/* - * Legacy function to find the correct dtype when casting from any built-in - * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic - * units. - * - * This function returns a dtype based on flex_dtype and the values in - * data_dtype. It also calls Py_DECREF on the flex_dtype. If the - * flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. 
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) -{ - PyArray_DatetimeMetaData *meta; - PyArray_Descr *retval = NULL; - int flex_type_num; - - if (flex_dtype == NULL) { - return retval; - } - - flex_type_num = flex_dtype->type_num; - - /* Flexible types with expandable size */ - if (PyDataType_ISUNSIZED(flex_dtype)) { - /* First replace the flex_dtype */ - retval = PyArray_DescrNew(flex_dtype); - Py_DECREF(flex_dtype); - if (retval == NULL) { - return retval; - } - - if (data_dtype->type_num == flex_type_num || - flex_type_num == NPY_VOID) { - (retval)->elsize = data_dtype->elsize; - } - else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { - npy_intp size = 8; - - /* - * Get a string-size estimate of the input. These - * are generallly the size needed, rounded up to - * a multiple of eight. - */ - switch (data_dtype->type_num) { - case NPY_BOOL: - case NPY_UBYTE: - case NPY_BYTE: - case NPY_USHORT: - case NPY_SHORT: - case NPY_UINT: - case NPY_INT: - case NPY_ULONG: - case NPY_LONG: - case NPY_ULONGLONG: - case NPY_LONGLONG: - if (data_dtype->kind == 'b') { - /* 5 chars needed for cast to 'True' or 'False' */ - size = 5; - } - else if (data_dtype->elsize > 8 || - data_dtype->elsize < 0) { - /* - * Element size should never be greater than 8 or - * less than 0 for integer type, but just in case... 
- */ - break; - } - else if (data_dtype->kind == 'u') { - size = REQUIRED_STR_LEN[data_dtype->elsize]; - } - else if (data_dtype->kind == 'i') { - /* Add character for sign symbol */ - size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; - } - break; - case NPY_HALF: - case NPY_FLOAT: - case NPY_DOUBLE: - size = 32; - break; - case NPY_LONGDOUBLE: - size = 48; - break; - case NPY_CFLOAT: - case NPY_CDOUBLE: - size = 2 * 32; - break; - case NPY_CLONGDOUBLE: - size = 2 * 48; - break; - case NPY_OBJECT: - size = 64; - break; - case NPY_STRING: - case NPY_VOID: - size = data_dtype->elsize; - break; - case NPY_UNICODE: - size = data_dtype->elsize / 4; - break; - case NPY_DATETIME: - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - Py_DECREF(retval); - return NULL; - } - size = get_datetime_iso_8601_strlen(0, meta->base); - break; - case NPY_TIMEDELTA: - size = 21; - break; - } - - if (flex_type_num == NPY_STRING) { - retval->elsize = size; - } - else if (flex_type_num == NPY_UNICODE) { - retval->elsize = size * 4; - } - } - else { - /* - * We should never get here, but just in case someone adds - * a new flex dtype... - */ - PyErr_SetString(PyExc_TypeError, - "don't know how to adapt flex dtype"); - Py_DECREF(retval); - return NULL; - } - } - /* Flexible type with generic time unit that adapts */ - else if (flex_type_num == NPY_DATETIME || - flex_type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(flex_dtype); - retval = flex_dtype; - if (meta == NULL) { - return NULL; - } - - if (meta->base == NPY_FR_GENERIC) { - if (data_dtype->type_num == NPY_DATETIME || - data_dtype->type_num == NPY_TIMEDELTA) { - meta = get_datetime_metadata_from_dtype(data_dtype); - if (meta == NULL) { - return NULL; - } - - retval = create_datetime_dtype(flex_type_num, meta); - Py_DECREF(flex_dtype); - } - } - } - else { - retval = flex_dtype; - } - return retval; -} /* * Must be broadcastable. 
@@ -325,42 +341,116 @@ PyArray_CastAnyTo(PyArrayObject *out, PyArrayObject *mp) return PyArray_CopyAnyInto(out, mp); } + +/** + * Given two dtype instances, find the correct casting safety. + * + * Note that in many cases, it may be preferable to fetch the casting + * implementations fully to have them available for doing the actual cast + * later. + * + * @param from + * @param to The descriptor to cast to (may be NULL) + * @param to_dtype If `to` is NULL, must pass the to_dtype (otherwise this + * is ignored). + * @return NPY_CASTING or -1 on error or if the cast is not possible. + */ +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype) +{ + NPY_CASTING casting; + if (to != NULL) { + to_dtype = NPY_DTYPE(to); + } + PyObject *meth = PyArray_GetCastingImpl(NPY_DTYPE(from), to_dtype); + if (meth == NULL) { + return -1; + } + if (meth == Py_None) { + Py_DECREF(Py_None); + return -1; + } + + PyArrayMethodObject *castingimpl = (PyArrayMethodObject *)meth; + + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(from), to_dtype}; + PyArray_Descr *descrs[2] = {from, to}; + PyArray_Descr *out_descrs[2]; + + casting = castingimpl->resolve_descriptors( + castingimpl, dtypes, descrs, out_descrs); + Py_DECREF(meth); + if (casting < 0) { + return -1; + } + /* The returned descriptors may not match, requiring a second check */ + if (out_descrs[0] != descrs[0]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[0], out_descrs[0], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + if (descrs[1] != NULL && out_descrs[1] != descrs[1]) { + NPY_CASTING from_casting = PyArray_GetCastSafety( + descrs[1], out_descrs[1], NULL); + casting = PyArray_MinCastSafety(casting, from_casting); + if (casting < 0) { + goto finish; + } + } + + finish: + Py_DECREF(out_descrs[0]); + Py_DECREF(out_descrs[1]); + /* NPY_NO_CASTING has to be used for 
(NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW) */ + assert(casting != (NPY_EQUIV_CASTING|_NPY_CAST_IS_VIEW)); + return casting; +} + + /*NUMPY_API *Check the type coercion rules. */ NPY_NO_EXPORT int PyArray_CanCastSafely(int fromtype, int totype) { - PyArray_Descr *from; - - /* Fast table lookup for small type numbers */ - if ((unsigned int)fromtype < NPY_NTYPES && - (unsigned int)totype < NPY_NTYPES) { - return _npy_can_cast_safely_table[fromtype][totype]; +#if NPY_USE_NEW_CASTINGIMPL + PyArray_DTypeMeta *from = PyArray_DTypeFromTypeNum(fromtype); + if (from == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } - - /* Identity */ - if (fromtype == totype) { - return 1; + PyArray_DTypeMeta *to = PyArray_DTypeFromTypeNum(totype); + if (to == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } + PyObject *castingimpl = PyArray_GetCastingImpl(from, to); + Py_DECREF(from); + Py_DECREF(to); - from = PyArray_DescrFromType(fromtype); - /* - * cancastto is a NPY_NOTYPE terminated C-int-array of types that - * the data-type can be cast to safely. 
- */ - if (from->f->cancastto) { - int *curtype = from->f->cancastto; - - while (*curtype != NPY_NOTYPE) { - if (*curtype++ == totype) { - return 1; - } - } + if (castingimpl == NULL) { + PyErr_WriteUnraisable(NULL); + return 0; } - return 0; + else if (castingimpl == Py_None) { + Py_DECREF(Py_None); + return 0; + } + NPY_CASTING safety = ((PyArrayMethodObject *)castingimpl)->casting; + int res = PyArray_MinCastSafety(safety, NPY_SAFE_CASTING) == NPY_SAFE_CASTING; + Py_DECREF(castingimpl); + return res; +#else + return PyArray_LegacyCanCastSafely(fromtype, totype); +#endif } + + /*NUMPY_API * leaves reference count alone --- cannot be NULL * @@ -370,117 +460,16 @@ PyArray_CanCastSafely(int fromtype, int totype) NPY_NO_EXPORT npy_bool PyArray_CanCastTo(PyArray_Descr *from, PyArray_Descr *to) { - int from_type_num = from->type_num; - int to_type_num = to->type_num; - npy_bool ret; - - ret = (npy_bool) PyArray_CanCastSafely(from_type_num, to_type_num); - if (ret) { - /* Check String and Unicode more closely */ - if (from_type_num == NPY_STRING) { - if (to_type_num == NPY_STRING) { - ret = (from->elsize <= to->elsize); - } - else if (to_type_num == NPY_UNICODE) { - ret = (from->elsize << 2 <= to->elsize); - } - } - else if (from_type_num == NPY_UNICODE) { - if (to_type_num == NPY_UNICODE) { - ret = (from->elsize <= to->elsize); - } - } - /* - * For datetime/timedelta, only treat casts moving towards - * more precision as safe. 
- */ - else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_datetime64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - else if (from_type_num == NPY_TIMEDELTA && - to_type_num == NPY_TIMEDELTA) { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - return can_cast_timedelta64_metadata(meta1, meta2, - NPY_SAFE_CASTING); - } - /* - * If to_type_num is STRING or unicode - * see if the length is long enough to hold the - * stringified value of the object. - */ - else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { - /* - * Boolean value cast to string type is 5 characters max - * for string 'False'. - */ - int char_size = 1; - if (to_type_num == NPY_UNICODE) { - char_size = 4; - } - - ret = 0; - if (PyDataType_ISUNSIZED(to)) { - ret = 1; - } - /* - * Need at least 5 characters to convert from boolean - * to 'True' or 'False'. 
- */ - else if (from->kind == 'b' && to->elsize >= 5 * char_size) { - ret = 1; - } - else if (from->kind == 'u') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - else if (to->elsize >= - REQUIRED_STR_LEN[from->elsize] * char_size) { - ret = 1; - } - } - else if (from->kind == 'i') { - /* Guard against unexpected integer size */ - if (from->elsize > 8 || from->elsize < 0) { - ret = 0; - } - /* Extra character needed for sign */ - else if (to->elsize >= - (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { - ret = 1; - } - } - } - } - return ret; +#if NPY_USE_NEW_CASTINGIMPL + return PyArray_CanCastTypeTo(from, to, NPY_SAFE_CASTING); +#else + return PyArray_LegacyCanCastTo(from, to); +#endif } + /* Provides an ordering for the dtype 'kind' character codes */ -static int +NPY_NO_EXPORT int dtype_kind_to_ordering(char kind) { switch (kind) { @@ -541,51 +530,6 @@ type_num_unsigned_to_signed(int type_num) } } -/* - * Compare two field dictionaries for castability. - * - * Return 1 if 'field1' can be cast to 'field2' according to the rule - * 'casting', 0 if not. - * - * Castabiliy of field dictionaries is defined recursively: 'field1' and - * 'field2' must have the same field names (possibly in different - * orders), and the corresponding field types must be castable according - * to the given casting rule. 
- */ -static int -can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) -{ - Py_ssize_t ppos; - PyObject *key; - PyObject *tuple1, *tuple2; - - if (field1 == field2) { - return 1; - } - if (field1 == NULL || field2 == NULL) { - return 0; - } - if (PyDict_Size(field1) != PyDict_Size(field2)) { - return 0; - } - - /* Iterate over all the fields and compare for castability */ - ppos = 0; - while (PyDict_Next(field1, &ppos, &key, &tuple1)) { - if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { - return 0; - } - /* Compare the dtype of the field for castability */ - if (!PyArray_CanCastTypeTo( - (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), - (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), - casting)) { - return 0; - } - } - - return 1; -} /*NUMPY_API * Returns true if data of type 'from' may be cast to data of type @@ -593,224 +537,41 @@ can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) */ NPY_NO_EXPORT npy_bool PyArray_CanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, - NPY_CASTING casting) + NPY_CASTING casting) { +#if NPY_USE_NEW_CASTINGIMPL /* - * Fast paths for equality and for basic types. + * NOTE: This code supports U and S, this is identical to the code + * in `ctors.c` which does not allow these dtypes to be attached + * to an array. Unlike the code for `np.array(..., dtype=)` + * which uses `PyArray_ExtractDTypeAndDescriptor` it rejects "m8" + * as a flexible dtype instance representing a DType. */ - if (from == to || - ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || - PyDataType_ISOBJECT(from)) && - NPY_LIKELY(from->type_num == to->type_num) && - NPY_LIKELY(from->byteorder == to->byteorder))) { - return 1; - } - /* - * Cases with subarrays and fields need special treatment. 
- */ - if (PyDataType_HASFIELDS(from)) { - /* - * If from is a structured data type, then it can be cast to a simple - * non-object one only for unsafe casting *and* if it has a single - * field; recurse just in case the single field is itself structured. - */ - if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { - if (casting == NPY_UNSAFE_CASTING && - PyDict_Size(from->fields) == 1) { - Py_ssize_t ppos = 0; - PyObject *tuple; - PyArray_Descr *field; - PyDict_Next(from->fields, &ppos, NULL, &tuple); - field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); - /* - * For a subarray, we need to get the underlying type; - * since we already are casting unsafely, we can ignore - * the shape. - */ - if (PyDataType_HASSUBARRAY(field)) { - field = field->subarray->base; - } - return PyArray_CanCastTypeTo(field, to, casting); - } - else { - return 0; - } - } - /* - * Casting from one structured data type to another depends on the fields; - * we pass that case on to the EquivTypenums case below. - * - * TODO: move that part up here? Need to check whether equivalent type - * numbers is an addition constraint that is needed. - * - * TODO/FIXME: For now, always allow structured to structured for unsafe - * casting; this is not correct, but needed since the treatment in can_cast - * below got out of sync with astype; see gh-13667. - */ - if (casting == NPY_UNSAFE_CASTING) { - return 1; - } - } - else if (PyDataType_HASFIELDS(to)) { - /* - * If "from" is a simple data type and "to" has fields, then only - * unsafe casting works (and that works always, even to multiple fields). - */ - return casting == NPY_UNSAFE_CASTING; - } /* - * Everything else we consider castable for unsafe for now. - * FIXME: ensure what we do here is consistent with "astype", - * i.e., deal more correctly with subarrays and user-defined dtype. + * TODO: We should grow support for `np.can_cast("d", "S")` being + * different from `np.can_cast("d", "S0")` here, at least for + * the python side API. 
*/ - else if (casting == NPY_UNSAFE_CASTING) { - return 1; + NPY_CASTING safety; + if (PyDataType_ISUNSIZED(to) && to->subarray == NULL) { + safety = PyArray_GetCastSafety(from, NULL, NPY_DTYPE(to)); } - /* - * Equivalent simple types can be cast with any value of 'casting', but - * we need to be careful about structured to structured. - */ - if (PyArray_EquivTypenums(from->type_num, to->type_num)) { - /* For complicated case, use EquivTypes (for now) */ - if (PyTypeNum_ISUSERDEF(from->type_num) || - from->subarray != NULL) { - int ret; - - /* Only NPY_NO_CASTING prevents byte order conversion */ - if ((casting != NPY_NO_CASTING) && - (!PyArray_ISNBO(from->byteorder) || - !PyArray_ISNBO(to->byteorder))) { - PyArray_Descr *nbo_from, *nbo_to; - - nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); - nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); - if (nbo_from == NULL || nbo_to == NULL) { - Py_XDECREF(nbo_from); - Py_XDECREF(nbo_to); - PyErr_Clear(); - return 0; - } - ret = PyArray_EquivTypes(nbo_from, nbo_to); - Py_DECREF(nbo_from); - Py_DECREF(nbo_to); - } - else { - ret = PyArray_EquivTypes(from, to); - } - return ret; - } - - if (PyDataType_HASFIELDS(from)) { - switch (casting) { - case NPY_EQUIV_CASTING: - case NPY_SAFE_CASTING: - case NPY_SAME_KIND_CASTING: - /* - * `from' and `to' must have the same fields, and - * corresponding fields must be (recursively) castable. 
- */ - return can_cast_fields(from->fields, to->fields, casting); - - case NPY_NO_CASTING: - default: - return PyArray_EquivTypes(from, to); - } - } - - switch (from->type_num) { - case NPY_DATETIME: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_datetime64_metadata(meta1, meta2, casting); - } - else { - return can_cast_datetime64_metadata(meta1, meta2, casting); - } - } - case NPY_TIMEDELTA: { - PyArray_DatetimeMetaData *meta1, *meta2; - meta1 = get_datetime_metadata_from_dtype(from); - if (meta1 == NULL) { - PyErr_Clear(); - return 0; - } - meta2 = get_datetime_metadata_from_dtype(to); - if (meta2 == NULL) { - PyErr_Clear(); - return 0; - } - - if (casting == NPY_NO_CASTING) { - return PyArray_ISNBO(from->byteorder) == - PyArray_ISNBO(to->byteorder) && - can_cast_timedelta64_metadata(meta1, meta2, casting); - } - else { - return can_cast_timedelta64_metadata(meta1, meta2, casting); - } - } - default: - switch (casting) { - case NPY_NO_CASTING: - return PyArray_EquivTypes(from, to); - case NPY_EQUIV_CASTING: - return (from->elsize == to->elsize); - case NPY_SAFE_CASTING: - return (from->elsize <= to->elsize); - default: - return 1; - } - break; - } + else { + safety = PyArray_GetCastSafety(from, to, NPY_DTYPE(to)); } - /* If safe or same-kind casts are allowed */ - else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { - if (PyArray_CanCastTo(from, to)) { - return 1; - } - else if(casting == NPY_SAME_KIND_CASTING) { - /* - * Also allow casting from lower to higher kinds, according - * to the ordering provided by dtype_kind_to_ordering. 
- * Some kinds, like datetime, don't fit in the hierarchy, - * and are special cased as -1. - */ - int from_order, to_order; - - from_order = dtype_kind_to_ordering(from->kind); - to_order = dtype_kind_to_ordering(to->kind); - - if (to->kind == 'm') { - /* both types being timedelta is already handled before. */ - int integer_order = dtype_kind_to_ordering('i'); - return (from_order != -1) && (from_order <= integer_order); - } - return (from_order != -1) && (from_order <= to_order); - } - else { - return 0; - } - } - /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ - else { + if (safety < 0) { + PyErr_Clear(); return 0; } + /* If casting is the smaller (or equal) safety we match */ + return PyArray_MinCastSafety(safety, casting) == casting; +#else + return PyArray_LegacyCanCastTypeTo(from, to, casting); +#endif } + /* CanCastArrayTo needs this function */ static int min_scalar_type_num(char *valueptr, int type_num, int *is_small_unsigned); @@ -1035,7 +796,7 @@ ensure_dtype_nbo(PyArray_Descr *type) /** * This function should possibly become public API eventually. At this * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`. - * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement + * We will use `CastingImpl[from, to].resolve_descriptors(...)` to implement * this logic. * Before that, the API needs to be reviewed though. 
* @@ -1067,6 +828,35 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) return descr; } +#if NPY_USE_NEW_CASTINGIMPL + PyObject *tmp = PyArray_GetCastingImpl(NPY_DTYPE(descr), given_DType); + if (tmp == NULL || tmp == Py_None) { + Py_XDECREF(tmp); + goto error; + } + PyArray_DTypeMeta *dtypes[2] = {NPY_DTYPE(descr), given_DType}; + PyArray_Descr *given_descrs[2] = {descr, NULL}; + PyArray_Descr *loop_descrs[2]; + + PyArrayMethodObject *meth = (PyArrayMethodObject *)tmp; + NPY_CASTING casting = meth->resolve_descriptors( + meth, dtypes, given_descrs, loop_descrs); + Py_DECREF(tmp); + if (casting < 0) { + goto error; + } + Py_DECREF(loop_descrs[0]); + return loop_descrs[1]; + + error:; /* (; due to compiler limitations) */ + PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL; + PyErr_Fetch(&err_type, &err_value, &err_traceback); + PyErr_Format(PyExc_ValueError, + "cannot cast dtype %S to %S.", descr, given_DType); + npy_PyErr_ChainExceptions(err_type, err_value, err_traceback); + return NULL; + +#else /* NPY_USE_NEW_CASTS */ if (!given_DType->legacy) { PyErr_SetString(PyExc_NotImplementedError, "Must use casting to find the correct DType for a parametric " @@ -1077,6 +867,7 @@ PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType) PyArray_Descr *flex_dtype = PyArray_DescrNew(given_DType->singleton); return PyArray_AdaptFlexibleDType(descr, flex_dtype); +#endif /* NPY_USE_NEW_CASTS */ } @@ -2007,3 +1798,1108 @@ PyArray_ConvertToCommonType(PyObject *op, int *retn) PyDataMem_FREE(mps); return NULL; } + + +/** + * Private function to add a casting implementation by unwrapping a bound + * array method. + * + * @param meth + * @return 0 on success -1 on failure. 
+ */ +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth) +{ + if (meth->method->nin != 1 || meth->method->nout != 1) { + PyErr_SetString(PyExc_TypeError, + "A cast must have one input and one output."); + return -1; + } + if (meth->dtypes[0] == meth->dtypes[1]) { + if (!(meth->method->flags & NPY_METH_SUPPORTS_UNALIGNED)) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must currently support unaligned data. (method: %s)", + meth->method->name); + return -1; + } + if ((meth->method->casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING) { + PyErr_Format(PyExc_TypeError, + "A cast where input and output DType (class) are identical " + "must signal `no-casting`. (method: %s)", + meth->method->name); + return -1; + } + } + if (PyDict_Contains(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1])) { + PyErr_Format(PyExc_RuntimeError, + "A cast was already added for %S -> %S. (method: %s)", + meth->dtypes[0], meth->dtypes[1], meth->method->name); + return -1; + } + if (PyDict_SetItem(meth->dtypes[0]->castingimpls, + (PyObject *)meth->dtypes[1], (PyObject *)meth->method) < 0) { + return -1; + } + return 0; +} + +/** + * Add a new casting implementation using a PyArrayMethod_Spec. + * + * @param spec + * @param private If private, allow slots not publically exposed. 
+ * @return 0 on success -1 on failure + */ +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private) +{ + /* Create a bound method, unbind and store it */ + PyBoundArrayMethodObject *meth = PyArrayMethod_FromSpec_int(spec, private); + if (meth == NULL) { + return -1; + } + int res = PyArray_AddCastingImplmentation(meth); + Py_DECREF(meth); + if (res < 0) { + return -1; + } + return 0; +} + + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + /* this function only makes sense for non-flexible legacy dtypes: */ + assert(loop_descrs[0]->elsize == loop_descrs[1]->elsize); + + /* + * Legacy dtypes (except datetime) only have byte-order and elsize as + * storage parameters. + */ + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +/* + * Simple dtype resolver for casting between two different (non-parametric) + * (legacy) dtypes. 
+ */ +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + assert(dtypes[0]->legacy && dtypes[1]->legacy); + + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] != NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + } + + if (self->casting != NPY_NO_CASTING) { + return self->casting; + } + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + return NPY_EQUIV_CASTING; +} + + +static int +add_numeric_cast(PyArray_DTypeMeta *from, PyArray_DTypeMeta *to) +{ + PyType_Slot slots[6]; + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + .name = "numeric_cast", + .nin = 1, + .nout = 1, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + + npy_intp from_itemsize = dtypes[0]->singleton->elsize; + npy_intp to_itemsize = dtypes[1]->singleton->elsize; + + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &simple_cast_resolve_descriptors; + /* Fetch the optimized loops (2<<10 is a non-contiguous stride) */ + slots[1].slot = NPY_METH_strided_loop; + slots[1].pfunc = PyArray_GetStridedNumericCastFn( + 1, 2<<10, 2<<10, from->type_num, to->type_num); + slots[2].slot = NPY_METH_contiguous_loop; + slots[2].pfunc = PyArray_GetStridedNumericCastFn( + 1, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[3].slot = NPY_METH_unaligned_strided_loop; + slots[3].pfunc = PyArray_GetStridedNumericCastFn( + 0, 2<<10, 2<<10, from->type_num, to->type_num); + slots[4].slot = NPY_METH_unaligned_contiguous_loop; + slots[4].pfunc = 
PyArray_GetStridedNumericCastFn( + 0, from_itemsize, to_itemsize, from->type_num, to->type_num); + slots[5].slot = 0; + slots[5].pfunc = NULL; + + assert(slots[1].pfunc && slots[2].pfunc && slots[3].pfunc && slots[4].pfunc); + + /* Find the correct casting level, and special case no-cast */ + if (dtypes[0]->kind == dtypes[1]->kind && from_itemsize == to_itemsize) { + spec.casting = NPY_NO_CASTING; + + /* When there is no casting (equivalent C-types) use byteswap loops */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &legacy_same_dtype_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "numeric_copy_or_byteswap"; + spec.flags |= NPY_METH_NO_FLOATINGPOINT_ERRORS; + } + else if (_npy_can_cast_safely_table[from->type_num][to->type_num]) { + spec.casting = NPY_SAFE_CASTING; + } + else if (dtype_kind_to_ordering(dtypes[0]->kind) <= + dtype_kind_to_ordering(dtypes[1]->kind)) { + spec.casting = NPY_SAME_KIND_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + /* Create a bound method, unbind and store it */ + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +/* + * This registers the castingimpl for all casts between numeric types. + * Eventually, this function should likely be defined as part of a .c.src + * file to remove `PyArray_GetStridedNumericCastFn` entirely. 
+ */ +static int +PyArray_InitializeNumericCasts(void) +{ + for (int from = 0; from < NPY_NTYPES; from++) { + if (!PyTypeNum_ISNUMBER(from) && from != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *from_dt = PyArray_DTypeFromTypeNum(from); + + for (int to = 0; to < NPY_NTYPES; to++) { + if (!PyTypeNum_ISNUMBER(to) && to != NPY_BOOL) { + continue; + } + PyArray_DTypeMeta *to_dt = PyArray_DTypeFromTypeNum(to); + int res = add_numeric_cast(from_dt, to_dt); + Py_DECREF(to_dt); + if (res < 0) { + Py_DECREF(from_dt); + return -1; + } + } + } + return 0; +} + + +static int +cast_to_string_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* + * NOTE: The following code used to be part of PyArray_AdaptFlexibleDType + * + * Get a string-size estimate of the input. These + * are generallly the size needed, rounded up to + * a multiple of eight. + */ + npy_intp size = -1; + switch (dtypes[0]->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + assert(dtypes[0]->singleton->elsize <= 8); + assert(dtypes[0]->singleton->elsize > 0); + if (dtypes[0]->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (dtypes[0]->kind == 'u') { + size = REQUIRED_STR_LEN[dtypes[0]->singleton->elsize]; + } + else if (dtypes[0]->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[dtypes[0]->singleton->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_STRING: + case NPY_VOID: + size = given_descrs[0]->elsize; + break; + case NPY_UNICODE: + size = 
given_descrs[0]->elsize / 4; + break; + default: + PyErr_SetString(PyExc_SystemError, + "Impossible cast to string path requested."); + return -1; + } + if (dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + + if (given_descrs[1] == NULL) { + loop_descrs[1] = PyArray_DescrNewFromType(dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = size; + } + else { + /* The legacy loop can handle mismatching itemsizes */ + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + + /* Set the input one as well (late for easier error management) */ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + + if (self->casting == NPY_UNSAFE_CASTING) { + assert(dtypes[0]->type_num == NPY_UNICODE && + dtypes[1]->type_num == NPY_STRING); + return NPY_UNSAFE_CASTING; + } + assert(self->casting == NPY_SAFE_CASTING); + + if (loop_descrs[1]->elsize >= size) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +static int +add_other_to_and_from_string_cast( + PyArray_DTypeMeta *string, PyArray_DTypeMeta *other) +{ + if (string == other) { + return 0; + } + + /* Casting from string, is always a simple legacy-style cast */ + if (other->type_num != NPY_STRING && other->type_num != NPY_UNICODE) { + if (PyArray_AddLegacyWrapping_CastingImpl( + string, other, NPY_UNSAFE_CASTING) < 0) { + return -1; + } + } + /* + * Casting to strings, is almost the same, but requires a custom resolver + * to define the correct string length. Right now we use a generic function + * for this. 
+ */ + PyArray_DTypeMeta *dtypes[2] = {other, string}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &cast_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "legacy_cast_to_string", + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI, + .dtypes = dtypes, + .slots = slots, + }; + /* Almost everything can be safely cast to string (except unicode) */ + if (other->type_num != NPY_UNICODE) { + spec.casting = NPY_SAFE_CASTING; + } + else { + spec.casting = NPY_UNSAFE_CASTING; + } + + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); +} + + +NPY_NO_EXPORT NPY_CASTING +string_to_string_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(loop_descrs[0]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + if (loop_descrs[0]->elsize == loop_descrs[1]->elsize) { + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else { + return NPY_EQUIV_CASTING; + } + } + else if (loop_descrs[0]->elsize <= loop_descrs[1]->elsize) { + return NPY_SAFE_CASTING; + } + return NPY_SAME_KIND_CASTING; +} + + +/* + * Add string casts. Right now all string casts are just legacy-wrapped ones + * (except string<->string and unicode<->unicode), but they do require + * custom type resolution for the string length. + * + * A bit like `object`, it could make sense to define a simpler protocol for + * string casts, however, we also need to remember that the itemsize of the + * output has to be found. 
+ */ +static int +PyArray_InitializeStringCasts(void) +{ + int result = -1; + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *other_dt = NULL; + + /* Add most casts as legacy ones */ + for (int other = 0; other < NPY_NTYPES; other++) { + if (PyTypeNum_ISDATETIME(other) || other == NPY_VOID || + other == NPY_OBJECT) { + continue; + } + other_dt = PyArray_DTypeFromTypeNum(other); + + /* The functions skip string == other_dt or unicode == other_dt */ + if (add_other_to_and_from_string_cast(string, other_dt) < 0) { + goto finish; + } + if (add_other_to_and_from_string_cast(unicode, other_dt) < 0) { + goto finish; + } + + Py_SETREF(other_dt, NULL); + } + + /* string<->string and unicode<->unicode have their own specialized casts */ + PyArray_DTypeMeta *dtypes[2]; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &string_to_string_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "string_to_string_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = (NPY_METH_REQUIRES_PYAPI | + NPY_METH_NO_FLOATINGPOINT_ERRORS | + NPY_METH_SUPPORTS_UNALIGNED), + .dtypes = dtypes, + .slots = slots, + }; + + dtypes[0] = string; + dtypes[1] = string; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + dtypes[0] = unicode; + dtypes[1] = unicode; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto finish; + } + + result = 0; + finish: + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(other_dt); + return result; +} + + +/* + * Small helper function to handle the case of `arr.astype(dtype="V")`. + * When the output descriptor is not passed, we always use `V<itemsize>` + * of the other dtype. 
+ */ +static NPY_CASTING +cast_to_void_dtype_class( + PyArray_Descr **given_descrs, PyArray_Descr **loop_descrs) +{ + /* `dtype="V"` means unstructured currently (compare final path) */ + loop_descrs[1] = PyArray_DescrNewFromType(NPY_VOID); + if (loop_descrs[1] == NULL) { + return -1; + } + loop_descrs[1]->elsize = given_descrs[0]->elsize; + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; +} + + +static NPY_CASTING +nonstructured_to_structured_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[1]->subarray != NULL) { + /* + * We currently consider this at most a safe cast. It would be + * possible to allow a view if the field has exactly one element. + */ + casting = NPY_SAFE_CASTING; + /* Subarray dtype */ + NPY_CASTING base_casting = PyArray_GetCastSafety( + given_descrs[0], given_descrs[1]->subarray->base, NULL); + if (base_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, base_casting); + } + else if (given_descrs[1]->names != NULL) { + /* Structured dtype */ + if (PyTuple_Size(given_descrs[1]->names) == 0) { + /* TODO: This retained behaviour, but likely should be changed. 
*/ + casting = NPY_UNSAFE_CASTING; + } + else { + /* Considered at most unsafe casting (but this could be changed) */ + casting = NPY_UNSAFE_CASTING; + if (PyTuple_Size(given_descrs[1]->names) == 1) { + /* A view may be acceptable */ + casting |= _NPY_CAST_IS_VIEW; + } + + Py_ssize_t pos = 0; + PyObject *key, *tuple; + while (PyDict_Next(given_descrs[1]->fields, &pos, &key, &tuple)) { + PyArray_Descr *field_descr = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0], field_descr, NULL); + casting = PyArray_MinCastSafety(casting, field_casting); + if (casting < 0) { + return -1; + } + } + } + } + else { + /* Plain void type. This behaves much like a "view" */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize && + !PyDataType_REFCHK(given_descrs[0])) { + /* + * A simple view, at the moment considered "safe" (the refcheck is + * probably not necessary, but more future proof) + */ + casting = NPY_SAFE_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize <= given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + /* Void dtypes always do the full cast. 
*/ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +int give_bad_field_error(PyObject *key) +{ + if (!PyErr_Occurred()) { + PyErr_Format(PyExc_RuntimeError, + "Invalid or missing field %R, this should be impossible " + "and indicates a NumPy bug.", key); + } + return -1; +} + + +static PyObject * +PyArray_GetGenericToVoidCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "any_to_void_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &nonstructured_to_structured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static NPY_CASTING +structured_to_nonstructured_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + PyArray_Descr *base_descr; + + if (given_descrs[0]->subarray != NULL) { + base_descr = given_descrs[0]->subarray->base; + } + else if (given_descrs[0]->names != NULL) { + if (PyTuple_Size(given_descrs[0]->names) != 1) { + /* Only allow casting a single field */ + return -1; + } + PyObject *key = PyTuple_GetItem(given_descrs[0]->names, 0); + PyObject *base_tup = PyDict_GetItem(given_descrs[0]->fields, key); + base_descr = (PyArray_Descr *)PyTuple_GET_ITEM(base_tup, 0); + } + else { + /* + * unstructured voids are considered unsafe casts and defined, albeit, + * at this time they go back to legacy behaviour using getitem/setitem. 
+ */ + base_descr = NULL; + } + + /* + * The cast is always considered unsafe, so the PyArray_GetCastSafety + * result currently does not matter. + */ + if (base_descr != NULL && PyArray_GetCastSafety( + base_descr, given_descrs[1], dtypes[1]) < 0) { + return -1; + } + + /* Void dtypes always do the full cast. */ + if (given_descrs[1] == NULL) { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + /* + * Special case strings here, it should be useless (and only actually + * work for empty arrays). Possibly this should simply raise for + * all parametric DTypes. + */ + if (dtypes[1]->type_num == NPY_STRING) { + loop_descrs[1]->elsize = given_descrs[0]->elsize; + } + else if (dtypes[1]->type_num == NPY_UNICODE) { + loop_descrs[1]->elsize = given_descrs[0]->elsize * 4; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + return NPY_UNSAFE_CASTING; +} + + +static PyObject * +PyArray_GetVoidToGenericCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->name = "void_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &structured_to_nonstructured_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +/* + * Find the correct field casting safety. See the TODO note below, including + * in 1.20 (and later) this was based on field names rather than field order + * which it should be using. + * + * NOTE: In theory it would be possible to cache the all the field casting + * implementations on the dtype, to avoid duplicate work. 
+ */ +static NPY_CASTING +can_cast_fields_safety(PyArray_Descr *from, PyArray_Descr *to) +{ + NPY_CASTING casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + + Py_ssize_t field_count = PyTuple_Size(from->names); + if (field_count != PyTuple_Size(to->names)) { + /* TODO: This should be rejected! */ + return NPY_UNSAFE_CASTING; + } + for (Py_ssize_t i = 0; i < field_count; i++) { + PyObject *from_key = PyTuple_GET_ITEM(from->names, i); + PyObject *from_tup = PyDict_GetItemWithError(from->fields, from_key); + if (from_tup == NULL) { + return give_bad_field_error(from_key); + } + PyArray_Descr *from_base = (PyArray_Descr*)PyTuple_GET_ITEM(from_tup, 0); + + /* + * TODO: This should use to_key (order), compare gh-15509 + * by Allan Haldane. And raise an error on failure. + * (Fixing that may also require fixing/changing promotion.) + */ + PyObject *to_tup = PyDict_GetItem(to->fields, from_key); + if (to_tup == NULL) { + return NPY_UNSAFE_CASTING; + } + PyArray_Descr *to_base = (PyArray_Descr*)PyTuple_GET_ITEM(to_tup, 0); + + NPY_CASTING field_casting = PyArray_GetCastSafety(from_base, to_base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + if (!(casting & _NPY_CAST_IS_VIEW)) { + assert((casting & ~_NPY_CAST_IS_VIEW) != NPY_NO_CASTING); + return casting; + } + + /* + * If the itemsize (includes padding at the end), fields, or names + * do not match, this cannot be a view and also not a "no" cast + * (identical dtypes). + * It may be possible that this can be relaxed in some cases. + */ + if (from->elsize != to->elsize) { + /* + * The itemsize may mismatch even if all fields and formats match + * (due to additional padding). 
+ */ + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + + int cmp = PyObject_RichCompareBool(from->fields, to->fields, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + cmp = PyObject_RichCompareBool(from->names, to->names, Py_EQ); + if (cmp != 1) { + if (cmp == -1) { + PyErr_Clear(); + } + return PyArray_MinCastSafety(casting, NPY_EQUIV_CASTING); + } + return casting; +} + + +static NPY_CASTING +void_to_void_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + NPY_CASTING casting; + + if (given_descrs[1] == NULL) { + /* This is weird, since it doesn't return the original descr, but... */ + return cast_to_void_dtype_class(given_descrs, loop_descrs); + } + + if (given_descrs[0]->names != NULL && given_descrs[1]->names != NULL) { + /* From structured to structured, need to check fields */ + casting = can_cast_fields_safety(given_descrs[0], given_descrs[1]); + } + else if (given_descrs[0]->names != NULL) { + return structured_to_nonstructured_resolve_descriptors( + self, dtypes, given_descrs, loop_descrs); + } + else if (given_descrs[1]->names != NULL) { + return nonstructured_to_structured_resolve_descriptors( + self, dtypes, given_descrs, loop_descrs); + } + else if (given_descrs[0]->subarray == NULL && + given_descrs[1]->subarray == NULL) { + /* Both are plain void dtypes */ + if (given_descrs[0]->elsize == given_descrs[1]->elsize) { + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + else if (given_descrs[0]->elsize < given_descrs[1]->elsize) { + casting = NPY_SAFE_CASTING; + } + else { + casting = NPY_SAME_KIND_CASTING; + } + } + else { + /* + * At this point, one of the dtypes must be a subarray dtype, the + * other is definitely not a structured one. 
+ */ + PyArray_ArrayDescr *from_sub = given_descrs[0]->subarray; + PyArray_ArrayDescr *to_sub = given_descrs[1]->subarray; + assert(from_sub || to_sub); + + /* If the shapes do not match, this is at most an unsafe cast */ + casting = NPY_UNSAFE_CASTING; + if (from_sub && to_sub) { + int res = PyObject_RichCompareBool(from_sub->shape, to_sub->shape, Py_EQ); + if (res < 0) { + return -1; + } + else if (res) { + /* Both are subarrays and the shape matches */ + casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + } + NPY_CASTING field_casting = PyArray_GetCastSafety( + given_descrs[0]->subarray->base, given_descrs[1]->subarray->base, NULL); + if (field_casting < 0) { + return -1; + } + casting = PyArray_MinCastSafety(casting, field_casting); + } + + /* Void dtypes always do the full cast. */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + + return casting; +} + + +/* + * This initializes the void to void cast. Voids include structured dtypes, + * which means that they can cast from and to any other dtype and, in that + * sense, are special (similar to Object). + */ +static int +PyArray_InitializeVoidToVoidCast(void) +{ + PyArray_DTypeMeta *Void = PyArray_DTypeFromTypeNum(NPY_VOID); + PyArray_DTypeMeta *dtypes[2] = {Void, Void}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &void_to_void_resolve_descriptors}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "void_to_void_cast", + .casting = NPY_NO_CASTING, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Void); + return res; +} + + +/* + * Implement object to any casting implementation. 
Casting from object may + * require inspecting of all array elements (for parametric dtypes), and + * the resolver will thus reject all parametric dtypes if the out dtype + * is not provided. + */ +static NPY_CASTING +object_to_any_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + /* + * This should not really be called, since object -> parametric casts + * require inspecting the object array. Allow legacy ones, the path + * here is that e.g. "M8" input is considered to be the DType class, + * and by allowing it here, we go back to the "M8" instance. + */ + if (dtypes[1]->parametric) { + PyErr_Format(PyExc_TypeError, + "casting from object to the parametric DType %S requires " + "the specified output dtype instance. " + "This may be a NumPy issue, since the correct instance " + "should be discovered automatically, however.", dtypes[1]); + return -1; + } + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_UNSAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. 
+ */ +static PyObject * +PyArray_GetObjectToGenericCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->nin = 1; + method->nout = 1; + method->name = "object_to_any_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_UNSAFE_CASTING; + method->resolve_descriptors = &object_to_any_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + + +/* Any to object is simple (could even use the default) */ +static NPY_CASTING +any_to_object_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + if (given_descrs[1] == NULL) { + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + return NPY_SAFE_CASTING; +} + + +/* + * Casting to object is special since it is generic to all input dtypes. 
+ */ +static PyObject * +PyArray_GetGenericToObjectCastingImpl(void) +{ + static PyArrayMethodObject *method = NULL; + + if (method != NULL) { + Py_INCREF(method); + return (PyObject *)method; + } + + method = PyObject_New(PyArrayMethodObject, &PyArrayMethod_Type); + if (method == NULL) { + return PyErr_NoMemory(); + } + + method->nin = 1; + method->nout = 1; + method->name = "any_to_object_cast"; + method->flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + method->casting = NPY_SAFE_CASTING; + method->resolve_descriptors = &any_to_object_resolve_descriptors; + method->get_strided_loop = NULL; + + return (PyObject *)method; +} + + +static int +PyArray_InitializeObjectToObjectCast(void) +{ + /* + * The object dtype does not support byte order changes, so its cast + * is always a direct view. + */ + PyArray_DTypeMeta *Object = PyArray_DTypeFromTypeNum(NPY_OBJECT); + PyArray_DTypeMeta *dtypes[2] = {Object, Object}; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {0, NULL}}; + PyArrayMethod_Spec spec = { + .name = "object_to_object_cast", + .casting = NPY_NO_CASTING | _NPY_CAST_IS_VIEW, + .nin = 1, + .nout = 1, + .flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED, + .dtypes = dtypes, + .slots = slots, + }; + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_DECREF(Object); + return res; +} + + +NPY_NO_EXPORT int +PyArray_InitializeCasts() +{ + if (PyArray_InitializeNumericCasts() < 0) { + return -1; + } + if (PyArray_InitializeStringCasts() < 0) { + return -1; + } + if (PyArray_InitializeVoidToVoidCast() < 0) { + return -1; + } + if (PyArray_InitializeObjectToObjectCast() < 0) { + return -1; + } + /* Datetime casts are defined in datetime.c */ + if (PyArray_InitializeDatetimeCasts() < 0) { + return -1; + } + return 0; +} diff --git a/numpy/core/src/multiarray/convert_datatype.h b/numpy/core/src/multiarray/convert_datatype.h index a2b36b497..cc1930f77 100644 --- a/numpy/core/src/multiarray/convert_datatype.h +++ 
b/numpy/core/src/multiarray/convert_datatype.h @@ -1,6 +1,13 @@ #ifndef _NPY_ARRAY_CONVERT_DATATYPE_H_ #define _NPY_ARRAY_CONVERT_DATATYPE_H_ +#include "array_method.h" + +extern NPY_NO_EXPORT npy_intp REQUIRED_STR_LEN[]; + +NPY_NO_EXPORT PyObject * +_get_castingimpl(PyObject *NPY_UNUSED(module), PyObject *args); + NPY_NO_EXPORT PyArray_VectorUnaryFunc * PyArray_GetCastFunc(PyArray_Descr *descr, int type_num); @@ -16,6 +23,9 @@ PyArray_CommonDType(PyArray_DTypeMeta *dtype1, PyArray_DTypeMeta *dtype2); NPY_NO_EXPORT int PyArray_ValidType(int type); +NPY_NO_EXPORT int +dtype_kind_to_ordering(char kind); + /* Like PyArray_CanCastArrayTo */ NPY_NO_EXPORT npy_bool can_cast_scalar_to(PyArray_Descr *scal_type, char *scal_data, @@ -36,26 +46,37 @@ npy_set_invalid_cast_error( PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, NPY_CASTING casting, npy_bool scalar); -/* - * This function calls Py_DECREF on flex_dtype, and replaces it with - * a new dtype that has been adapted based on the values in data_dtype - * and data_obj. If the flex_dtype is not flexible, it returns it as-is. - * - * Usually, if data_obj is not an array, dtype should be the result - * given by the PyArray_GetArrayParamsFromObject function. - * - * The data_obj may be NULL if just a dtype is known for the source. - * - * If *flex_dtype is NULL, returns immediately, without setting an - * exception, leaving any previous error handling intact. - * - * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, - * and NPY_DATETIME with generic units. 
- */ -NPY_NO_EXPORT PyArray_Descr * -PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); - NPY_NO_EXPORT PyArray_Descr * PyArray_CastDescrToDType(PyArray_Descr *descr, PyArray_DTypeMeta *given_DType); +NPY_NO_EXPORT int +PyArray_AddCastingImplmentation(PyBoundArrayMethodObject *meth); + +NPY_NO_EXPORT int +PyArray_AddCastingImplementation_FromSpec(PyArrayMethod_Spec *spec, int private); + +NPY_NO_EXPORT NPY_CASTING +PyArray_MinCastSafety(NPY_CASTING casting1, NPY_CASTING casting2); + +NPY_NO_EXPORT NPY_CASTING +PyArray_GetCastSafety( + PyArray_Descr *from, PyArray_Descr *to, PyArray_DTypeMeta *to_dtype); + +NPY_NO_EXPORT NPY_CASTING +legacy_same_dtype_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT NPY_CASTING +simple_cast_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta **dtypes, + PyArray_Descr **input_descrs, + PyArray_Descr **loop_descrs); + +NPY_NO_EXPORT int +PyArray_InitializeCasts(void); + #endif diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 2426076b9..f6031e370 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -756,9 +756,11 @@ PyArray_NewFromDescr_int( Py_DECREF(descr); return NULL; } + fa->_buffer_info = NULL; fa->nd = nd; fa->dimensions = NULL; fa->data = NULL; + if (data == NULL) { fa->flags = NPY_ARRAY_DEFAULT; if (flags) { diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c index 4afc45fb6..9c1b606bb 100644 --- a/numpy/core/src/multiarray/datetime.c +++ b/numpy/core/src/multiarray/datetime.c @@ -25,6 +25,9 @@ #include "_datetime.h" #include "datetime_strings.h" #include "convert_datatype.h" +#include "array_method.h" +#include "dtypemeta.h" +#include "usertypes.h" /* * Computes the python `ret, d = divmod(d, unit)`. 
@@ -3725,3 +3728,375 @@ find_object_datetime_type(PyObject *obj, int type_num) return NULL; } } + + + + +/* + * Describes casting within datetimes or timedelta + */ +static NPY_CASTING +time_to_time_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* This is a within-dtype cast, which currently must handle byteswapping */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] == NULL) { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[0]); + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + int is_timedelta = given_descrs[0]->type_num == NPY_TIMEDELTA; + + if (given_descrs[0] == given_descrs[1]) { + return NPY_NO_CASTING | _NPY_CAST_IS_VIEW; + } + + NPY_CASTING byteorder_may_allow_view = 0; + if (PyDataType_ISNOTSWAPPED(loop_descrs[0]) == + PyDataType_ISNOTSWAPPED(loop_descrs[1])) { + byteorder_may_allow_view = _NPY_CAST_IS_VIEW; + } + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(loop_descrs[0]); + assert(meta1 != NULL); + meta2 = get_datetime_metadata_from_dtype(loop_descrs[1]); + assert(meta2 != NULL); + + if (meta1->base == meta2->base && meta1->num == meta2->num) { + if (byteorder_may_allow_view) { + return NPY_NO_CASTING | byteorder_may_allow_view; + } + return NPY_EQUIV_CASTING; + } + else if (meta1->base == NPY_FR_GENERIC) { + return NPY_SAFE_CASTING | byteorder_may_allow_view; + } + else if (meta2->base == NPY_FR_GENERIC) { + /* TODO: This is actually an invalid cast (casting will error) */ + return NPY_UNSAFE_CASTING; + } + else if (is_timedelta && ( + /* jump between time units and date units is unsafe for timedelta */ + (meta1->base <= NPY_FR_M && meta2->base > NPY_FR_M) || + (meta1->base > NPY_FR_M && meta2->base <= NPY_FR_M))) { + return NPY_UNSAFE_CASTING; + } + else if (meta1->base <= meta2->base) { + /* Casting to a 
more precise unit is currently considered safe */ + if (datetime_metadata_divides(meta1, meta2, is_timedelta)) { + /* If it divides, we consider it to be a safe cast */ + return NPY_SAFE_CASTING; + } + else { + return NPY_SAME_KIND_CASTING; + } + } + return NPY_SAME_KIND_CASTING; +} + + +/* Handles datetime<->timedelta type resolution (both directions) */ +static NPY_CASTING +datetime_to_timedelta_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + loop_descrs[0] = ensure_dtype_nbo(given_descrs[0]); + if (loop_descrs[0] == NULL) { + return -1; + } + if (given_descrs[1] == NULL) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + loop_descrs[1] = create_datetime_dtype(dtypes[1]->type_num, meta); + } + else { + loop_descrs[1] = ensure_dtype_nbo(given_descrs[1]); + } + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + /* + * Mostly NPY_UNSAFE_CASTING is not true, the cast will fail. + * TODO: Once ufuncs use dtype specific promotion rules, + * this is likely unnecessary + */ + return NPY_UNSAFE_CASTING; +} + + +/* In the current setup both strings and unicode casts support all outputs */ +static NPY_CASTING +time_to_string_resolve_descriptors( + PyArrayMethodObject *self, + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr **given_descrs, + PyArray_Descr **loop_descrs) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (given_descrs[1] != NULL) { + /* + * At the time of writing, NumPy does not check the length here, + * but will error if filling fails. 
+ */ + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + else { + /* Find the correct string length, possibly based on the unit */ + int size; + if (given_descrs[0]->type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta = get_datetime_metadata_from_dtype(given_descrs[0]); + assert(meta != NULL); + size = get_datetime_iso_8601_strlen(0, meta->base); + } + else { + size = 21; + } + if (dtypes[1]->type_num == NPY_UNICODE) { + size *= 4; + } + loop_descrs[1] = PyArray_DescrNewFromType(dtypes[1]->type_num); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + loop_descrs[1]->elsize = size; + } + assert(self->casting == NPY_UNSAFE_CASTING); + return NPY_UNSAFE_CASTING; +} + + +static NPY_CASTING +string_to_datetime_cast_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + /* We currently support byte-swapping, so any (unicode) string is OK */ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + if (given_descrs[1] == NULL) { + /* NOTE: This doesn't actually work, and will error during the cast */ + loop_descrs[1] = dtypes[1]->default_descr(dtypes[1]); + if (loop_descrs[1] == NULL) { + Py_DECREF(loop_descrs[0]); + return -1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + return NPY_UNSAFE_CASTING; +} + + +/* + * This registers the castingimpl for all datetime related casts. 
+ */ +NPY_NO_EXPORT int +PyArray_InitializeDatetimeCasts() +{ + int result = -1; + + PyType_Slot slots[3]; + PyArray_DTypeMeta *dtypes[2]; + PyArrayMethod_Spec spec = { + .name = "datetime_casts", + .nin = 1, + .nout = 1, + .casting = NPY_NO_CASTING, + .flags = NPY_METH_SUPPORTS_UNALIGNED, + .slots = slots, + .dtypes = dtypes, + }; + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &time_to_time_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + PyArray_DTypeMeta *datetime = PyArray_DTypeFromTypeNum(NPY_DATETIME); + PyArray_DTypeMeta *timedelta = PyArray_DTypeFromTypeNum(NPY_TIMEDELTA); + PyArray_DTypeMeta *string = PyArray_DTypeFromTypeNum(NPY_STRING); + PyArray_DTypeMeta *unicode = PyArray_DTypeFromTypeNum(NPY_UNICODE); + PyArray_DTypeMeta *tmp = NULL; + + dtypes[0] = datetime; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + dtypes[0] = timedelta; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Casting between timedelta and datetime uses legacy casting loops, but + * custom dtype resolution (to handle copying of the time unit). + */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &datetime_to_timedelta_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "timedelta_and_datetime_cast"; + dtypes[0] = timedelta; + dtypes[1] = datetime; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + spec.name = "datetime_to_timedelta_cast"; + dtypes[0] = datetime; + dtypes[1] = timedelta; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + /* + * Cast from numeric types to times. These use the cast functions + * as stored on the datatype, which should be replaced at some point. 
+ * Some of these casts can fail (casting to unitless datetime), but these + * are rather special. + */ + for (int num = 0; num < NPY_NTYPES; num++) { + if (!PyTypeNum_ISNUMBER(num) && num != NPY_BOOL) { + continue; + } + + Py_XSETREF(tmp, PyArray_DTypeFromTypeNum(num)); + + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, datetime, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + datetime, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + NPY_CASTING to_timedelta_casting = NPY_UNSAFE_CASTING; + if (PyTypeNum_ISINTEGER(num) || num == NPY_BOOL) { + /* timedelta casts like int64 right now... */ + if (PyTypeNum_ISUNSIGNED(num) && tmp->singleton->elsize == 8) { + to_timedelta_casting = NPY_SAME_KIND_CASTING; + } + else { + to_timedelta_casting = NPY_SAFE_CASTING; + } + } + if (PyArray_AddLegacyWrapping_CastingImpl( + tmp, timedelta, to_timedelta_casting) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + timedelta, tmp, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + } + + /* + * Cast times to string and unicode + */ + spec.casting = NPY_UNSAFE_CASTING; + /* + * Casts can error and need API (unicodes needs it for string->unicode). + * Unicode handling is currently implemented via a legacy cast. 
+ */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &time_to_string_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + for (int num = NPY_DATETIME; num <= NPY_TIMEDELTA; num++) { + for (int str = NPY_STRING; str <= NPY_UNICODE; str++) { + dtypes[0] = PyArray_DTypeFromTypeNum(num); + dtypes[1] = PyArray_DTypeFromTypeNum(str); + + int res = PyArray_AddCastingImplementation_FromSpec(&spec, 1); + Py_SETREF(dtypes[0], NULL); + Py_SETREF(dtypes[1], NULL); + if (res < 0) { + return -1; + } + } + } + + /* + * Cast strings to timedelta are currently only legacy casts + */ + if (PyArray_AddLegacyWrapping_CastingImpl( + string, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + if (PyArray_AddLegacyWrapping_CastingImpl( + unicode, timedelta, NPY_UNSAFE_CASTING) < 0) { + goto fail; + } + + /* + * Cast strings to datetime + */ + dtypes[1] = datetime; + spec.casting = NPY_UNSAFE_CASTING; + + /* The default type resolution should work fine. */ + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &string_to_datetime_cast_resolve_descriptors; + slots[1].slot = NPY_METH_get_loop; + slots[1].pfunc = NULL; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + dtypes[0] = string; + spec.flags = NPY_METH_SUPPORTS_UNALIGNED; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + dtypes[0] = unicode; + /* + * Unicode handling is currently implemented via a legacy cast, which + * requires the Python API. 
+ */ + spec.flags = NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI; + if (PyArray_AddCastingImplementation_FromSpec(&spec, 1) < 0) { + goto fail; + } + + result = 0; + fail: + Py_DECREF(datetime); + Py_DECREF(timedelta); + Py_DECREF(string); + Py_DECREF(unicode); + Py_XDECREF(tmp); + return result; +} + diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c index af4e6c22e..630bd76f3 100644 --- a/numpy/core/src/multiarray/dtype_transfer.c +++ b/numpy/core/src/multiarray/dtype_transfer.c @@ -1006,9 +1006,8 @@ _strided_to_strided_string_to_datetime(char *dst, npy_intp dst_stride, /* * Assumes src_dtype and dst_dtype are both datetimes or both timedeltas */ -static int +NPY_NO_EXPORT int get_nbo_cast_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) @@ -1082,12 +1081,10 @@ get_nbo_cast_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_datetime_to_string_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_datetime_to_string_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *src_meta; _strided_datetime_cast_data *data; @@ -1127,7 +1124,7 @@ get_nbo_datetime_to_string_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_datetime_to_unicode_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1140,8 +1137,8 @@ get_datetime_to_unicode_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, 
adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(dst_dtype, str_dtype); + str_dtype = PyArray_DescrNewFromType(NPY_STRING); + str_dtype->elsize = dst_dtype->elsize / 4; if (str_dtype == NULL) { return NPY_FAIL; } @@ -1156,10 +1153,9 @@ get_datetime_to_unicode_transfer_function(int aligned, } /* Get the NBO datetime to string aligned contig function */ - if (get_nbo_datetime_to_string_transfer_function(1, - src_dtype->elsize, str_dtype->elsize, - src_dtype, str_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_datetime_to_string_transfer_function( + src_dtype, str_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1198,12 +1194,10 @@ get_datetime_to_unicode_transfer_function(int aligned, return NPY_SUCCEED; } -static int -get_nbo_string_to_datetime_transfer_function(int aligned, - npy_intp src_stride, npy_intp dst_stride, - PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, - PyArray_StridedUnaryOp **out_stransfer, - NpyAuxData **out_transferdata) +NPY_NO_EXPORT int +get_nbo_string_to_datetime_transfer_function( + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + PyArray_StridedUnaryOp **out_stransfer, NpyAuxData **out_transferdata) { PyArray_DatetimeMetaData *dst_meta; _strided_datetime_cast_data *data; @@ -1250,7 +1244,7 @@ get_nbo_string_to_datetime_transfer_function(int aligned, return NPY_SUCCEED; } -static int +NPY_NO_EXPORT int get_unicode_to_datetime_transfer_function(int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1263,11 +1257,12 @@ get_unicode_to_datetime_transfer_function(int aligned, PyArray_Descr *str_dtype; /* Get an ASCII string data type, adapted to match the UNICODE one */ - str_dtype = PyArray_DescrFromType(NPY_STRING); - str_dtype = PyArray_AdaptFlexibleDType(src_dtype, str_dtype); + str_dtype = 
PyArray_DescrNewFromType(NPY_STRING); if (str_dtype == NULL) { return NPY_FAIL; } + assert(src_dtype->type_num == NPY_UNICODE); + str_dtype->elsize = src_dtype->elsize / 4; /* Get the cast operation from src */ if (PyArray_GetDTypeTransferFunction(aligned, @@ -1281,10 +1276,9 @@ get_unicode_to_datetime_transfer_function(int aligned, } /* Get the string to NBO datetime aligned contig function */ - if (get_nbo_string_to_datetime_transfer_function(1, - str_dtype->elsize, dst_dtype->elsize, - str_dtype, dst_dtype, - &caststransfer, &castdata) != NPY_SUCCEED) { + if (get_nbo_string_to_datetime_transfer_function( + str_dtype, dst_dtype, + &caststransfer, &castdata) != NPY_SUCCEED) { Py_DECREF(str_dtype); NPY_AUXDATA_FREE(todata); return NPY_FAIL; @@ -1323,7 +1317,7 @@ get_unicode_to_datetime_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int get_legacy_dtype_cast_function( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1502,7 +1496,6 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder) || !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_cast_datetime_transfer_function(aligned, - src_stride, dst_stride, src_dtype, dst_dtype, out_stransfer, out_transferdata); } @@ -1518,10 +1511,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(src_dtype->byteorder); return get_nbo_datetime_to_string_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_datetime_to_unicode_transfer_function( @@ -1538,10 +1529,8 @@ get_nbo_cast_transfer_function(int aligned, *out_needs_api = 1; *out_needs_wrap = !PyArray_ISNBO(dst_dtype->byteorder); return get_nbo_string_to_datetime_transfer_function( - aligned, - src_stride, dst_stride, - src_dtype, dst_dtype, - out_stransfer, 
out_transferdata); + src_dtype, dst_dtype, + out_stransfer, out_transferdata); case NPY_UNICODE: return get_unicode_to_datetime_transfer_function( @@ -1561,7 +1550,7 @@ get_nbo_cast_transfer_function(int aligned, } -static int +NPY_NO_EXPORT int wrap_aligned_contig_transfer_function_with_copyswapn( int aligned, npy_intp src_stride, npy_intp dst_stride, PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, @@ -1570,7 +1559,7 @@ wrap_aligned_contig_transfer_function_with_copyswapn( PyArray_StridedUnaryOp *caststransfer, NpyAuxData *castdata) { NpyAuxData *todata = NULL, *fromdata = NULL; - PyArray_StridedUnaryOp *tobuffer, *frombuffer; + PyArray_StridedUnaryOp *tobuffer = NULL, *frombuffer = NULL; npy_intp src_itemsize = src_dtype->elsize; npy_intp dst_itemsize = dst_dtype->elsize; @@ -3768,6 +3757,53 @@ PyArray_GetDTypeTransferFunction(int aligned, out_needs_api); } + +/* + * Basic version of PyArray_GetDTypeTransferFunction for legacy dtype + * support. + * It supports only wrapping the copyswapn functions and the legacy + * cast functions registered with `PyArray_RegisterCastFunc`. + * This function takes the easy way out: It does not wrap + */ +NPY_NO_EXPORT int +PyArray_GetLegacyDTypeTransferFunction(int aligned, + npy_intp src_stride, npy_intp dst_stride, + PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype, + int move_references, + PyArray_StridedUnaryOp **out_stransfer, + NpyAuxData **out_transferdata, + int *out_needs_api) +{ + /* Note: We ignore `needs_wrap`; needs-wrap is handled by another cast */ + int needs_wrap = 0; + + if (src_dtype->type_num == dst_dtype->type_num) { + /* + * This is a cast within the same dtype. For legacy user-dtypes, + * it is always valid to handle this using the copy swap function. 
+ */ + return wrap_copy_swap_function(aligned, + src_stride, dst_stride, + src_dtype, + PyArray_ISNBO(src_dtype->byteorder) != + PyArray_ISNBO(dst_dtype->byteorder), + out_stransfer, out_transferdata); + } + + if (get_legacy_dtype_cast_function(aligned, + src_stride, dst_stride, + src_dtype, dst_dtype, + move_references, + out_stransfer, + out_transferdata, + out_needs_api, + &needs_wrap) != NPY_SUCCEED) { + return NPY_FAIL; + } + return NPY_SUCCEED; +} + + NPY_NO_EXPORT int PyArray_GetMaskedDTypeTransferFunction(int aligned, npy_intp src_stride, diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c index e63a60738..4c11723e7 100644 --- a/numpy/core/src/multiarray/dtypemeta.c +++ b/numpy/core/src/multiarray/dtypemeta.c @@ -27,6 +27,7 @@ dtypemeta_dealloc(PyArray_DTypeMeta *self) { Py_XDECREF(self->scalar_type); Py_XDECREF(self->singleton); + Py_XDECREF(self->castingimpls); PyType_Type.tp_dealloc((PyObject *) self); } @@ -565,6 +566,12 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr) /* Let python finish the initialization (probably unnecessary) */ if (PyType_Ready((PyTypeObject *)dtype_class) < 0) { + Py_DECREF(dtype_class); + return -1; + } + dtype_class->castingimpls = PyDict_New(); + if (dtype_class->castingimpls == NULL) { + Py_DECREF(dtype_class); return -1; } diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.c b/numpy/core/src/multiarray/legacy_dtype_implementation.c new file mode 100644 index 000000000..3ce4710fd --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.c @@ -0,0 +1,716 @@ +/* + * This file hosts legacy implementations of certain functions for + * which alternatives exists, but the old functions are still required + * in certain code paths, or until the code transition is finalized. + * + * This code should typically not require modification, and if modified + * similar changes may be necessary in the new version. 
+ */ + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE +#include "numpy/arrayobject.h" +#include "scalartypes.h" +#include "_datetime.h" +#include "datetime_strings.h" +#include "convert_datatype.h" + +#include "legacy_dtype_implementation.h" + + +/* + * Compare the field dictionaries for two types. + * + * Return 1 if the field types and field names of the two descrs are equal and + * in the same order, 0 if not. + */ +static int +_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { + + int val; + + if (type1->fields == type2->fields && type1->names == type2->names) { + return 1; + } + if (type1->fields == NULL || type2->fields == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return 1; +} + +/* + * Compare the subarray data for two types. + * Return 1 if they are the same, 0 if not. 
+ */ +static int +_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) +{ + int val; + + if (sub1 == sub2) { + return 1; + + } + if (sub1 == NULL || sub2 == NULL) { + return 0; + } + + val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); + if (val != 1 || PyErr_Occurred()) { + PyErr_Clear(); + return 0; + } + + return PyArray_EquivTypes(sub1->base, sub2->base); +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) +{ + int type_num1, type_num2, size1, size2; + + if (type1 == type2) { + return NPY_TRUE; + } + + type_num1 = type1->type_num; + type_num2 = type2->type_num; + size1 = type1->elsize; + size2 = type2->elsize; + + if (size1 != size2) { + return NPY_FALSE; + } + if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { + return NPY_FALSE; + } + if (type1->subarray || type2->subarray) { + return ((type_num1 == type_num2) + && _equivalent_subarrays(type1->subarray, type2->subarray)); + } + if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { + return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); + } + if (type_num1 == NPY_DATETIME + || type_num1 == NPY_TIMEDELTA + || type_num2 == NPY_DATETIME + || type_num2 == NPY_TIMEDELTA) { + return ((type_num1 == type_num2) + && has_equivalent_datetime_metadata(type1, type2)); + } + return type1->kind == type2->kind; +} + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2) +{ + PyArray_Descr *d1, *d2; + npy_bool ret; + + if (typenum1 == typenum2) { + return NPY_SUCCEED; + } + + d1 = PyArray_DescrFromType(typenum1); + d2 = PyArray_DescrFromType(typenum2); + ret = PyArray_LegacyEquivTypes(d1, d2); + Py_DECREF(d1); + Py_DECREF(d2); + return ret; +} + + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype) +{ + PyArray_Descr *from; + + /* Fast table lookup for small type numbers */ + if ((unsigned int)fromtype < NPY_NTYPES && + (unsigned int)totype < 
NPY_NTYPES) { + return _npy_can_cast_safely_table[fromtype][totype]; + } + + /* Identity */ + if (fromtype == totype) { + return 1; + } + + from = PyArray_DescrFromType(fromtype); + /* + * cancastto is a NPY_NOTYPE terminated C-int-array of types that + * the data-type can be cast to safely. + */ + if (from->f->cancastto) { + int *curtype = from->f->cancastto; + + while (*curtype != NPY_NOTYPE) { + if (*curtype++ == totype) { + return 1; + } + } + } + return 0; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to) +{ + int from_type_num = from->type_num; + int to_type_num = to->type_num; + npy_bool ret; + + ret = (npy_bool) PyArray_LegacyCanCastSafely(from_type_num, to_type_num); + if (ret) { + /* Check String and Unicode more closely */ + if (from_type_num == NPY_STRING) { + if (to_type_num == NPY_STRING) { + ret = (from->elsize <= to->elsize); + } + else if (to_type_num == NPY_UNICODE) { + ret = (from->elsize << 2 <= to->elsize); + } + } + else if (from_type_num == NPY_UNICODE) { + if (to_type_num == NPY_UNICODE) { + ret = (from->elsize <= to->elsize); + } + } + /* + * For datetime/timedelta, only treat casts moving towards + * more precision as safe. 
+ */ + else if (from_type_num == NPY_DATETIME && to_type_num == NPY_DATETIME) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_datetime64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + else if (from_type_num == NPY_TIMEDELTA && + to_type_num == NPY_TIMEDELTA) { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + return can_cast_timedelta64_metadata(meta1, meta2, + NPY_SAFE_CASTING); + } + /* + * If to_type_num is STRING or unicode + * see if the length is long enough to hold the + * stringified value of the object. + */ + else if (to_type_num == NPY_STRING || to_type_num == NPY_UNICODE) { + /* + * Boolean value cast to string type is 5 characters max + * for string 'False'. + */ + int char_size = 1; + if (to_type_num == NPY_UNICODE) { + char_size = 4; + } + + ret = 0; + if (PyDataType_ISUNSIZED(to)) { + ret = 1; + } + /* + * Need at least 5 characters to convert from boolean + * to 'True' or 'False'. 
+ */ + else if (from->kind == 'b' && to->elsize >= 5 * char_size) { + ret = 1; + } + else if (from->kind == 'u') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + else if (to->elsize >= + REQUIRED_STR_LEN[from->elsize] * char_size) { + ret = 1; + } + } + else if (from->kind == 'i') { + /* Guard against unexpected integer size */ + if (from->elsize > 8 || from->elsize < 0) { + ret = 0; + } + /* Extra character needed for sign */ + else if (to->elsize >= + (REQUIRED_STR_LEN[from->elsize] + 1) * char_size) { + ret = 1; + } + } + } + return ret; +} + + +/* + * Compare two field dictionaries for castability. + * + * Return 1 if 'field1' can be cast to 'field2' according to the rule + * 'casting', 0 if not. + * + * Castability of field dictionaries is defined recursively: 'field1' and + * 'field2' must have the same field names (possibly in different + * orders), and the corresponding field types must be castable according + * to the given casting rule. + */ +static int +can_cast_fields(PyObject *field1, PyObject *field2, NPY_CASTING casting) +{ + Py_ssize_t ppos; + PyObject *key; + PyObject *tuple1, *tuple2; + + if (field1 == field2) { + return 1; + } + if (field1 == NULL || field2 == NULL) { + return 0; + } + if (PyDict_Size(field1) != PyDict_Size(field2)) { + return 0; + } + + /* Iterate over all the fields and compare for castability */ + ppos = 0; + while (PyDict_Next(field1, &ppos, &key, &tuple1)) { + if ((tuple2 = PyDict_GetItem(field2, key)) == NULL) { + return 0; + } + /* Compare the dtype of the field for castability */ + if (!PyArray_CanCastTypeTo( + (PyArray_Descr *)PyTuple_GET_ITEM(tuple1, 0), + (PyArray_Descr *)PyTuple_GET_ITEM(tuple2, 0), + casting)) { + return 0; + } + } + + return 1; +} + + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting) +{ + /* + * Fast paths for equality and for basic types.
+ */ + if (from == to || + ((NPY_LIKELY(PyDataType_ISNUMBER(from)) || + PyDataType_ISOBJECT(from)) && + NPY_LIKELY(from->type_num == to->type_num) && + NPY_LIKELY(from->byteorder == to->byteorder))) { + return 1; + } + /* + * Cases with subarrays and fields need special treatment. + */ + if (PyDataType_HASFIELDS(from)) { + /* + * If from is a structured data type, then it can be cast to a simple + * non-object one only for unsafe casting *and* if it has a single + * field; recurse just in case the single field is itself structured. + */ + if (!PyDataType_HASFIELDS(to) && !PyDataType_ISOBJECT(to)) { + if (casting == NPY_UNSAFE_CASTING && + PyDict_Size(from->fields) == 1) { + Py_ssize_t ppos = 0; + PyObject *tuple; + PyArray_Descr *field; + PyDict_Next(from->fields, &ppos, NULL, &tuple); + field = (PyArray_Descr *)PyTuple_GET_ITEM(tuple, 0); + /* + * For a subarray, we need to get the underlying type; + * since we already are casting unsafely, we can ignore + * the shape. + */ + if (PyDataType_HASSUBARRAY(field)) { + field = field->subarray->base; + } + return PyArray_LegacyCanCastTypeTo(field, to, casting); + } + else { + return 0; + } + } + /* + * Casting from one structured data type to another depends on the fields; + * we pass that case on to the EquivTypenums case below. + * + * TODO: move that part up here? Need to check whether equivalent type + * numbers is an addition constraint that is needed. + * + * TODO/FIXME: For now, always allow structured to structured for unsafe + * casting; this is not correct, but needed since the treatment in can_cast + * below got out of sync with astype; see gh-13667. + */ + if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + } + else if (PyDataType_HASFIELDS(to)) { + /* + * If "from" is a simple data type and "to" has fields, then only + * unsafe casting works (and that works always, even to multiple fields). + */ + return casting == NPY_UNSAFE_CASTING; + } + /* + * Everything else we consider castable for unsafe for now. 
+ * FIXME: ensure what we do here is consistent with "astype", + * i.e., deal more correctly with subarrays and user-defined dtype. + */ + else if (casting == NPY_UNSAFE_CASTING) { + return 1; + } + /* + * Equivalent simple types can be cast with any value of 'casting', but + * we need to be careful about structured to structured. + */ + if (PyArray_LegacyEquivTypenums(from->type_num, to->type_num)) { + /* For complicated case, use EquivTypes (for now) */ + if (PyTypeNum_ISUSERDEF(from->type_num) || + from->subarray != NULL) { + int ret; + + /* Only NPY_NO_CASTING prevents byte order conversion */ + if ((casting != NPY_NO_CASTING) && + (!PyArray_ISNBO(from->byteorder) || + !PyArray_ISNBO(to->byteorder))) { + PyArray_Descr *nbo_from, *nbo_to; + + nbo_from = PyArray_DescrNewByteorder(from, NPY_NATIVE); + nbo_to = PyArray_DescrNewByteorder(to, NPY_NATIVE); + if (nbo_from == NULL || nbo_to == NULL) { + Py_XDECREF(nbo_from); + Py_XDECREF(nbo_to); + PyErr_Clear(); + return 0; + } + ret = PyArray_LegacyEquivTypes(nbo_from, nbo_to); + Py_DECREF(nbo_from); + Py_DECREF(nbo_to); + } + else { + ret = PyArray_LegacyEquivTypes(from, to); + } + return ret; + } + + if (PyDataType_HASFIELDS(from)) { + switch (casting) { + case NPY_EQUIV_CASTING: + case NPY_SAFE_CASTING: + case NPY_SAME_KIND_CASTING: + /* + * `from' and `to' must have the same fields, and + * corresponding fields must be (recursively) castable. 
+ */ + return can_cast_fields(from->fields, to->fields, casting); + + case NPY_NO_CASTING: + default: + return PyArray_LegacyEquivTypes(from, to); + } + } + + switch (from->type_num) { + case NPY_DATETIME: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_datetime64_metadata(meta1, meta2, casting); + } + else { + return can_cast_datetime64_metadata(meta1, meta2, casting); + } + } + case NPY_TIMEDELTA: { + PyArray_DatetimeMetaData *meta1, *meta2; + meta1 = get_datetime_metadata_from_dtype(from); + if (meta1 == NULL) { + PyErr_Clear(); + return 0; + } + meta2 = get_datetime_metadata_from_dtype(to); + if (meta2 == NULL) { + PyErr_Clear(); + return 0; + } + + if (casting == NPY_NO_CASTING) { + return PyArray_ISNBO(from->byteorder) == + PyArray_ISNBO(to->byteorder) && + can_cast_timedelta64_metadata(meta1, meta2, casting); + } + else { + return can_cast_timedelta64_metadata(meta1, meta2, casting); + } + } + default: + switch (casting) { + case NPY_NO_CASTING: + return PyArray_LegacyEquivTypes(from, to); + case NPY_EQUIV_CASTING: + return (from->elsize == to->elsize); + case NPY_SAFE_CASTING: + return (from->elsize <= to->elsize); + default: + return 1; + } + break; + } + } + /* If safe or same-kind casts are allowed */ + else if (casting == NPY_SAFE_CASTING || casting == NPY_SAME_KIND_CASTING) { + if (PyArray_LegacyCanCastTo(from, to)) { + return 1; + } + else if(casting == NPY_SAME_KIND_CASTING) { + /* + * Also allow casting from lower to higher kinds, according + * to the ordering provided by dtype_kind_to_ordering. + * Some kinds, like datetime, don't fit in the hierarchy, + * and are special cased as -1. 
+ */ + int from_order, to_order; + + from_order = dtype_kind_to_ordering(from->kind); + to_order = dtype_kind_to_ordering(to->kind); + + if (to->kind == 'm') { + /* both types being timedelta is already handled before. */ + int integer_order = dtype_kind_to_ordering('i'); + return (from_order != -1) && (from_order <= integer_order); + } + + return (from_order != -1) && (from_order <= to_order); + } + else { + return 0; + } + } + /* NPY_NO_CASTING or NPY_EQUIV_CASTING was specified */ + else { + return 0; + } +} + + +/* + * Legacy function to find the correct dtype when casting from any built-in + * dtype to NPY_STRING, NPY_UNICODE, NPY_VOID, and NPY_DATETIME with generic + * units. + * + * This function returns a dtype based on flex_dtype and the values in + * data_dtype. It also calls Py_DECREF on the flex_dtype. If the + * flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype) +{ + PyArray_DatetimeMetaData *meta; + PyArray_Descr *retval = NULL; + int flex_type_num; + + if (flex_dtype == NULL) { + return retval; + } + + flex_type_num = flex_dtype->type_num; + + /* Flexible types with expandable size */ + if (PyDataType_ISUNSIZED(flex_dtype)) { + /* First replace the flex_dtype */ + retval = PyArray_DescrNew(flex_dtype); + Py_DECREF(flex_dtype); + if (retval == NULL) { + return retval; + } + + if (data_dtype->type_num == flex_type_num || + flex_type_num == NPY_VOID) { + (retval)->elsize = data_dtype->elsize; + } + else if (flex_type_num == NPY_STRING || flex_type_num == NPY_UNICODE) { + npy_intp size = 8; + + /* + * Get a string-size estimate of the input. 
These + * are generally the size needed, rounded up to + * a multiple of eight. + */ + switch (data_dtype->type_num) { + case NPY_BOOL: + case NPY_UBYTE: + case NPY_BYTE: + case NPY_USHORT: + case NPY_SHORT: + case NPY_UINT: + case NPY_INT: + case NPY_ULONG: + case NPY_LONG: + case NPY_ULONGLONG: + case NPY_LONGLONG: + if (data_dtype->kind == 'b') { + /* 5 chars needed for cast to 'True' or 'False' */ + size = 5; + } + else if (data_dtype->elsize > 8 || + data_dtype->elsize < 0) { + /* + * Element size should never be greater than 8 or + * less than 0 for integer type, but just in case... + */ + break; + } + else if (data_dtype->kind == 'u') { + size = REQUIRED_STR_LEN[data_dtype->elsize]; + } + else if (data_dtype->kind == 'i') { + /* Add character for sign symbol */ + size = REQUIRED_STR_LEN[data_dtype->elsize] + 1; + } + break; + case NPY_HALF: + case NPY_FLOAT: + case NPY_DOUBLE: + size = 32; + break; + case NPY_LONGDOUBLE: + size = 48; + break; + case NPY_CFLOAT: + case NPY_CDOUBLE: + size = 2 * 32; + break; + case NPY_CLONGDOUBLE: + size = 2 * 48; + break; + case NPY_OBJECT: + size = 64; + break; + case NPY_STRING: + case NPY_VOID: + size = data_dtype->elsize; + break; + case NPY_UNICODE: + size = data_dtype->elsize / 4; + break; + case NPY_DATETIME: + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + Py_DECREF(retval); + return NULL; + } + size = get_datetime_iso_8601_strlen(0, meta->base); + break; + case NPY_TIMEDELTA: + size = 21; + break; + } + + if (flex_type_num == NPY_STRING) { + retval->elsize = size; + } + else if (flex_type_num == NPY_UNICODE) { + retval->elsize = size * 4; + } + } + else { + /* + * We should never get here, but just in case someone adds + * a new flex dtype...
+ */ + PyErr_SetString(PyExc_TypeError, + "don't know how to adapt flex dtype"); + Py_DECREF(retval); + return NULL; + } + } + /* Flexible type with generic time unit that adapts */ + else if (flex_type_num == NPY_DATETIME || + flex_type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(flex_dtype); + retval = flex_dtype; + if (meta == NULL) { + return NULL; + } + + if (meta->base == NPY_FR_GENERIC) { + if (data_dtype->type_num == NPY_DATETIME || + data_dtype->type_num == NPY_TIMEDELTA) { + meta = get_datetime_metadata_from_dtype(data_dtype); + if (meta == NULL) { + return NULL; + } + + retval = create_datetime_dtype(flex_type_num, meta); + Py_DECREF(flex_dtype); + } + } + } + else { + retval = flex_dtype; + } + return retval; +} diff --git a/numpy/core/src/multiarray/legacy_dtype_implementation.h b/numpy/core/src/multiarray/legacy_dtype_implementation.h new file mode 100644 index 000000000..ca171d773 --- /dev/null +++ b/numpy/core/src/multiarray/legacy_dtype_implementation.h @@ -0,0 +1,40 @@ +#ifndef _NPY_LEGACY_DTYPE_IMPLEMENTATION_H +#define _NPY_LEGACY_DTYPE_IMPLEMENTATION_H + + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypes(PyArray_Descr *type1, PyArray_Descr *type2); + +NPY_NO_EXPORT unsigned char +PyArray_LegacyEquivTypenums(int typenum1, int typenum2); + +NPY_NO_EXPORT int +PyArray_LegacyCanCastSafely(int fromtype, int totype); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTo(PyArray_Descr *from, PyArray_Descr *to); + +NPY_NO_EXPORT npy_bool +PyArray_LegacyCanCastTypeTo(PyArray_Descr *from, PyArray_Descr *to, + NPY_CASTING casting); + +/* + * This function calls Py_DECREF on flex_dtype, and replaces it with + * a new dtype that has been adapted based on the values in data_dtype + * and data_obj. If the flex_dtype is not flexible, it returns it as-is. + * + * Usually, if data_obj is not an array, dtype should be the result + * given by the PyArray_GetArrayParamsFromObject function. 
+ * + * The data_obj may be NULL if just a dtype is known for the source. + * + * If *flex_dtype is NULL, returns immediately, without setting an + * exception, leaving any previous error handling intact. + * + * The current flexible dtypes include NPY_STRING, NPY_UNICODE, NPY_VOID, + * and NPY_DATETIME with generic units. + */ +NPY_NO_EXPORT PyArray_Descr * +PyArray_AdaptFlexibleDType(PyArray_Descr *data_dtype, PyArray_Descr *flex_dtype); + +#endif /*_NPY_LEGACY_DTYPE_IMPLEMENTATION_H*/ diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 76df2337b..9c8bb4135 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -2180,7 +2180,7 @@ static PyObject * array_sizeof(PyArrayObject *self) { /* object + dimension and strides */ - Py_ssize_t nbytes = NPY_SIZEOF_PYARRAYOBJECT + + Py_ssize_t nbytes = Py_TYPE(self)->tp_basicsize + PyArray_NDIM(self) * sizeof(npy_intp) * 2; if (PyArray_CHKFLAGS(self, NPY_ARRAY_OWNDATA)) { nbytes += PyArray_NBYTES(self); diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 1aad70dc6..af5949e73 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -30,6 +30,8 @@ #include "npy_config.h" #include "npy_pycompat.h" #include "npy_import.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; @@ -1480,65 +1482,6 @@ array_putmask(PyObject *NPY_UNUSED(module), PyObject *args, PyObject *kwds) return PyArray_PutMask((PyArrayObject *)array, values, mask); } -/* - * Compare the field dictionaries for two types. - * - * Return 1 if the field types and field names of the two descrs are equal and - * in the same order, 0 if not. 
- */ -static int -_equivalent_fields(PyArray_Descr *type1, PyArray_Descr *type2) { - - int val; - - if (type1->fields == type2->fields && type1->names == type2->names) { - return 1; - } - if (type1->fields == NULL || type2->fields == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(type1->fields, type2->fields, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - val = PyObject_RichCompareBool(type1->names, type2->names, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return 1; -} - -/* - * Compare the subarray data for two types. - * Return 1 if they are the same, 0 if not. - */ -static int -_equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) -{ - int val; - - if (sub1 == sub2) { - return 1; - - } - if (sub1 == NULL || sub2 == NULL) { - return 0; - } - - val = PyObject_RichCompareBool(sub1->shape, sub2->shape, Py_EQ); - if (val != 1 || PyErr_Occurred()) { - PyErr_Clear(); - return 0; - } - - return PyArray_EquivTypes(sub1->base, sub2->base); -} - /*NUMPY_API * @@ -1548,40 +1491,24 @@ _equivalent_subarrays(PyArray_ArrayDescr *sub1, PyArray_ArrayDescr *sub2) NPY_NO_EXPORT unsigned char PyArray_EquivTypes(PyArray_Descr *type1, PyArray_Descr *type2) { - int type_num1, type_num2, size1, size2; - - if (type1 == type2) { - return NPY_TRUE; - } - - type_num1 = type1->type_num; - type_num2 = type2->type_num; - size1 = type1->elsize; - size2 = type2->elsize; - - if (size1 != size2) { - return NPY_FALSE; - } - if (PyArray_ISNBO(type1->byteorder) != PyArray_ISNBO(type2->byteorder)) { - return NPY_FALSE; - } - if (type1->subarray || type2->subarray) { - return ((type_num1 == type_num2) - && _equivalent_subarrays(type1->subarray, type2->subarray)); - } - if (type_num1 == NPY_VOID || type_num2 == NPY_VOID) { - return ((type_num1 == type_num2) && _equivalent_fields(type1, type2)); - } - if (type_num1 == NPY_DATETIME - || type_num1 == NPY_TIMEDELTA - || type_num2 == NPY_DATETIME - || 
type_num2 == NPY_TIMEDELTA) { - return ((type_num1 == type_num2) - && has_equivalent_datetime_metadata(type1, type2)); +#if NPY_USE_NEW_CASTINGIMPL + /* + * Do not use PyArray_CanCastTypeTo because it supports legacy flexible + * dtypes as input. + */ + NPY_CASTING safety = PyArray_GetCastSafety(type1, type2, NULL); + if (safety < 0) { + PyErr_Clear(); + return 0; } - return type1->kind == type2->kind; + /* If casting is "no casting" this dtypes are considered equivalent. */ + return PyArray_MinCastSafety(safety, NPY_NO_CASTING) == NPY_NO_CASTING; +#else + return PyArray_LegacyEquivTypes(type1, type2); +#endif } + /*NUMPY_API*/ NPY_NO_EXPORT unsigned char PyArray_EquivTypenums(int typenum1, int typenum2) @@ -2003,20 +1930,41 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) int alloc = 0; void *dptr; PyObject *ret; - + PyObject *base = NULL; if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|O:scalar", kwlist, &PyArrayDescr_Type, &typecode, &obj)) { return NULL; } if (PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) { - if (!PySequence_Check(obj)) { - PyErr_SetString(PyExc_TypeError, - "found non-sequence while unpickling scalar with " - "NPY_LIST_PICKLE set"); + if (typecode->type_num == NPY_OBJECT) { + /* Deprecated 2020-11-24, NumPy 1.20 */ + if (DEPRECATE( + "Unpickling a scalar with object dtype is deprecated. " + "Object scalars should never be created. If this was a " + "properly created pickle, please open a NumPy issue. In " + "a best effort this returns the original object.") < 0) { + return NULL; + } + Py_INCREF(obj); + return obj; + } + /* We store the full array to unpack it here: */ + if (!PyArray_CheckExact(obj)) { + /* We pickle structured voids as arrays currently */ + PyErr_SetString(PyExc_RuntimeError, + "Unpickling NPY_LIST_PICKLE (structured void) scalar " + "requires an array. 
The pickle file may be corrupted?"); return NULL; } - dptr = &obj; + if (!PyArray_EquivTypes(PyArray_DESCR((PyArrayObject *)obj), typecode)) { + PyErr_SetString(PyExc_RuntimeError, + "Pickled array is not compatible with requested scalar " + "dtype. The pickle file may be corrupted?"); + return NULL; + } + base = obj; + dptr = PyArray_BYTES((PyArrayObject *)obj); } else if (PyDataType_FLAGCHK(typecode, NPY_ITEM_IS_POINTER)) { @@ -2066,7 +2014,7 @@ array_scalar(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) dptr = PyBytes_AS_STRING(obj); } } - ret = PyArray_Scalar(dptr, typecode, NULL); + ret = PyArray_Scalar(dptr, typecode, base); /* free dptr which contains zeros */ if (alloc) { @@ -4299,6 +4247,8 @@ static struct PyMethodDef array_module_methods[] = { METH_VARARGS, NULL}, {"_discover_array_parameters", (PyCFunction)_discover_array_parameters, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_get_castingimpl", (PyCFunction)_get_castingimpl, + METH_VARARGS | METH_KEYWORDS, NULL}, /* from umath */ {"frompyfunc", (PyCFunction) ufunc_frompyfunc, @@ -4317,6 +4267,7 @@ static struct PyMethodDef array_module_methods[] = { }; #include "__multiarray_api.c" +#include "array_method.h" /* Establish scalar-type hierarchy * @@ -4767,9 +4718,20 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { if (set_typeinfo(d) != 0) { goto err; } + if (PyType_Ready(&PyArrayMethod_Type) < 0) { + goto err; + } + if (PyType_Ready(&PyBoundArrayMethod_Type) < 0) { + goto err; + } if (initialize_and_map_pytypes_to_dtypes() < 0) { goto err; } + + if (PyArray_InitializeCasts() < 0) { + goto err; + } + if (initumath(m) != 0) { goto err; } diff --git a/numpy/core/src/multiarray/npy_buffer.h b/numpy/core/src/multiarray/npy_buffer.h index 5ff8b6c2c..d10f1a020 100644 --- a/numpy/core/src/multiarray/npy_buffer.h +++ b/numpy/core/src/multiarray/npy_buffer.h @@ -3,8 +3,8 @@ extern NPY_NO_EXPORT PyBufferProcs array_as_buffer; -NPY_NO_EXPORT void -_dealloc_cached_buffer_info(PyObject *self); 
+NPY_NO_EXPORT int +_buffer_info_free(void *buffer_info, PyObject *obj); NPY_NO_EXPORT PyArray_Descr* _descriptor_from_pep3118_format(char const *s); diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index f04bdbaa8..d018fccbb 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -67,8 +67,11 @@ gentype_alloc(PyTypeObject *type, Py_ssize_t nitems) const size_t size = _PyObject_VAR_SIZE(type, nitems + 1); obj = (PyObject *)PyObject_Malloc(size); + if (obj == NULL) { + PyErr_NoMemory(); + return NULL; + } /* - * Fixme. Need to check for no memory. * If we don't need to zero memory, we could use * PyObject_{New, NewVar} for this whole function. */ @@ -1742,13 +1745,8 @@ gentype_reduce(PyObject *self, PyObject *NPY_UNUSED(args)) if (arr == NULL) { return NULL; } - /* arr.item() */ - PyObject *val = PyArray_GETITEM(arr, PyArray_DATA(arr)); - Py_DECREF(arr); - if (val == NULL) { - return NULL; - } - PyObject *tup = Py_BuildValue("NN", obj, val); + /* Use the whole array which handles structured void correctly */ + PyObject *tup = Py_BuildValue("NN", obj, arr); if (tup == NULL) { return NULL; } @@ -2601,16 +2599,18 @@ NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type = { .tp_basicsize = sizeof(PyObject), }; + static void void_dealloc(PyVoidScalarObject *v) { - _dealloc_cached_buffer_info((PyObject *)v); - if (v->flags & NPY_ARRAY_OWNDATA) { npy_free_cache(v->obval, Py_SIZE(v)); } Py_XDECREF(v->descr); Py_XDECREF(v->base); + if (_buffer_info_free(v->_buffer_info, (PyObject *)v) < 0) { + PyErr_WriteUnraisable(NULL); + } Py_TYPE(v)->tp_free(v); } diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c index 1404c9b68..3eaf99196 100644 --- a/numpy/core/src/multiarray/usertypes.c +++ b/numpy/core/src/multiarray/usertypes.c @@ -39,6 +39,10 @@ maintainer email: oliphant.travis@ieee.org #include "usertypes.h" #include "dtypemeta.h"
#include "scalartypes.h" +#include "array_method.h" +#include "convert_datatype.h" +#include "legacy_dtype_implementation.h" + NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL; @@ -488,3 +492,65 @@ legacy_userdtype_common_dtype_function( Py_INCREF(Py_NotImplemented); return (PyArray_DTypeMeta *)Py_NotImplemented; } + + +/** + * This function wraps a legacy cast into an array-method. This is mostly + * used for legacy user-dtypes, but for example numeric to/from datetime + * casts were only defined that way as well. + * + * @param from + * @param to + * @param casting If `NPY_NO_CASTING` will check the legacy registered cast, + * otherwise uses the provided cast. + */ +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting) +{ + if (casting < 0) { + if (from == to) { + casting = NPY_NO_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAFE_CASTING)) { + casting = NPY_SAFE_CASTING; + } + else if (PyArray_LegacyCanCastTypeTo( + from->singleton, to->singleton, NPY_SAME_KIND_CASTING)) { + casting = NPY_SAME_KIND_CASTING; + } + else { + casting = NPY_UNSAFE_CASTING; + } + } + + PyArray_DTypeMeta *dtypes[2] = {from, to}; + PyArrayMethod_Spec spec = { + /* Name is not actually used, but allows identifying these. 
*/ + .name = "legacy_cast", + .nin = 1, + .nout = 1, + .casting = casting, + .dtypes = dtypes, + }; + + if (from == to) { + spec.flags = NPY_METH_REQUIRES_PYAPI | NPY_METH_SUPPORTS_UNALIGNED; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &legacy_same_dtype_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } + else { + spec.flags = NPY_METH_REQUIRES_PYAPI; + PyType_Slot slots[] = { + {NPY_METH_get_loop, NULL}, + {NPY_METH_resolve_descriptors, &simple_cast_resolve_descriptors}, + {0, NULL}}; + spec.slots = slots; + return PyArray_AddCastingImplementation_FromSpec(&spec, 1); + } +} diff --git a/numpy/core/src/multiarray/usertypes.h b/numpy/core/src/multiarray/usertypes.h index 1b323d458..8b2fc80e6 100644 --- a/numpy/core/src/multiarray/usertypes.h +++ b/numpy/core/src/multiarray/usertypes.h @@ -1,6 +1,8 @@ #ifndef _NPY_PRIVATE_USERTYPES_H_ #define _NPY_PRIVATE_USERTYPES_H_ +#include "array_method.h" + extern NPY_NO_EXPORT PyArray_Descr **userdescrs; NPY_NO_EXPORT void @@ -21,4 +23,8 @@ NPY_NO_EXPORT PyArray_DTypeMeta * legacy_userdtype_common_dtype_function( PyArray_DTypeMeta *cls, PyArray_DTypeMeta *other); +NPY_NO_EXPORT int +PyArray_AddLegacyWrapping_CastingImpl( + PyArray_DTypeMeta *from, PyArray_DTypeMeta *to, NPY_CASTING casting); + #endif diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index 18b6d1434..ff4663dc3 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -398,8 +398,8 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) /**end repeat1**/ /**begin repeat1 - * #kind = atan2,hypot,pow,fmod,copysign# - * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + * #kind = atan2,hypot,pow,copysign# + * #KIND = ATAN2,HYPOT,POW,COPYSIGN# */ #ifdef @kind@@c@ #undef @kind@@c@ @@ -412,6 +412,32 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) 
#endif /**end repeat1**/ +/**begin repeat1 + * #kind = fmod# + * #KIND = FMOD# + */ +#ifdef @kind@@c@ +#undef @kind@@c@ +#endif +#ifndef HAVE_MODF@C@ +NPY_INPLACE @type@ +npy_@kind@@c@(@type@ x, @type@ y) +{ + int are_inputs_inf = (npy_isinf(x) && npy_isinf(y)); + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(x) || npy_isnan(y)) { + npy_set_floatstatus_invalid(); + } + if (are_inputs_inf || !y) { + if (!npy_isnan(x)) { + npy_set_floatstatus_invalid(); + } + } + return (@type@) npy_@kind@((double)x, (double) y); +} +#endif +/**end repeat1**/ + #ifdef modf@c@ #undef modf@c@ #endif @@ -473,8 +499,8 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) /**end repeat1**/ /**begin repeat1 - * #kind = atan2,hypot,pow,fmod,copysign# - * #KIND = ATAN2,HYPOT,POW,FMOD,COPYSIGN# + * #kind = atan2,hypot,pow,copysign# + * #KIND = ATAN2,HYPOT,POW,COPYSIGN# */ #ifdef HAVE_@KIND@@C@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) @@ -484,6 +510,29 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) #endif /**end repeat1**/ +/**begin repeat1 + * #kind = fmod# + * #KIND = FMOD# + */ +#ifdef HAVE_FMOD@C@ +NPY_INPLACE @type@ +npy_@kind@@c@(@type@ x, @type@ y) +{ + int are_inputs_inf = (npy_isinf(x) && npy_isinf(y)); + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(x) || npy_isnan(y)) { + npy_set_floatstatus_invalid(); + } + if (are_inputs_inf || !y) { + if (!npy_isnan(x)) { + npy_set_floatstatus_invalid(); + } + } + return @kind@@c@(x, y); +} +#endif +/**end repeat1**/ + #ifdef HAVE_MODF@C@ NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr) { @@ -625,6 +674,38 @@ NPY_INPLACE @type@ npy_logaddexp2@c@(@type@ x, @type@ y) } /* + * Wrapper function for remainder edge cases + * Internally calls npy_divmod* + */ +NPY_INPLACE @type@ +npy_remainder@c@(@type@ a, @type@ b) +{ + @type@ mod; + if (NPY_UNLIKELY(!b)) { + mod = npy_fmod@c@(a, b); + } else { + npy_divmod@c@(a, b, &mod); + } + return mod; +} + +NPY_INPLACE @type@ 
+npy_floor_divide@c@(@type@ a, @type@ b) { + @type@ div, mod; + if (NPY_UNLIKELY(!b)) { + div = a / b; + if (!a || npy_isnan(a)) { + npy_set_floatstatus_invalid(); + } else { + npy_set_floatstatus_divbyzero(); + } + } else { + div = npy_divmod@c@(a, b, &mod); + } + return div; +} + +/* * Python version of divmod. * * The implementation is mostly copied from cpython 3.5. @@ -634,12 +715,19 @@ npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus) { @type@ div, mod, floordiv; + /* force set invalid flag, doesnt raise by default on gcc < 8 */ + if (npy_isnan(a) || npy_isnan(b)) { + npy_set_floatstatus_invalid(); + } mod = npy_fmod@c@(a, b); - - if (!b) { + if (NPY_UNLIKELY(!b)) { + div = a / b; + if (a && !npy_isnan(a)) { + npy_set_floatstatus_divbyzero(); + } /* If b == 0, return result of fmod. For IEEE is nan */ *modulus = mod; - return mod; + return div; } /* a - mod should be very nearly an integer multiple of b */ diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index c9efdeb4e..c2e06a4fd 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1955,8 +1955,7 @@ NPY_NO_EXPORT void BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - @type@ mod; - *((@type@ *)op1) = npy_divmod@c@(in1, in2, &mod); + *((@type@ *)op1) = npy_floor_divide@c@(in1, in2); } } @@ -1966,7 +1965,7 @@ NPY_NO_EXPORT void BINARY_LOOP { const @type@ in1 = *(@type@ *)ip1; const @type@ in2 = *(@type@ *)ip2; - npy_divmod@c@(in1, in2, (@type@ *)op1); + *((@type@ *) op1) = npy_remainder@c@(in1, in2); } } @@ -2306,8 +2305,13 @@ HALF_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps BINARY_LOOP { const npy_half in1 = *(npy_half *)ip1; const npy_half in2 = *(npy_half *)ip2; - npy_half mod; - *((npy_half *)op1) = npy_half_divmod(in1, in2, &mod); + + float fh1 = npy_half_to_float(in1); + float fh2 = npy_half_to_float(in2); + float div; + + div = npy_floor_dividef(fh1, fh2); + *((npy_half 
*)op1) = npy_float_to_half(div); } } @@ -2317,7 +2321,11 @@ HALF_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, v BINARY_LOOP { const npy_half in1 = *(npy_half *)ip1; const npy_half in2 = *(npy_half *)ip2; - npy_half_divmod(in1, in2, (npy_half *)op1); + float fh1 = npy_half_to_float(in1); + float fh2 = npy_half_to_float(in2); + float mod; + mod = npy_remainderf(fh1, fh2); + *((npy_half *)op1) = npy_float_to_half(mod); } } diff --git a/numpy/core/src/umath/scalarmath.c.src b/numpy/core/src/umath/scalarmath.c.src index 55bc958cb..86dade0f1 100644 --- a/numpy/core/src/umath/scalarmath.c.src +++ b/numpy/core/src/umath/scalarmath.c.src @@ -285,7 +285,11 @@ static void @name@_ctype_floor_divide(@type@ a, @type@ b, @type@ *out) { @type@ mod; - *out = npy_divmod@c@(a, b, &mod); + if (!b) { + *out = a / b; + } else { + *out = npy_divmod@c@(a, b, &mod); + } } @@ -318,7 +322,11 @@ static void half_ctype_floor_divide(npy_half a, npy_half b, npy_half *out) { npy_half mod; - *out = npy_half_divmod(a, b, &mod); + if (!b) { + *out = a / b; + } else { + *out = npy_half_divmod(a, b, &mod); + } } diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 24730f969..0f42f7076 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -281,6 +281,19 @@ def test_array_astype(): a = np.array(1000, dtype='i4') assert_raises(TypeError, a.astype, 'U1', casting='safe') + +@pytest.mark.parametrize("dt", ["d", "f", "S13", "U32"]) +def test_array_astype_to_void(dt): + dt = np.dtype(dt) + arr = np.array([], dtype=dt) + assert arr.astype("V").dtype.itemsize == dt.itemsize + +def test_object_array_astype_to_void(): + # This is different to `test_array_astype_to_void` as object arrays + # are inspected. 
The default void is "V8" (8 is the length of double) + arr = np.array([], dtype="O").astype("V") + assert arr.dtype == "V8" + @pytest.mark.parametrize("t", np.sctypes['uint'] + np.sctypes['int'] + np.sctypes['float'] ) diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py new file mode 100644 index 000000000..fec0ae7c7 --- /dev/null +++ b/numpy/core/tests/test_casting_unittests.py @@ -0,0 +1,301 @@ +""" +The tests exercise the casting machinery in a more low-level manner. +The reason is mostly to test a new implementation of the casting machinery. + +Unlike most tests in NumPy, these are closer to unit-tests rather +than integration tests. +""" + +import pytest +import textwrap +import enum + +import numpy as np + +from numpy.core._multiarray_umath import ( + _get_castingimpl as get_castingimpl) +from numpy.core._multiarray_tests import uses_new_casts + + +# Simple skips object, parametric and long double (unsupported by struct) +simple_dtypes = "?bhilqBHILQefdFD" +if np.dtype("l").itemsize != np.dtype("q").itemsize: + # Remove l and L, the table was generated with 64bit linux in mind. + # TODO: Should have two tables or no a different solution. + simple_dtypes = simple_dtypes.replace("l", "").replace("L", "") +simple_dtypes = [type(np.dtype(c)) for c in simple_dtypes] + + +def simple_dtype_instances(): + for dtype_class in simple_dtypes: + dt = dtype_class() + yield pytest.param(dt, id=str(dt)) + if dt.byteorder != "|": + dt = dt.newbyteorder() + yield pytest.param(dt, id=str(dt)) + + +def get_expected_stringlength(dtype): + """Returns the string length when casting the basic dtypes to strings. 
+ """ + if dtype == np.bool_: + return 5 + if dtype.kind in "iu": + if dtype.itemsize == 1: + length = 3 + elif dtype.itemsize == 2: + length = 5 + elif dtype.itemsize == 4: + length = 10 + elif dtype.itemsize == 8: + length = 20 + else: + raise AssertionError(f"did not find expected length for {dtype}") + + if dtype.kind == "i": + length += 1 # adds one character for the sign + + return length + + # Note: Can't do dtype comparison for longdouble on windows + if dtype.char == "g": + return 48 + elif dtype.char == "G": + return 48 * 2 + elif dtype.kind == "f": + return 32 # also for half apparently. + elif dtype.kind == "c": + return 32 * 2 + + raise AssertionError(f"did not find expected length for {dtype}") + + +class Casting(enum.IntEnum): + no = 0 + equiv = 1 + safe = 2 + same_kind = 3 + unsafe = 4 + cast_is_view = 1 << 16 + + +def _get_cancast_table(): + table = textwrap.dedent(""" + X ? b h i l q B H I L Q e f d g F D G S U V O M m + ? # = = = = = = = = = = = = = = = = = = = = = . = + b . # = = = = . . . . . = = = = = = = = = = = . = + h . ~ # = = = . . . . . ~ = = = = = = = = = = . = + i . ~ ~ # = = . . . . . ~ ~ = = ~ = = = = = = . = + l . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + q . ~ ~ ~ # # . . . . . ~ ~ = = ~ = = = = = = . = + B . ~ = = = = # = = = = = = = = = = = = = = = . = + H . ~ ~ = = = ~ # = = = ~ = = = = = = = = = = . = + I . ~ ~ ~ = = ~ ~ # = = ~ ~ = = ~ = = = = = = . = + L . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + Q . ~ ~ ~ ~ ~ ~ ~ ~ # # ~ ~ = = ~ = = = = = = . ~ + e . . . . . . . . . . . # = = = = = = = = = = . . + f . . . . . . . . . . . ~ # = = = = = = = = = . . + d . . . . . . . . . . . ~ ~ # = ~ = = = = = = . . + g . . . . . . . . . . . ~ ~ ~ # ~ ~ = = = = = . . + F . . . . . . . . . . . . . . . # = = = = = = . . + D . . . . . . . . . . . . . . . ~ # = = = = = . . + G . . . . . . . . . . . . . . . ~ ~ # = = = = . . + S . . . . . . . . . . . . . . . . . . # = = = . . + U . . . . . . . . . . . . . . . . . . . # = = . . + V . . 
. . . . . . . . . . . . . . . . . . # = . . + O . . . . . . . . . . . . . . . . . . . . = # . . + M . . . . . . . . . . . . . . . . . . . . = = # . + m . . . . . . . . . . . . . . . . . . . . = = . # + """).strip().split("\n") + dtypes = [type(np.dtype(c)) for c in table[0][2::2]] + + convert_cast = {".": Casting.unsafe, "~": Casting.same_kind, + "=": Casting.safe, "#": Casting.equiv, + " ": -1} + + cancast = {} + for from_dt, row in zip(dtypes, table[1:]): + cancast[from_dt] = {} + for to_dt, c in zip(dtypes, row[2::2]): + cancast[from_dt][to_dt] = convert_cast[c] + + return cancast + +CAST_TABLE = _get_cancast_table() + + +class TestChanges: + """ + These test cases excercise some behaviour changes + """ + @pytest.mark.parametrize("string", ["S", "U"]) + @pytest.mark.parametrize("floating", ["e", "f", "d", "g"]) + def test_float_to_string(self, floating, string): + assert np.can_cast(floating, string) + # 100 is long enough to hold any formatted floating + if uses_new_casts(): + assert np.can_cast(floating, f"{string}100") + else: + assert not np.can_cast(floating, f"{string}100") + assert np.can_cast(floating, f"{string}100", casting="same_kind") + + def test_to_void(self): + # But in general, we do consider these safe: + assert np.can_cast("d", "V") + assert np.can_cast("S20", "V") + + # Do not consider it a safe cast if the void is too smaller: + if uses_new_casts(): + assert not np.can_cast("d", "V1") + assert not np.can_cast("S20", "V1") + assert not np.can_cast("U1", "V1") + # Structured to unstructured is just like any other: + assert np.can_cast("d,i", "V", casting="same_kind") + else: + assert np.can_cast("d", "V1") + assert np.can_cast("S20", "V1") + assert np.can_cast("U1", "V1") + assert not np.can_cast("d,i", "V", casting="same_kind") + + +class TestCasting: + @pytest.mark.parametrize("from_Dt", simple_dtypes) + def test_simple_cancast(self, from_Dt): + for to_Dt in simple_dtypes: + cast = get_castingimpl(from_Dt, to_Dt) + + for from_dt in 
[from_Dt(), from_Dt().newbyteorder()]: + default = cast._resolve_descriptors((from_dt, None))[1][1] + assert default == to_Dt() + del default + + for to_dt in [to_Dt(), to_Dt().newbyteorder()]: + casting, (from_res, to_res) = cast._resolve_descriptors( + (from_dt, to_dt)) + assert(type(from_res) == from_Dt) + assert(type(to_res) == to_Dt) + if casting & Casting.cast_is_view: + # If a view is acceptable, this is "no" casting + # and byte order must be matching. + assert casting == Casting.no | Casting.cast_is_view + # The above table lists this as "equivalent" + assert Casting.equiv == CAST_TABLE[from_Dt][to_Dt] + # Note that to_res may not be the same as from_dt + assert from_res.isnative == to_res.isnative + else: + if from_Dt == to_Dt: + # Note that to_res may not be the same as from_dt + assert from_res.isnative != to_res.isnative + assert casting == CAST_TABLE[from_Dt][to_Dt] + + if from_Dt is to_Dt: + assert(from_dt is from_res) + assert(to_dt is to_res) + + + def string_with_modified_length(self, dtype, change_length): + fact = 1 if dtype.char == "S" else 4 + length = dtype.itemsize // fact + change_length + return np.dtype(f"{dtype.byteorder}{dtype.char}{length}") + + @pytest.mark.parametrize("other_DT", simple_dtypes) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_cancast(self, other_DT, string_char): + fact = 1 if string_char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(other_DT, string_DT) + + other_dt = other_DT() + expected_length = get_expected_stringlength(other_dt) + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert safety == Casting.safe # we consider to string casts "safe" + assert isinstance(res_dt, string_DT) + + # These casts currently implement changing the string length, so + # check the cast-safety for too long/fixed string lengths: + 
for change_length in [-1, 0, 1]: + if change_length >= 0: + expected_safety = Casting.safe + else: + expected_safety = Casting.same_kind + + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + assert res_dt is to_dt + assert safety == expected_safety + + # The opposite direction is always considered unsafe: + cast = get_castingimpl(string_DT, other_DT) + + safety, _ = cast._resolve_descriptors((string_dt, other_dt)) + assert safety == Casting.unsafe + + cast = get_castingimpl(string_DT, other_DT) + safety, (_, res_dt) = cast._resolve_descriptors((string_dt, None)) + assert safety == Casting.unsafe + assert other_dt is res_dt # returns the singleton for simple dtypes + + @pytest.mark.parametrize("other_dt", ["S8", "<U8", ">U8"]) + @pytest.mark.parametrize("string_char", ["S", "U"]) + def test_string_to_string_cancast(self, other_dt, string_char): + other_dt = np.dtype(other_dt) + + fact = 1 if string_char == "S" else 4 + div = 1 if other_dt.char == "S" else 4 + + string_DT = type(np.dtype(string_char)) + cast = get_castingimpl(type(other_dt), string_DT) + + expected_length = other_dt.itemsize // div + string_dt = np.dtype(f"{string_char}{expected_length}") + + safety, (res_other_dt, res_dt) = cast._resolve_descriptors((other_dt, None)) + assert res_dt.itemsize == expected_length * fact + assert isinstance(res_dt, string_DT) + + if other_dt.char == string_char: + if other_dt.isnative: + expected_safety = Casting.no | Casting.cast_is_view + else: + expected_safety = Casting.equiv + elif string_char == "U": + expected_safety = Casting.safe + else: + expected_safety = Casting.unsafe + + assert expected_safety == safety + + for change_length in [-1, 0, 1]: + to_dt = self.string_with_modified_length(string_dt, change_length) + safety, (_, res_dt) = cast._resolve_descriptors((other_dt, to_dt)) + + assert res_dt is to_dt + if expected_safety == Casting.unsafe: + assert safety == 
expected_safety + elif change_length < 0: + assert safety == Casting.same_kind + elif change_length == 0: + assert safety == expected_safety + elif change_length > 0: + assert safety == Casting.safe + + def test_void_to_string_special_case(self): + # Cover a small special case in void to string casting that could + # probably just as well be turned into an error (compare + # `test_object_to_parametric_internal_error` below). + assert np.array([], dtype="V5").astype("S").dtype.itemsize == 5 + assert np.array([], dtype="V5").astype("U").dtype.itemsize == 4 * 5 + + def test_object_to_parametric_internal_error(self): + # We reject casting from object to a parametric type, without + # figuring out the correct instance first. + object_dtype = type(np.dtype(object)) + other_dtype = type(np.dtype(str)) + cast = get_castingimpl(object_dtype, other_dtype) + with pytest.raises(TypeError, + match="casting from object to the parametric DType"): + cast._resolve_descriptors((np.dtype("O"), None)) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 380b78f67..a67fe62c3 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -771,3 +771,17 @@ class TestDeprecateSubarrayDTypeDuringArrayCoercion(_DeprecationTestCase): np.array(arr, dtype="(2,2)f") self.assert_deprecated(check) + + +class TestDeprecatedUnpickleObjectScalar(_DeprecationTestCase): + # Deprecated 2020-11-24, NumPy 1.20 + """ + Technically, it should be impossible to create numpy object scalars, + but there was an unpickle path that would in theory allow it. That + path is invalid and must lead to the warning. + """ + message = "Unpickling a scalar with object dtype is deprecated." 
+ + def test_deprecated(self): + ctor = np.core.multiarray.scalar + self.assert_deprecated(lambda: ctor(np.dtype("O"), 1)) diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 1b2b85cc1..0ebcc72da 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -153,6 +153,9 @@ class TestBuiltin: 'formats': ['f4', 'i4'], 'offsets': [4, 0]}) assert_equal(x == y, False) + # But it is currently an equivalent cast: + assert np.can_cast(x, y, casting="equiv") + class TestRecord: def test_equivalent_record(self): diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 61806f99f..12306cbb8 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -7526,6 +7526,25 @@ class TestNewBufferProtocol: f.a = 3 assert_equal(arr['a'], 3) + @pytest.mark.parametrize("obj", [np.ones(3), np.ones(1, dtype="i,i")[()]]) + def test_error_if_stored_buffer_info_is_corrupted(self, obj): + """ + If a user extends a NumPy array before 1.20 and then runs it + on NumPy 1.20+. A C-subclassed array might in theory modify + the new buffer-info field. This checks that an error is raised + if this happens (for buffer export), an error is written on delete. + This is a sanity check to help users transition to safe code, it + may be deleted at any point. 
+ """ + # corrupt buffer info: + _multiarray_tests.corrupt_or_fix_bufferinfo(obj) + name = type(obj) + with pytest.raises(RuntimeError, + match=f".*{name} appears to be C subclassed"): + memoryview(obj) + # Fix buffer info again before we delete (or we lose the memory) + _multiarray_tests.corrupt_or_fix_bufferinfo(obj) + class TestArrayAttributeDeletion: diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index f5428f98c..866a96e31 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -922,6 +922,25 @@ class TestTypes: assert_equal(np.promote_types('u8', 'S1'), np.dtype('S20')) assert_equal(np.promote_types('u8', 'S30'), np.dtype('S30')) + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V6"), np.dtype("V10")], + [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])], + [np.dtype("i8,i8"), np.dtype("i4,i4")], + ]) + def test_invalid_void_promotion(self, dtype1, dtype2): + # Mainly test structured void promotion, which currently allows + # byte-swapping, but nothing else: + with pytest.raises(TypeError): + np.promote_types(dtype1, dtype2) + + @pytest.mark.parametrize(["dtype1", "dtype2"], + [[np.dtype("V10"), np.dtype("V10")], + [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])], + [np.dtype("i8,i8"), np.dtype("i8,>i8")], + ]) + def test_valid_void_promotion(self, dtype1, dtype2): + assert np.promote_types(dtype1, dtype2) is dtype1 + @pytest.mark.parametrize("dtype", list(np.typecodes["All"]) + ["i,i", "S3", "S100", "U3", "U100", rational]) diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index f28ad5ac9..4d4b4b515 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -424,7 +424,16 @@ class TestRecord: # make sure we did not pickle the address assert not isinstance(obj, bytes) - assert_raises(TypeError, ctor, dtype, 13) + assert_raises(RuntimeError, ctor, dtype, 13) + + # Test roundtrip: + dump = 
pickle.dumps(a[0]) + unpickled = pickle.loads(dump) + assert a[0] == unpickled + + # Also check the similar (impossible) "object scalar" path: + with pytest.warns(DeprecationWarning): + assert ctor(np.dtype("O"), data) is data def test_objview_record(self): # https://github.com/numpy/numpy/issues/2599 diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index c7f44cf50..d8529418e 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -276,6 +276,10 @@ class TestModulus: # Check nans, inf with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in remainder") + sup.filter(RuntimeWarning, "divide by zero encountered in floor_divide") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + sup.filter(RuntimeWarning, "invalid value encountered in divmod") for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) @@ -290,6 +294,9 @@ class TestModulus: assert_(np.isnan(rem), 'dt: %s' % dt) rem = operator.mod(finf, fone) assert_(np.isnan(rem), 'dt: %s' % dt) + for op in [floordiv_and_mod, divmod]: + div, mod = op(fone, fzer) + assert_(np.isinf(div)) and assert_(np.isnan(mod)) def test_inplace_floordiv_handling(self): # issue gh-12927 diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index f57493e9c..3f89cc59b 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -13,7 +13,7 @@ from numpy.testing import ( assert_, assert_equal, assert_raises, assert_raises_regex, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings, - _gen_alignment_data, assert_array_almost_equal_nulp + _gen_alignment_data, assert_array_almost_equal_nulp, assert_warns ) def on_powerpc(): @@ -293,6 +293,42 @@ class TestDivision: 
assert_equal(np.signbit(x//1), 0) assert_equal(np.signbit((-x)//1), 1) + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_errors(self, dtype): + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + # divide by zero error check + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.floor_divide, fone, fzer) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.floor_divide, fnan, fone) + assert_raises(FloatingPointError, np.floor_divide, fone, fnan) + assert_raises(FloatingPointError, np.floor_divide, fnan, fzer) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_floor_division_corner_cases(self, dtype): + # test corner cases like 1.0//0.0 for errors and return vals + x = np.zeros(10, dtype=dtype) + y = np.ones(10, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + fzer = np.array(0.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in floor_divide") + div = np.floor_divide(fnan, fone) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div = np.floor_divide(fone, fnan) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + div = np.floor_divide(fnan, fzer) + assert(np.isnan(div)), "dt: %s, div: %s" % (dt, div) + # verify 1.0//0.0 computations return inf + with np.errstate(divide='ignore'): + z = np.floor_divide(y, x) + assert_(np.isinf(z).all()) + def floor_divide_and_remainder(x, y): return (np.floor_divide(x, y), np.remainder(x, y)) @@ -366,9 +402,90 @@ class TestRemainder: else: assert_(b > rem >= 0, msg) + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + def test_float_divmod_errors(self, dtype): + # Check valid errors raised for divmod and remainder + fzero = np.array(0.0, dtype=dtype) + fone = 
np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + # since divmod is combination of both remainder and divide + # ops it will set both dividebyzero and invalid flags + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fone, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, fzero, fzero) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, finf) + with np.errstate(divide='ignore', invalid='raise'): + assert_raises(FloatingPointError, np.divmod, finf, fzero) + with np.errstate(divide='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, finf, fzero) + + @pytest.mark.parametrize('dtype', np.typecodes['Float']) + @pytest.mark.parametrize('fn', [np.fmod, np.remainder]) + def test_float_remainder_errors(self, dtype, fn): + fzero = np.array(0.0, dtype=dtype) + fone = np.array(1.0, dtype=dtype) + finf = np.array(np.inf, dtype=dtype) + fnan = np.array(np.nan, dtype=dtype) + with np.errstate(invalid='raise'): + assert_raises(FloatingPointError, fn, fone, fzero) + assert_raises(FloatingPointError, fn, fnan, fzero) + assert_raises(FloatingPointError, fn, fone, fnan) + assert_raises(FloatingPointError, fn, fnan, fone) + + def test_float_remainder_overflow(self): + a = np.finfo(np.float64).tiny + with np.errstate(over='ignore', invalid='ignore'): + div, mod = np.divmod(4, a) + np.isinf(div) + assert_(mod == 0) + with np.errstate(over='raise', invalid='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + with np.errstate(invalid='raise', over='ignore'): + assert_raises(FloatingPointError, np.divmod, 4, a) + + def test_float_divmod_corner_cases(self): + # check nan cases + for dt in np.typecodes['Float']: + fnan = np.array(np.nan, dtype=dt) + fone = 
np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + finf = np.array(np.inf, dtype=dt) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in divmod") + sup.filter(RuntimeWarning, "divide by zero encountered in divmod") + div, rem = np.divmod(fone, fzer) + assert(np.isinf(div)), 'dt: %s, div: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fzer, fzer) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + assert_(np.isnan(div)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(finf, finf) + assert(np.isnan(div)), 'dt: %s, rem: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(finf, fzer) + assert(np.isinf(div)), 'dt: %s, rem: %s' % (dt, rem) + assert(np.isnan(rem)), 'dt: %s, rem: %s' % (dt, rem) + div, rem = np.divmod(fnan, fone) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + div, rem = np.divmod(fone, fnan) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + div, rem = np.divmod(fnan, fzer) + assert(np.isnan(rem)), "dt: %s, rem: %s" % (dt, rem) + assert(np.isnan(div)), "dt: %s, rem: %s" % (dt, rem) + def test_float_remainder_corner_cases(self): # Check remainder magnitude. 
for dt in np.typecodes['Float']: + fone = np.array(1.0, dtype=dt) + fzer = np.array(0.0, dtype=dt) + fnan = np.array(np.nan, dtype=dt) b = np.array(1.0, dtype=dt) a = np.nextafter(np.array(0.0, dtype=dt), -b) rem = np.remainder(a, b) @@ -379,6 +496,7 @@ class TestRemainder: # Check nans, inf with suppress_warnings() as sup: sup.filter(RuntimeWarning, "invalid value encountered in remainder") + sup.filter(RuntimeWarning, "invalid value encountered in fmod") for dt in np.typecodes['Float']: fone = np.array(1.0, dtype=dt) fzer = np.array(0.0, dtype=dt) @@ -389,10 +507,30 @@ class TestRemainder: # MSVC 2008 returns NaN here, so disable the check. #rem = np.remainder(fone, finf) #assert_(rem == fone, 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, fone) + fmod = np.fmod(finf, fone) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + rem = np.remainder(finf, finf) + fmod = np.fmod(finf, fone) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(finf, fzer) + fmod = np.fmod(finf, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) rem = np.remainder(fone, fnan) + fmod = np.fmod(fone, fnan) assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) - rem = np.remainder(finf, fone) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, fmod)) + rem = np.remainder(fnan, fzer) + fmod = np.fmod(fnan, fzer) + assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) + rem = np.remainder(fnan, fone) + fmod = np.fmod(fnan, fone) assert_(np.isnan(rem), 'dt: %s, rem: %s' % (dt, rem)) + assert_(np.isnan(fmod), 'dt: %s, fmod: %s' % (dt, rem)) class TestCbrt: diff --git a/numpy/distutils/unixccompiler.py b/numpy/distutils/unixccompiler.py index 9bb7251d8..0cd2d243e 100644 --- a/numpy/distutils/unixccompiler.py +++ 
b/numpy/distutils/unixccompiler.py @@ -3,6 +3,8 @@ unixccompiler - can handle very long argument lists for ar. """ import os +import sys +import subprocess from distutils.errors import CompileError, DistutilsExecError, LibError from distutils.unixccompiler import UnixCCompiler @@ -56,6 +58,11 @@ def UnixCCompiler__compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts # add commandline flags to dependency file if deps: + # After running the compiler, the file created will be in EBCDIC + # but will not be tagged as such. This tags it so the file does not + # have multiple different encodings being written to it + if sys.platform == 'zos': + subprocess.check_output(['chtag', '-tc', 'IBM1047', obj + '.d']) with open(obj + '.d', 'a') as f: f.write(_commandline_dep_string(cc_args, extra_postargs, pp_opts)) diff --git a/numpy/f2py/__version__.py b/numpy/f2py/__version__.py index 104c2e1a8..e20d7c1db 100644 --- a/numpy/f2py/__version__.py +++ b/numpy/f2py/__version__.py @@ -1,8 +1 @@ -major = 2 - -try: - from __svn_version__ import version - version_info = (major, version) - version = '%s_%s' % version_info -except (ImportError, ValueError): - version = str(major) +from numpy.version import version diff --git a/numpy/f2py/capi_maps.py b/numpy/f2py/capi_maps.py index fabbfc4c2..472ddde43 100644 --- a/numpy/f2py/capi_maps.py +++ b/numpy/f2py/capi_maps.py @@ -11,8 +11,6 @@ $Date: 2005/05/06 10:57:33 $ Pearu Peterson """ -__version__ = "$Revision: 1.60 $"[10:-1] - from . import __version__ f2py_version = __version__.version diff --git a/numpy/f2py/common_rules.py b/numpy/f2py/common_rules.py index 90483e55b..937d8bc72 100644 --- a/numpy/f2py/common_rules.py +++ b/numpy/f2py/common_rules.py @@ -13,8 +13,6 @@ $Date: 2005/05/06 10:57:33 $ Pearu Peterson """ -__version__ = "$Revision: 1.19 $"[10:-1] - from . 
import __version__ f2py_version = __version__.version diff --git a/numpy/f2py/f2py2e.py b/numpy/f2py/f2py2e.py index be2c345d1..b45d985aa 100755 --- a/numpy/f2py/f2py2e.py +++ b/numpy/f2py/f2py2e.py @@ -29,18 +29,14 @@ from . import __version__ from . import capi_maps f2py_version = __version__.version +numpy_version = __version__.version errmess = sys.stderr.write # outmess=sys.stdout.write show = pprint.pprint outmess = auxfuncs.outmess -try: - from numpy import __version__ as numpy_version -except ImportError: - numpy_version = 'N/A' - -__usage__ = """\ -Usage: +__usage__ =\ +f"""Usage: 1) To construct extension module sources: @@ -97,8 +93,8 @@ Options: --[no-]latex-doc Create (or not) <modulename>module.tex. Default is --no-latex-doc. --short-latex Create 'incomplete' LaTeX document (without commands - \\documentclass, \\tableofcontents, and \\begin{document}, - \\end{document}). + \\documentclass, \\tableofcontents, and \\begin{{document}}, + \\end{{document}}). --[no-]rest-doc Create (or not) <modulename>module.rst. Default is --no-rest-doc. @@ -167,12 +163,12 @@ Extra options (only effective with -c): array. Integer <int> sets the threshold for array sizes when a message should be shown. -Version: %s -numpy Version: %s +Version: {f2py_version} +numpy Version: {numpy_version} Requires: Python 3.5 or higher. License: NumPy license (see LICENSE.txt in the NumPy source code) Copyright 1999 - 2011 Pearu Peterson all rights reserved. -http://cens.ioc.ee/projects/f2py2e/""" % (f2py_version, numpy_version) +http://cens.ioc.ee/projects/f2py2e/""" def scaninputline(inputline): diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index a14f60194..f1490527e 100755 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -50,18 +50,15 @@ $Date: 2005/08/30 08:58:42 $ Pearu Peterson """ -__version__ = "$Revision: 1.129 $"[10:-1] - -from . import __version__ -f2py_version = __version__.version - -from .. 
import version as _numpy_version -numpy_version = _numpy_version.version - import os import time import copy +# __version__.version is now the same as the NumPy version +from . import __version__ +f2py_version = __version__.version +numpy_version = __version__.version + from .auxfuncs import ( applyrules, debugcapi, dictappend, errmess, gentitle, getargs2, hascallstatement, hasexternals, hasinitvalue, hasnote, hasresultnote, @@ -202,7 +199,7 @@ PyMODINIT_FUNC PyInit_#modulename#(void) { \tif (PyErr_Occurred()) \t\t{PyErr_SetString(PyExc_ImportError, \"can't initialize module #modulename# (failed to import numpy)\"); return m;} \td = PyModule_GetDict(m); -\ts = PyUnicode_FromString(\"$R""" + """evision: $\"); +\ts = PyUnicode_FromString(\"#f2py_version#\"); \tPyDict_SetItemString(d, \"__version__\", s); \tPy_DECREF(s); \ts = PyUnicode_FromString( diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 984f3086e..696fe617b 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -1290,7 +1290,7 @@ def _interp_dispatcher(x, xp, fp, left=None, right=None, period=None): @array_function_dispatch(_interp_dispatcher) def interp(x, xp, fp, left=None, right=None, period=None): """ - One-dimensional linear interpolation. + One-dimensional linear interpolation for monotonically increasing sample points. Returns the one-dimensional piecewise linear interpolant to a function with given discrete data points (`xp`, `fp`), evaluated at `x`. @@ -1337,8 +1337,8 @@ def interp(x, xp, fp, left=None, right=None, period=None): -------- scipy.interpolate - Notes - ----- + Warnings + -------- The x-coordinate sequence is expected to be increasing, but this is not explicitly enforced. However, if the sequence `xp` is non-increasing, interpolation results are meaningless. 
diff --git a/numpy/testing/print_coercion_tables.py b/numpy/testing/print_coercion_tables.py index 8024df128..3a447cd2d 100755 --- a/numpy/testing/print_coercion_tables.py +++ b/numpy/testing/print_coercion_tables.py @@ -3,6 +3,7 @@ """ import numpy as np +from collections import namedtuple # Generic object that can be added, but doesn't do anything else class GenericObject: @@ -25,7 +26,17 @@ def print_cancast_table(ntypes): for row in ntypes: print(row, end=' ') for col in ntypes: - print(int(np.can_cast(row, col)), end=' ') + if np.can_cast(row, col, "equiv"): + cast = "#" + elif np.can_cast(row, col, "safe"): + cast = "=" + elif np.can_cast(row, col, "same_kind"): + cast = "~" + elif np.can_cast(row, col, "unsafe"): + cast = "." + else: + cast = " " + print(cast, end=' ') print() def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, use_promote_types=False): @@ -69,6 +80,101 @@ def print_coercion_table(ntypes, inputfirstvalue, inputsecondvalue, firstarray, print() +def print_new_cast_table(*, can_cast=True, legacy=False, flags=False): + """Prints new casts, the values given are default "can-cast" values, not + actual ones. + """ + from numpy.core._multiarray_tests import get_all_cast_information + + cast_table = { + 0 : "#", # No cast (classify as equivalent here) + 1 : "#", # equivalent casting + 2 : "=", # safe casting + 3 : "~", # same-kind casting + 4 : ".", # unsafe casting + } + flags_table = { + 0 : "▗", 7: "█", + 1: "▚", 2: "▐", 4: "▄", + 3: "▜", 5: "▙", + 6: "▟", + } + + cast_info = namedtuple("cast_info", ["can_cast", "legacy", "flags"]) + no_cast_info = cast_info(" ", " ", " ") + + casts = get_all_cast_information() + table = {} + dtypes = set() + for cast in casts: + dtypes.add(cast["from"]) + dtypes.add(cast["to"]) + + if cast["from"] not in table: + table[cast["from"]] = {} + to_dict = table[cast["from"]] + + can_cast = cast_table[cast["casting"]] + legacy = "L" if cast["legacy"] else "." 
+ flags = 0 + if cast["requires_pyapi"]: + flags |= 1 + if cast["supports_unaligned"]: + flags |= 2 + if cast["no_floatingpoint_errors"]: + flags |= 4 + + flags = flags_table[flags] + to_dict[cast["to"]] = cast_info(can_cast=can_cast, legacy=legacy, flags=flags) + + # The np.dtype(x.type) is a bit strange, because dtype classes do + # not expose much yet. + types = np.typecodes["All"] + def sorter(x): + # This is a bit weird hack, to get a table as close as possible to + # the one printing all typecodes (but expecting user-dtypes). + dtype = np.dtype(x.type) + try: + indx = types.index(dtype.char) + except ValueError: + indx = np.inf + return (indx, dtype.char) + + dtypes = sorted(dtypes, key=sorter) + + def print_table(field="can_cast"): + print('X', end=' ') + for dt in dtypes: + print(np.dtype(dt.type).char, end=' ') + print() + for from_dt in dtypes: + print(np.dtype(from_dt.type).char, end=' ') + row = table.get(from_dt, {}) + for to_dt in dtypes: + print(getattr(row.get(to_dt, no_cast_info), field), end=' ') + print() + + if can_cast: + # Print the actual table: + print() + print("Casting: # is equivalent, = is safe, ~ is same-kind, and . is unsafe") + print() + print_table("can_cast") + + if legacy: + print() + print("L denotes a legacy cast . 
a non-legacy one.") + print() + print_table("legacy") + + if flags: + print() + print(f"{flags_table[0]}: no flags, {flags_table[1]}: PyAPI, " + f"{flags_table[2]}: supports unaligned, {flags_table[4]}: no-float-errors") + print() + print_table("flags") + + if __name__ == '__main__': print("can cast") print_cancast_table(np.typecodes['All']) @@ -89,3 +195,5 @@ if __name__ == '__main__': print() print("promote_types") print_coercion_table(np.typecodes['All'], 0, 0, False, True) + print("New casting type promotion:") + print_new_cast_table(can_cast=True, legacy=True, flags=True) diff --git a/numpy/tests/test_scripts.py b/numpy/tests/test_scripts.py index a0f2ba70a..e67a82947 100644 --- a/numpy/tests/test_scripts.py +++ b/numpy/tests/test_scripts.py @@ -38,9 +38,9 @@ def find_f2py_commands(): def test_f2py(f2py_cmd): # test that we can run f2py script stdout = subprocess.check_output([f2py_cmd, '-v']) - assert_equal(stdout.strip(), b'2') + assert_equal(stdout.strip(), np.__version__.encode('ascii')) def test_pep338(): stdout = subprocess.check_output([sys.executable, '-mnumpy.f2py', '-v']) - assert_equal(stdout.strip(), b'2') + assert_equal(stdout.strip(), np.__version__.encode('ascii')) diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py index a9bf94f13..e72e8fb4d 100644 --- a/numpy/typing/__init__.py +++ b/numpy/typing/__init__.py @@ -120,7 +120,7 @@ API # NOTE: The API section will be appended with additional entries # further down in this file -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List if TYPE_CHECKING: import sys @@ -131,6 +131,17 @@ if TYPE_CHECKING: else: def final(f): return f +if not TYPE_CHECKING: + __all__ = ["ArrayLike", "DTypeLike", "NBitBase"] +else: + # Ensure that all objects within this module are accessible while + # static type checking. This includes private ones, as we need them + # for internal use. 
+ # + # Declare to mypy that `__all__` is a list of strings without assigning + # an explicit value + __all__: List[str] + @final # Dissallow the creation of arbitrary `NBitBase` subclasses class NBitBase: @@ -194,7 +205,7 @@ class _16Bit(_32Bit): ... # type: ignore[misc] class _8Bit(_16Bit): ... # type: ignore[misc] # Clean up the namespace -del TYPE_CHECKING, final +del TYPE_CHECKING, final, List from ._scalars import ( _CharLike, @@ -213,7 +224,7 @@ from ._dtype_like import _SupportsDType, _VoidDTypeLike, DTypeLike if __doc__ is not None: from ._add_docstring import _docstrings __doc__ += _docstrings - __doc__ += f'\n.. autoclass:: numpy.typing.NBitBase\n' + __doc__ += '\n.. autoclass:: numpy.typing.NBitBase\n' del _docstrings from numpy._pytesttester import PytestTester |
