diff options
Diffstat (limited to 'numpy/core')
119 files changed, 5879 insertions, 1690 deletions
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py index dad9293e1..b89e27f0f 100644 --- a/numpy/core/__init__.py +++ b/numpy/core/__init__.py @@ -9,6 +9,7 @@ are available in the main ``numpy`` namespace - use that instead. from numpy.version import version as __version__ import os +import warnings # disables OpenBLAS affinity setting of the main thread that limits # python threads or processes to one core @@ -80,8 +81,8 @@ from .memmap import * from .defchararray import chararray from . import function_base from .function_base import * -from . import machar -from .machar import * +from . import _machar +from ._machar import * from . import getlimits from .getlimits import * from . import shape_base @@ -105,11 +106,9 @@ from . import _methods __all__ = ['char', 'rec', 'memmap'] __all__ += numeric.__all__ -__all__ += fromnumeric.__all__ __all__ += ['record', 'recarray', 'format_parser'] __all__ += ['chararray'] __all__ += function_base.__all__ -__all__ += machar.__all__ __all__ += getlimits.__all__ __all__ += shape_base.__all__ __all__ += einsumfunc.__all__ @@ -151,6 +150,17 @@ def _DType_reduce(DType): return _DType_reconstruct, (scalar_type,) +def __getattr__(name): + # Deprecated 2021-10-20, NumPy 1.22 + if name == "machar": + warnings.warn( + "The `np.core.machar` module is deprecated (NumPy 1.22)", + DeprecationWarning, stacklevel=2, + ) + return _machar + raise AttributeError(f"Module {__name__!r} has no attribute {name!r}") + + import copyreg copyreg.pickle(ufunc, _ufunc_reduce) diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 7467be80f..078c58976 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -328,7 +328,7 @@ add_newdoc('numpy.core', 'nditer', ... with it: ... for (a, b, c) in it: ... addop(a, b, out=c) - ... return it.operands[2] + ... return it.operands[2] Here is the same function, but following the C-style pattern: @@ -1573,6 +1573,19 @@ add_newdoc('numpy.core.multiarray', 'frombuffer', array_function_like_doc, )) +add_newdoc('numpy.core.multiarray', '_from_dlpack', + """ + _from_dlpack(x, /) + + Create a NumPy array from an object implementing the ``__dlpack__`` + protocol. + + See Also + -------- + `Array API documentation + <https://data-apis.org/array-api/latest/design_topics/data_interchange.html#syntax-for-data-interchange-with-dlpack>`_ + """) + add_newdoc('numpy.core', 'fastCopyAndTranspose', """_fastCopyAndTranspose(a)""") @@ -2263,6 +2276,15 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_priority__', add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_struct__', """Array protocol: C-struct side.""")) +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack__', + """a.__dlpack__(*, stream=None) + + DLPack Protocol: Part of the Array API.""")) + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack_device__', + """a.__dlpack_device__() + + DLPack Protocol: Part of the Array API.""")) add_newdoc('numpy.core.multiarray', 'ndarray', ('base', """ @@ -2819,7 +2841,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__class_getitem__', >>> import numpy as np >>> np.ndarray[Any, np.dtype[Any]] - numpy.ndarray[typing.Any, numpy.dtype[Any]] + numpy.ndarray[typing.Any, numpy.dtype[typing.Any]] Notes ----- @@ -4727,6 +4749,26 @@ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc', and then throwing away the ufunc. """) +add_newdoc('numpy.core.multiarray', 'get_handler_name', + """ + get_handler_name(a: ndarray) -> str,None + + Return the name of the memory handler used by `a`. If not provided, return + the name of the memory handler that will be used to allocate data for the + next `ndarray` in this context. May return None if `a` does not own its + memory, in which case you can traverse ``a.base`` for a memory handler. + """) + +add_newdoc('numpy.core.multiarray', 'get_handler_version', + """ + get_handler_version(a: ndarray) -> int,None + + Return the version of the memory handler used by `a`. If not provided, + return the version of the memory handler that will be used to allocate data + for the next `ndarray` in this context. May return None if `a` does not own + its memory, in which case you can traverse ``a.base`` for a memory handler. + """) + add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage', """ _set_madvise_hugepage(enabled: bool) -> bool diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py index 8773d6c96..94859a9d5 100644 --- a/numpy/core/_add_newdocs_scalars.py +++ b/numpy/core/_add_newdocs_scalars.py @@ -290,3 +290,22 @@ for float_name in ('half', 'single', 'double', 'longdouble'): >>> np.{float_name}(3.2).is_integer() False """)) + +for int_name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', + 'int64', 'uint64', 'int64', 'uint64', 'int64', 'uint64'): + # Add negative examples for signed cases by checking typecode + add_newdoc('numpy.core.numerictypes', int_name, ('bit_count', + f""" + {int_name}.bit_count() -> int + + Computes the number of 1-bits in the absolute value of the input. + Analogous to the builtin `int.bit_count` or ``popcount`` in C++. + + Examples + -------- + >>> np.{int_name}(127).bit_count() + 7""" + + (f""" + >>> np.{int_name}(-127).bit_count() + 7 + """ if dtype(int_name).char.islower() else ""))) diff --git a/numpy/core/machar.py b/numpy/core/_machar.py index c77be793f..ace19a429 100644 --- a/numpy/core/machar.py +++ b/numpy/core/_machar.py @@ -13,6 +13,7 @@ from numpy.core.overrides import set_module # Need to speed this up...especially for longfloat +# Deprecated 2021-10-20, NumPy 1.22 @set_module('numpy') class MachAr: """ diff --git a/numpy/core/_ufunc_config.pyi b/numpy/core/_ufunc_config.pyi index 9c8cc8ab6..cd7129bcb 100644 --- a/numpy/core/_ufunc_config.pyi +++ b/numpy/core/_ufunc_config.pyi @@ -1,11 +1,10 @@ -from typing import Optional, Union, Callable, Any, Literal, Protocol, TypedDict +from typing import Optional, Union, Callable, Any, Literal, TypedDict + +from numpy import _SupportsWrite _ErrKind = Literal["ignore", "warn", "raise", "call", "print", "log"] _ErrFunc = Callable[[str, int], Any] -class _SupportsWrite(Protocol): - def write(self, msg: str, /) -> Any: ... - class _ErrDict(TypedDict): divide: _ErrKind over: _ErrKind @@ -30,8 +29,8 @@ def geterr() -> _ErrDict: ... def setbufsize(size: int) -> int: ... def getbufsize() -> int: ... def seterrcall( - func: Union[None, _ErrFunc, _SupportsWrite] -) -> Union[None, _ErrFunc, _SupportsWrite]: ... -def geterrcall() -> Union[None, _ErrFunc, _SupportsWrite]: ... + func: Union[None, _ErrFunc, _SupportsWrite[str]] +) -> Union[None, _ErrFunc, _SupportsWrite[str]]: ... +def geterrcall() -> Union[None, _ErrFunc, _SupportsWrite[str]]: ... # See `numpy/__init__.pyi` for the `errstate` class diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index a02c7153a..e1ee8a860 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -1,6 +1,8 @@ # Hash below were defined from numpy_api_order.txt and ufunc_api_order.txt # When adding a new version here for a new minor release, also add the same -# version as NPY_x_y_API_VERSION in numpyconfig.h +# version as NPY_x_y_API_VERSION in numpyconfig.h and C_API_VERSION in +# setup_common.py. + 0x00000001 = 603580d224763e58c5e7147f804dc0f5 0x00000002 = 8ecb29306758515ae69749c803a75da1 0x00000003 = bf22c0d05b31625d2a7015988d61ce5a @@ -56,5 +58,8 @@ # DType related API additions. # A new field was added to the end of PyArrayObject_fields. # Version 14 (NumPy 1.21) No change. -# Version 14 (NumPy 1.22) No change. 0x0000000e = 17a0f366e55ec05e5c5c149123478452 + +# Version 15 (NumPy 1.22) Configurable memory allocations +# Version 14 (NumPy 1.23) No change. +0x0000000f = b8783365b873681cd204be50cdfb448d diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index c2458c2b5..b401ee6a5 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -41,6 +41,7 @@ API_FILES = [join('multiarray', 'alloc.c'), join('multiarray', 'datetime_busdaycal.c'), join('multiarray', 'datetime_strings.c'), join('multiarray', 'descriptor.c'), + join('multiarray', 'dlpack.c'), join('multiarray', 'dtypemeta.c'), join('multiarray', 'einsum.c.src'), join('multiarray', 'flagsobject.c'), diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 9fa87a11e..dc71fc5c9 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -829,7 +829,7 @@ defdict = { docstrings.get('numpy.core.umath.ceil'), None, TD('e', f='ceil', astype={'e': 'f'}), - TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]), + TD(inexactvec, dispatch=[('loops_unary_fp', 'fd')]), TD('fdg', f='ceil'), TD(O, f='npy_ObjectCeil'), ), diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index fbd323368..d12d62d8f 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -19,6 +19,7 @@ from code_generators.genapi import StealRef, NonNull multiarray_global_vars = { 'NPY_NUMUSERTYPES': (7, 'int'), 'NPY_DEFAULT_ASSIGN_CASTING': (292, 'NPY_CASTING'), + 'PyDataMem_DefaultHandler': (306, 'PyObject*'), } multiarray_scalar_bool_values = { @@ -76,9 +77,9 @@ multiarray_types_api = { # End 1.6 API } -#define NPY_NUMUSERTYPES (*(int *)PyArray_API[6]) -#define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[7]) -#define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[8]) +# define NPY_NUMUSERTYPES (*(int *)PyArray_API[6]) +# define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[7]) +# define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[8]) multiarray_funcs_api = { 'PyArray_GetNDArrayCVersion': (0,), @@ -350,6 +351,9 @@ multiarray_funcs_api = { 'PyArray_ResolveWritebackIfCopy': (302,), 'PyArray_SetWritebackIfCopyBase': (303,), # End 1.14 API + 'PyDataMem_SetHandler': (304,), + 'PyDataMem_GetHandler': (305,), + # End 1.21 API } ufunc_types_api = { diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index 4e1182de6..cd584eea7 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -1420,7 +1420,7 @@ add_newdoc('numpy.core.umath', 'floor_divide', add_newdoc('numpy.core.umath', 'fmod', """ - Return the element-wise remainder of division. + Returns the element-wise remainder of division. This is the NumPy implementation of the C library function fmod, the remainder has the same sign as the dividend `x1`. It is equivalent to @@ -1678,7 +1678,7 @@ add_newdoc('numpy.core.umath', 'invert', add_newdoc('numpy.core.umath', 'isfinite', """ - Test element-wise for finiteness (not infinity or not Not a Number). + Test element-wise for finiteness (not infinity and not Not a Number). The result is returned as a boolean array. @@ -3065,8 +3065,14 @@ add_newdoc('numpy.core.umath', 'power', First array elements raised to powers from second array, element-wise. Raise each base in `x1` to the positionally-corresponding power in - `x2`. `x1` and `x2` must be broadcastable to the same shape. Note that an - integer type raised to a negative integer power will raise a ValueError. + `x2`. `x1` and `x2` must be broadcastable to the same shape. + + An integer type raised to a negative integer power will raise a + ``ValueError``. + + Negative values raised to a non-integral value will return ``nan``. + To get complex results, cast the input to complex, or specify the + ``dtype`` to be ``complex`` (see the example below). Parameters ---------- @@ -3121,6 +3127,21 @@ add_newdoc('numpy.core.umath', 'power', >>> x1 ** x2 array([ 0, 1, 8, 27, 16, 5]) + Negative values raised to a non-integral value will result in ``nan`` + (and a warning will be generated). + + >>> x3 = np.array([-1.0, -4.0]) + >>> with np.errstate(invalid='ignore'): + ... p = np.power(x3, 1.5) + ... + >>> p + array([nan, nan]) + + To get complex results, give the argument ``dtype=complex``. + + >>> np.power(x3, 1.5, dtype=complex) + array([-1.83697020e-16-1.j, -1.46957616e-15-8.j]) + """) add_newdoc('numpy.core.umath', 'float_power', @@ -3134,6 +3155,10 @@ add_newdoc('numpy.core.umath', 'float_power', inexact. The intent is that the function will return a usable result for negative powers and seldom overflow for positive powers. + Negative values raised to a non-integral value will return ``nan``. + To get complex results, cast the input to complex, or specify the + ``dtype`` to be ``complex`` (see the example below). + .. versionadded:: 1.12.0 Parameters @@ -3181,6 +3206,21 @@ add_newdoc('numpy.core.umath', 'float_power', array([[ 0., 1., 8., 27., 16., 5.], [ 0., 1., 8., 27., 16., 5.]]) + Negative values raised to a non-integral value will result in ``nan`` + (and a warning will be generated). + + >>> x3 = np.array([-1, -4]) + >>> with np.errstate(invalid='ignore'): + ... p = np.float_power(x3, 1.5) + ... + >>> p + array([nan, nan]) + + To get complex results, give the argument ``dtype=complex``. + + >>> np.float_power(x3, 1.5, dtype=complex) + array([-1.83697020e-16-1.j, -1.46957616e-15-8.j]) + """) add_newdoc('numpy.core.umath', 'radians', @@ -3292,7 +3332,7 @@ add_newdoc('numpy.core.umath', 'reciprocal', add_newdoc('numpy.core.umath', 'remainder', """ - Return element-wise remainder of division. + Returns the element-wise remainder of division. Computes the remainder complementary to the `floor_divide` function. It is equivalent to the Python modulus operator``x1 % x2`` and has the same sign @@ -3787,6 +3827,7 @@ add_newdoc('numpy.core.umath', 'sqrt', -------- lib.scimath.sqrt A version which returns complex numbers when given negative reals. + Note: 0.0 and -0.0 are handled differently for complex inputs. Notes ----- diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py index 0f7031bac..ab4a4d2be 100644 --- a/numpy/core/getlimits.py +++ b/numpy/core/getlimits.py @@ -5,13 +5,12 @@ __all__ = ['finfo', 'iinfo'] import warnings -from .machar import MachAr +from ._machar import MachAr from .overrides import set_module from . import numeric from . import numerictypes as ntypes from .numeric import array, inf, NaN from .umath import log10, exp2, nextafter, isnan -from . import umath def _fr0(a): @@ -386,6 +385,8 @@ class finfo: machar : MachAr The object which calculated these parameters and holds more detailed information. + + .. deprecated:: 1.22 machep : int The exponent that yields `eps`. max : floating point number of the appropriate type @@ -502,7 +503,7 @@ class finfo: self.eps = machar.eps.flat[0] self.nexp = machar.iexp self.nmant = machar.it - self.machar = machar + self._machar = machar self._str_tiny = machar._str_xmin.strip() self._str_max = machar._str_xmax.strip() self._str_epsneg = machar._str_epsneg.strip() @@ -552,11 +553,11 @@ class finfo: """ # This check is necessary because the value for smallest_normal is # platform dependent for longdouble types. - if isnan(self.machar.smallest_normal.flat[0]): + if isnan(self._machar.smallest_normal.flat[0]): warnings.warn( 'The value of smallest normal is undefined for double double', UserWarning, stacklevel=2) - return self.machar.smallest_normal.flat[0] + return self._machar.smallest_normal.flat[0] @property def tiny(self): @@ -575,6 +576,20 @@ class finfo: """ return self.smallest_normal + @property + def machar(self): + """The object which calculated these parameters and holds more + detailed information. + + .. deprecated:: 1.22 + """ + # Deprecated 2021-10-27, NumPy 1.22 + warnings.warn( + "`finfo.machar` is deprecated (NumPy 1.22)", + DeprecationWarning, stacklevel=2, + ) + return self._machar + @set_module('numpy') class iinfo: diff --git a/numpy/core/getlimits.pyi b/numpy/core/getlimits.pyi index ca22e18f7..66d062995 100644 --- a/numpy/core/getlimits.pyi +++ b/numpy/core/getlimits.pyi @@ -1,58 +1,8 @@ -from typing import Any, Generic, List, Type, TypeVar +from typing import List from numpy import ( finfo as finfo, iinfo as iinfo, - floating, - signedinteger, ) -from numpy.typing import NBitBase, NDArray - -_NBit = TypeVar("_NBit", bound=NBitBase) - __all__: List[str] - -class MachArLike(Generic[_NBit]): - def __init__( - self, - ftype: Type[floating[_NBit]], - *, - eps: floating[Any], - epsneg: floating[Any], - huge: floating[Any], - tiny: floating[Any], - ibeta: int, - smallest_subnormal: None | floating[Any] = ..., - # Expand `**kwargs` into keyword-only arguments - machep: int, - negep: int, - minexp: int, - maxexp: int, - it: int, - iexp: int, - irnd: int, - ngrd: int, - ) -> None: ... - @property - def smallest_subnormal(self) -> NDArray[floating[_NBit]]: ... - eps: NDArray[floating[_NBit]] - epsilon: NDArray[floating[_NBit]] - epsneg: NDArray[floating[_NBit]] - huge: NDArray[floating[_NBit]] - ibeta: signedinteger[_NBit] - iexp: int - irnd: int - it: int - machep: int - maxexp: int - minexp: int - negep: int - ngrd: int - precision: int - resolution: NDArray[floating[_NBit]] - smallest_normal: NDArray[floating[_NBit]] - tiny: NDArray[floating[_NBit]] - title: str - xmax: NDArray[floating[_NBit]] - xmin: NDArray[floating[_NBit]] diff --git a/numpy/core/include/numpy/experimental_dtype_api.h b/numpy/core/include/numpy/experimental_dtype_api.h index 22854a725..554c7fb6c 100644 --- a/numpy/core/include/numpy/experimental_dtype_api.h +++ b/numpy/core/include/numpy/experimental_dtype_api.h @@ -16,13 +16,47 @@ * in your module init. (A version mismatch will be reported, just update * to the correct one, this will alert you of possible changes.) * - * The two main symbols exported are: + * The following lists the main symbols currently exported. Please do not + * hesitate to ask for help or clarification: * - * - PyUFunc_AddLoopFromSpec (Register a new loop for a ufunc) - * - PyArrayInitDTypeMeta_FromSpec (Create a new DType) + * - PyUFunc_AddLoopFromSpec: * - * Please check the in-line documentation for details and do not hesitate to - * ask for help. + * Register a new loop for a ufunc. This uses the `PyArrayMethod_Spec` + * which must be filled in (see in-line comments). + * + * - PyUFunc_AddPromoter: + * + * Register a new promoter for a ufunc. A promoter is a function stored + * in a PyCapsule (see in-line comments). It is passed the operation and + * requested DType signatures and can mutate it to attempt a new search + * for a matching loop/promoter. + * I.e. for Numba a promoter could even add the desired loop. + * + * - PyArrayInitDTypeMeta_FromSpec: + * + * Initialize a new DType. It must currently be a static Python C type + * that is declared as `PyArray_DTypeMeta` and not `PyTypeObject`. + * Further, it must subclass `np.dtype` and set its type to + * `PyArrayDTypeMeta_Type` (before calling `PyType_Read()`). + * + * - PyArray_CommonDType: + * + * Find the common-dtype ("promotion") for two DType classes. Similar + * to `np.result_type`, but works on the classes and not instances. + * + * - PyArray_PromoteDTypeSequence: + * + * Same as CommonDType, but works with an arbitrary number of DTypes. + * This function is smarter and can often return successful and unambiguous + * results when `common_dtype(common_dtype(dt1, dt2), dt3)` would + * depend on the operation order or fail. Nevertheless, DTypes should + * aim to ensure that their common-dtype implementation is associative + * and commutative! (Mainly, unsigned and signed integers are not.) + * + * For guaranteed consistent results DTypes must implement common-Dtype + * "transitively". If A promotes B and B promotes C, than A must generally + * also promote C; where "promotes" means implements the promotion. + * (There are some exceptions for abstract DTypes) * * WARNING * ======= @@ -67,11 +101,28 @@ __not_imported(void) printf("*****\nCritical error, dtype API not imported\n*****\n"); } static void *__uninitialized_table[] = { + &__not_imported, &__not_imported, &__not_imported, &__not_imported, &__not_imported, &__not_imported, &__not_imported, &__not_imported}; static void **__experimental_dtype_api_table = __uninitialized_table; + +/* + * DTypeMeta struct, the content may be made fully opaque (except the size). + * We may also move everything into a single `void *dt_slots`. + */ +typedef struct { + PyHeapTypeObject super; + PyArray_Descr *singleton; + int type_num; + PyTypeObject *scalar_type; + npy_uint64 flags; + void *dt_slots; + void *reserved[3]; +} PyArray_DTypeMeta; + + /* * ****************************************************** * ArrayMethod API (Casting and UFuncs) @@ -128,6 +179,28 @@ typedef PyObject *_ufunc_addloop_fromspec_func( /* + * Type of the C promoter function, which must be wrapped into a + * PyCapsule with name "numpy._ufunc_promoter". + */ +typedef int promoter_function(PyObject *ufunc, + PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *new_op_dtypes[]); + +/* + * Function to register a promoter. + * + * @param ufunc The ufunc object to register the promoter with. + * @param DType_tuple A Python tuple containing DTypes or None matching the + * number of inputs and outputs of the ufunc. + * @param promoter A PyCapsule with name "numpy._ufunc_promoter" containing + * a pointer to a `promoter_function`. + */ +typedef int _ufunc_addpromoter_func( + PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter); +#define PyUFunc_AddPromoter \ + (*(_ufunc_addpromoter_func *)(__experimental_dtype_api_table[1])) + +/* * In addition to the normal casting levels, NPY_CAST_IS_VIEW indicates * that no cast operation is necessary at all (although a copy usually will be) * @@ -221,24 +294,8 @@ typedef struct{ } PyArrayDTypeMeta_Spec; -/* - * DTypeMeta struct, the content may be made fully opaque (except the size). - * We may also move everything into a single `void *dt_slots`. - */ -typedef struct { - PyHeapTypeObject super; - PyArray_Descr *singleton; - int type_num; - PyTypeObject *scalar_type; - npy_uint64 flags; - void *dt_slots; - void *reserved[3]; -} PyArray_DTypeMeta; - - #define PyArrayDTypeMeta_Type \ - (&(PyTypeObject *)__experimental_dtype_api_table[1]) - + (*(PyTypeObject *)__experimental_dtype_api_table[2]) typedef int __dtypemeta_fromspec( PyArray_DTypeMeta *DType, PyArrayDTypeMeta_Spec *dtype_spec); /* @@ -250,8 +307,25 @@ typedef int __dtypemeta_fromspec( * uses `PyArray_DTypeMeta` defined above as the C-structure. */ #define PyArrayInitDTypeMeta_FromSpec \ - ((__dtypemeta_fromspec *)(__experimental_dtype_api_table[2])) + ((__dtypemeta_fromspec *)(__experimental_dtype_api_table[3])) + + +/* + * ************************************* + * WORKING WITH DTYPES + * ************************************* + */ + +typedef PyArray_DTypeMeta *__common_dtype( + PyArray_DTypeMeta *DType1, PyArray_DTypeMeta *DType2); +#define PyArray_CommonDType \ + ((__common_dtype *)(__experimental_dtype_api_table[4])) + +typedef PyArray_DTypeMeta *__promote_dtype_sequence( + npy_intp num, PyArray_DTypeMeta *DTypes[]); +#define PyArray_PromoteDTypeSequence \ + ((__promote_dtype_sequence *)(__experimental_dtype_api_table[5])) /* @@ -264,7 +338,7 @@ typedef int __dtypemeta_fromspec( * runtime-check this. * You must call this function to use the symbols defined in this file. */ -#define __EXPERIMENTAL_DTYPE_VERSION 1 +#define __EXPERIMENTAL_DTYPE_VERSION 2 static int import_experimental_dtype_api(int version) diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 8d810fa64..6240adc0c 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -355,12 +355,10 @@ struct NpyAuxData_tag { #define NPY_ERR(str) fprintf(stderr, #str); fflush(stderr); #define NPY_ERR2(str) fprintf(stderr, str); fflush(stderr); - /* - * Macros to define how array, and dimension/strides data is - * allocated. - */ - - /* Data buffer - PyDataMem_NEW/FREE/RENEW are in multiarraymodule.c */ +/* +* Macros to define how array, and dimension/strides data is +* allocated. These should be made private +*/ #define NPY_USE_PYMEM 1 @@ -667,6 +665,29 @@ typedef struct _arr_descr { } PyArray_ArrayDescr; /* + * Memory handler structure for array data. + */ +/* The declaration of free differs from PyMemAllocatorEx */ +typedef struct { + void *ctx; + void* (*malloc) (void *ctx, size_t size); + void* (*calloc) (void *ctx, size_t nelem, size_t elsize); + void* (*realloc) (void *ctx, void *ptr, size_t new_size); + void (*free) (void *ctx, void *ptr, size_t size); + /* + * This is the end of the version=1 struct. Only add new fields after + * this line + */ +} PyDataMemAllocator; + +typedef struct { + char name[127]; /* multiple of 64 to keep the struct aligned */ + uint8_t version; /* currently 1 */ + PyDataMemAllocator allocator; +} PyDataMem_Handler; + + +/* * The main array object structure. * * It has been recommended to use the inline functions defined below @@ -716,6 +737,10 @@ typedef struct tagPyArrayObject_fields { /* For weak references */ PyObject *weakreflist; void *_buffer_info; /* private buffer info, tagged to allow warning */ + /* + * For malloc/calloc/realloc/free per object + */ + PyObject *mem_handler; } PyArrayObject_fields; /* @@ -843,7 +868,7 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *); /* * Always copy the array. Returned arrays are always CONTIGUOUS, - * ALIGNED, and WRITEABLE. + * ALIGNED, and WRITEABLE. See also: NPY_ARRAY_ENSURENOCOPY = 0x4000. * * This flag may be requested in constructor functions. */ @@ -913,6 +938,13 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *); #define NPY_ARRAY_WRITEBACKIFCOPY 0x2000 /* + * No copy may be made while converting from an object/array (result is a view) + * + * This flag may be requested in constructor functions. + */ +#define NPY_ARRAY_ENSURENOCOPY 0x4000 + +/* * NOTE: there are also internal flags defined in multiarray/arrayobject.h, * which start at bit 31 and work down. */ @@ -1659,6 +1691,12 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags) ((PyArrayObject_fields *)arr)->flags &= ~flags; } +static NPY_INLINE NPY_RETURNS_BORROWED_REF PyObject * +PyArray_HANDLER(PyArrayObject *arr) +{ + return ((PyArrayObject_fields *)arr)->mem_handler; +} + #define PyTypeNum_ISBOOL(type) ((type) == NPY_BOOL) #define PyTypeNum_ISUNSIGNED(type) (((type) == NPY_UBYTE) || \ diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 12a3e725a..88794ca07 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -356,14 +356,31 @@ typedef unsigned long npy_ulonglong; typedef unsigned char npy_bool; #define NPY_FALSE 0 #define NPY_TRUE 1 - - +/* + * `NPY_SIZEOF_LONGDOUBLE` isn't usually equal to sizeof(long double). + * In some certain cases, it may forced to be equal to sizeof(double) + * even against the compiler implementation and the same goes for + * `complex long double`. + * + * Therefore, avoid `long double`, use `npy_longdouble` instead, + * and when it comes to standard math functions make sure of using + * the double version when `NPY_SIZEOF_LONGDOUBLE` == `NPY_SIZEOF_DOUBLE`. + * For example: + * npy_longdouble *ptr, x; + * #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + * npy_longdouble r = modf(x, ptr); + * #else + * npy_longdouble r = modfl(x, ptr); + * #endif + * + * See https://github.com/numpy/numpy/issues/20348 + */ #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE - typedef double npy_longdouble; - #define NPY_LONGDOUBLE_FMT "g" + #define NPY_LONGDOUBLE_FMT "g" + typedef double npy_longdouble; #else - typedef long double npy_longdouble; - #define NPY_LONGDOUBLE_FMT "Lg" + #define NPY_LONGDOUBLE_FMT "Lg" + typedef long double npy_longdouble; #endif #ifndef Py_USING_UNICODE diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index b1e6363e3..bead0dc14 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -150,6 +150,17 @@ NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b); NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b); NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b); +NPY_INPLACE uint8_t npy_popcountuhh(npy_ubyte a); +NPY_INPLACE uint8_t npy_popcountuh(npy_ushort a); +NPY_INPLACE uint8_t npy_popcountu(npy_uint a); +NPY_INPLACE uint8_t npy_popcountul(npy_ulong a); +NPY_INPLACE uint8_t npy_popcountull(npy_ulonglong a); +NPY_INPLACE uint8_t npy_popcounthh(npy_byte a); +NPY_INPLACE uint8_t npy_popcounth(npy_short a); +NPY_INPLACE uint8_t npy_popcount(npy_int a); +NPY_INPLACE uint8_t npy_popcountl(npy_long a); +NPY_INPLACE uint8_t npy_popcountll(npy_longlong a); + /* * C99 double math funcs */ diff --git a/numpy/core/include/numpy/numpyconfig.h b/numpy/core/include/numpy/numpyconfig.h index b2ce66244..4eac083e7 100644 --- a/numpy/core/include/numpy/numpyconfig.h +++ b/numpy/core/include/numpy/numpyconfig.h @@ -19,6 +19,19 @@ #define NPY_SIZEOF_LONG 4 #define NPY_SIZEOF_PY_INTPTR_T 4 #endif + + #undef NPY_SIZEOF_LONGDOUBLE + #undef NPY_SIZEOF_COMPLEX_LONGDOUBLE + + #ifdef __x86_64 + #define NPY_SIZEOF_LONGDOUBLE 16 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32 + #elif defined(__arm64__) + #define NPY_SIZEOF_LONGDOUBLE 8 + #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 16 + #else + #error "unknown architecture" + #endif #endif /** @@ -43,6 +56,7 @@ #define NPY_1_19_API_VERSION 0x00000008 #define NPY_1_20_API_VERSION 0x0000000e #define NPY_1_21_API_VERSION 0x0000000e -#define NPY_1_22_API_VERSION 0x0000000e +#define NPY_1_22_API_VERSION 0x0000000f +#define NPY_1_23_API_VERSION 0x0000000f #endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_NUMPYCONFIG_H_ */ diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 3f184bd45..1d7050bbe 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -173,11 +173,8 @@ typedef struct _tagPyUFuncObject { * but this was never implemented. (This is also why the above * selector is called the "legacy" selector.) */ - #if PY_VERSION_HEX >= 0x03080000 vectorcallfunc vectorcall; - #else - void *reserved2; - #endif + /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */ void *_always_null_previously_masked_innerloop_selector; diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 154df6f4d..f88d75978 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -14,8 +14,9 @@ from ._multiarray_umath import * # noqa: F403 # do not change them. issue gh-15518 # _get_ndarray_c_version is semi-public, on purpose not added to __all__ from ._multiarray_umath import ( - _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string, - _ARRAY_API, _monotonicity, _get_ndarray_c_version, _set_madvise_hugepage, + _fastCopyAndTranspose, _flagdict, _from_dlpack, _insert, _reconstruct, + _vec_string, _ARRAY_API, _monotonicity, _get_ndarray_c_version, + _set_madvise_hugepage, ) __all__ = [ @@ -23,29 +24,30 @@ __all__ = [ 'ITEM_HASOBJECT', 'ITEM_IS_POINTER', 'LIST_PICKLE', 'MAXDIMS', 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NEEDS_INIT', 'NEEDS_PYAPI', 'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP', '_fastCopyAndTranspose', - '_flagdict', '_insert', '_reconstruct', '_vec_string', '_monotonicity', - 'add_docstring', 'arange', 'array', 'asarray', 'asanyarray', - 'ascontiguousarray', 'asfortranarray', 'bincount', 'broadcast', - 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', + '_flagdict', '_from_dlpack', '_insert', '_reconstruct', '_vec_string', + '_monotonicity', 'add_docstring', 'arange', 'array', 'asarray', + 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'bincount', + 'broadcast', 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', 'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2', 'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data', 'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype', 'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat', - 'frombuffer', 'fromfile', 'fromiter', 'fromstring', 'inner', - 'interp', 'interp_complex', 'is_busday', 'lexsort', - 'matmul', 'may_share_memory', 'min_scalar_type', 'ndarray', 'nditer', - 'nested_iters', 'normalize_axis_index', 'packbits', - 'promote_types', 'putmask', 'ravel_multi_index', 'result_type', 'scalar', - 'set_datetimeparse_function', 'set_legacy_print_mode', 'set_numeric_ops', - 'set_string_function', 'set_typeDict', 'shares_memory', - 'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot', - 'where', 'zeros'] + 'frombuffer', 'fromfile', 'fromiter', 'fromstring', + 'get_handler_name', 'get_handler_version', 'inner', 'interp', + 'interp_complex', 'is_busday', 'lexsort', 'matmul', 'may_share_memory', + 'min_scalar_type', 'ndarray', 'nditer', 'nested_iters', + 'normalize_axis_index', 'packbits', 'promote_types', 'putmask', + 'ravel_multi_index', 'result_type', 'scalar', 'set_datetimeparse_function', + 'set_legacy_print_mode', 'set_numeric_ops', 'set_string_function', + 'set_typeDict', 'shares_memory', 'tracemalloc_domain', 'typeinfo', + 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros'] # For backward compatibility, make sure pickle imports these functions from here _reconstruct.__module__ = 'numpy.core.multiarray' scalar.__module__ = 'numpy.core.multiarray' +_from_dlpack.__module__ = 'numpy' arange.__module__ = 'numpy' array.__module__ = 'numpy' asarray.__module__ = 'numpy' diff --git a/numpy/core/multiarray.pyi b/numpy/core/multiarray.pyi index 1f3792ecb..a9f68e181 100644 --- a/numpy/core/multiarray.pyi +++ b/numpy/core/multiarray.pyi @@ -50,6 +50,7 @@ from numpy import ( _ModeKind, _SupportsBuffer, _IOProtocol, + _CopyMode, _NDIterFlagsKind, _NDIterOpFlagsKind, ) @@ -177,7 +178,7 @@ def array( object: _ArrayType, dtype: None = ..., *, - copy: bool = ..., + copy: bool | _CopyMode = ..., order: _OrderKACF = ..., subok: L[True], ndmin: int = ..., @@ -188,7 +189,7 @@ def array( object: _ArrayLike[_SCT], dtype: None = ..., *, - copy: bool = ..., + copy: bool | _CopyMode = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., @@ -199,7 +200,7 @@ def array( object: object, dtype: None = ..., *, - copy: bool = ..., + copy: bool | _CopyMode = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., @@ -210,7 +211,7 @@ def array( object: Any, dtype: _DTypeLike[_SCT], *, - copy: bool = ..., + copy: bool | _CopyMode = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., @@ -221,7 +222,7 @@ def array( object: Any, dtype: DTypeLike, *, - copy: bool = ..., + copy: bool | _CopyMode = ..., order: _OrderKACF = ..., subok: bool = ..., ndmin: int = ..., diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index d8a0cf9a6..014fa0a39 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -13,8 +13,8 @@ from .multiarray import ( WRAP, arange, array, asarray, asanyarray, ascontiguousarray, asfortranarray, broadcast, can_cast, compare_chararrays, concatenate, copyto, dot, dtype, empty, - empty_like, flatiter, frombuffer, fromfile, fromiter, fromstring, - inner, lexsort, matmul, may_share_memory, + empty_like, flatiter, frombuffer, _from_dlpack, fromfile, fromiter, + fromstring, inner, lexsort, matmul, may_share_memory, min_scalar_type, ndarray, nditer, nested_iters, promote_types, putmask, result_type, set_numeric_ops, shares_memory, vdot, where, zeros, normalize_axis_index) @@ -41,7 +41,7 @@ __all__ = [ 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', 'arange', 'array', 'asarray', 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'zeros', 'count_nonzero', 'empty', 'broadcast', 'dtype', - 'fromstring', 'fromfile', 'frombuffer', 'where', + 'fromstring', 'fromfile', 'frombuffer', '_from_dlpack', 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type', 'result_type', 'isfortran', 'empty_like', 'zeros_like', 'ones_like', @@ -1184,7 +1184,7 @@ def roll(a, shift, axis=None): >>> np.roll(x, -2) array([2, 3, 4, 5, 6, 7, 8, 9, 0, 1]) - >>> x2 = np.reshape(x, (2,5)) + >>> x2 = np.reshape(x, (2, 5)) >>> x2 array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) @@ -1206,6 +1206,12 @@ def roll(a, shift, axis=None): >>> np.roll(x2, -1, axis=1) array([[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]]) + >>> np.roll(x2, (1, 1), axis=(1, 0)) + array([[9, 5, 6, 7, 8], + [4, 0, 1, 2, 3]]) + >>> np.roll(x2, (2, 1), axis=(1, 0)) + array([[8, 9, 5, 6, 7], + [3, 4, 0, 1, 2]]) """ a = asanyarray(a) @@ -1823,6 +1829,14 @@ def fromfunction(function, shape, *, dtype=float, like=None, **kwargs): Examples -------- + >>> np.fromfunction(lambda i, j: i, (2, 2), dtype=float) + array([[0., 0.], + [1., 1.]]) + + >>> np.fromfunction(lambda i, j: j, (2, 2), dtype=float) + array([[0., 1.], + [0., 1.]]) + >>> np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int) array([[ True, False, False], [False, True, False], diff --git a/numpy/core/numeric.pyi b/numpy/core/numeric.pyi index 54ab4b7c8..d7ec30351 100644 --- a/numpy/core/numeric.pyi +++ b/numpy/core/numeric.pyi @@ -1,6 +1,5 @@ from typing import ( Any, - Optional, Union, Sequence, Tuple, @@ -8,18 +7,64 @@ from typing import ( List, overload, TypeVar, - Iterable, Literal, + Type, + SupportsAbs, + SupportsIndex, + NoReturn, ) +from typing_extensions import TypeGuard -from numpy import ndarray, generic, dtype, bool_, signedinteger, _OrderKACF, _OrderCF -from numpy.typing import ArrayLike, DTypeLike, _ShapeLike +from numpy import ( + ComplexWarning as ComplexWarning, + dtype, + generic, + unsignedinteger, + signedinteger, + floating, + complexfloating, + bool_, + int_, + intp, + float64, + timedelta64, + object_, + _OrderKACF, + _OrderCF, +) + +from numpy.typing import ( + ArrayLike, + NDArray, + DTypeLike, + _ShapeLike, + _SupportsDType, + _FiniteNestedSequence, + _SupportsArray, + _ScalarLike_co, + _ArrayLikeBool_co, + _ArrayLikeUInt_co, + _ArrayLikeInt_co, + _ArrayLikeFloat_co, + _ArrayLikeComplex_co, + _ArrayLikeTD64_co, + _ArrayLikeObject_co, +) _T = TypeVar("_T") -_ArrayType = TypeVar("_ArrayType", bound=ndarray) +_SCT = TypeVar("_SCT", bound=generic) +_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any]) +_DTypeLike = Union[ + dtype[_SCT], + Type[_SCT], + _SupportsDType[dtype[_SCT]], +] +_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]] _CorrelateMode = Literal["valid", "same", "full"] +__all__: List[str] + @overload def zeros_like( a: _ArrayType, @@ -30,20 +75,61 @@ def zeros_like( ) -> _ArrayType: ... @overload def zeros_like( - a: ArrayLike, - dtype: DTypeLike = ..., + a: _ArrayLike[_SCT], + dtype: None = ..., order: _OrderKACF = ..., subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def zeros_like( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def zeros_like( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def zeros_like( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload def ones( shape: _ShapeLike, - dtype: DTypeLike = ..., + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def ones( + shape: _ShapeLike, + dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, like: ArrayLike = ..., -) -> ndarray: ... +) -> NDArray[_SCT]: ... +@overload +def ones( + shape: _ShapeLike, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... @overload def ones_like( @@ -55,21 +141,64 @@ def ones_like( ) -> _ArrayType: ... @overload def ones_like( - a: ArrayLike, - dtype: DTypeLike = ..., + a: _ArrayLike[_SCT], + dtype: None = ..., order: _OrderKACF = ..., subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def ones_like( + a: object, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def ones_like( + a: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def ones_like( + a: Any, + dtype: DTypeLike, + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload def full( shape: _ShapeLike, fill_value: Any, - dtype: DTypeLike = ..., + dtype: None = ..., + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... +@overload +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: _DTypeLike[_SCT], order: _OrderCF = ..., *, like: ArrayLike = ..., -) -> ndarray: ... +) -> NDArray[_SCT]: ... +@overload +def full( + shape: _ShapeLike, + fill_value: Any, + dtype: DTypeLike, + order: _OrderCF = ..., + *, + like: ArrayLike = ..., +) -> NDArray[Any]: ... @overload def full_like( @@ -82,13 +211,40 @@ def full_like( ) -> _ArrayType: ... @overload def full_like( - a: ArrayLike, + a: _ArrayLike[_SCT], fill_value: Any, - dtype: DTypeLike = ..., + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload +def full_like( + a: object, + fill_value: Any, + dtype: None = ..., + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... +@overload +def full_like( + a: Any, + fill_value: Any, + dtype: _DTypeLike[_SCT], + order: _OrderKACF = ..., + subok: bool = ..., + shape: None | _ShapeLike= ..., +) -> NDArray[_SCT]: ... +@overload +def full_like( + a: Any, + fill_value: Any, + dtype: DTypeLike, order: _OrderKACF = ..., subok: bool = ..., - shape: Optional[_ShapeLike] = ..., -) -> ndarray: ... + shape: None | _ShapeLike= ..., +) -> NDArray[Any]: ... @overload def count_nonzero( @@ -105,78 +261,306 @@ def count_nonzero( keepdims: bool = ..., ) -> Any: ... # TODO: np.intp or ndarray[np.intp] -def isfortran(a: Union[ndarray, generic]) -> bool: ... +def isfortran(a: NDArray[Any] | generic) -> bool: ... -def argwhere(a: ArrayLike) -> ndarray: ... +def argwhere(a: ArrayLike) -> NDArray[intp]: ... -def flatnonzero(a: ArrayLike) -> ndarray: ... +def flatnonzero(a: ArrayLike) -> NDArray[intp]: ... +@overload def correlate( - a: ArrayLike, - v: ArrayLike, + a: _ArrayLikeBool_co, + v: _ArrayLikeBool_co, + mode: _CorrelateMode = ..., +) -> NDArray[bool_]: ... +@overload +def correlate( + a: _ArrayLikeUInt_co, + v: _ArrayLikeUInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def correlate( + a: _ArrayLikeInt_co, + v: _ArrayLikeInt_co, mode: _CorrelateMode = ..., -) -> ndarray: ... +) -> NDArray[signedinteger[Any]]: ... +@overload +def correlate( + a: _ArrayLikeFloat_co, + v: _ArrayLikeFloat_co, + mode: _CorrelateMode = ..., +) -> NDArray[floating[Any]]: ... +@overload +def correlate( + a: _ArrayLikeComplex_co, + v: _ArrayLikeComplex_co, + mode: _CorrelateMode = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def correlate( + a: _ArrayLikeTD64_co, + v: _ArrayLikeTD64_co, + mode: _CorrelateMode = ..., +) -> NDArray[timedelta64]: ... +@overload +def correlate( + a: _ArrayLikeObject_co, + v: _ArrayLikeObject_co, + mode: _CorrelateMode = ..., +) -> NDArray[object_]: ... +@overload def convolve( - a: ArrayLike, - v: ArrayLike, + a: _ArrayLikeBool_co, + v: _ArrayLikeBool_co, mode: _CorrelateMode = ..., -) -> ndarray: ... +) -> NDArray[bool_]: ... +@overload +def convolve( + a: _ArrayLikeUInt_co, + v: _ArrayLikeUInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def convolve( + a: _ArrayLikeInt_co, + v: _ArrayLikeInt_co, + mode: _CorrelateMode = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def convolve( + a: _ArrayLikeFloat_co, + v: _ArrayLikeFloat_co, + mode: _CorrelateMode = ..., +) -> NDArray[floating[Any]]: ... +@overload +def convolve( + a: _ArrayLikeComplex_co, + v: _ArrayLikeComplex_co, + mode: _CorrelateMode = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def convolve( + a: _ArrayLikeTD64_co, + v: _ArrayLikeTD64_co, + mode: _CorrelateMode = ..., +) -> NDArray[timedelta64]: ... +@overload +def convolve( + a: _ArrayLikeObject_co, + v: _ArrayLikeObject_co, + mode: _CorrelateMode = ..., +) -> NDArray[object_]: ... @overload def outer( - a: ArrayLike, - b: ArrayLike, + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, out: None = ..., -) -> ndarray: ... +) -> NDArray[bool_]: ... @overload def outer( - a: ArrayLike, - b: ArrayLike, - out: _ArrayType = ..., + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + out: None = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def outer( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + out: None = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def outer( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + out: None = ..., +) -> NDArray[floating[Any]]: ... +@overload +def outer( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + out: None = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def outer( + a: _ArrayLikeTD64_co, + b: _ArrayLikeTD64_co, + out: None = ..., +) -> NDArray[timedelta64]: ... +@overload +def outer( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + out: None = ..., +) -> NDArray[object_]: ... +@overload +def outer( + a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + b: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co, + out: _ArrayType, ) -> _ArrayType: ... +@overload def tensordot( - a: ArrayLike, - b: ArrayLike, - axes: Union[int, Tuple[_ShapeLike, _ShapeLike]] = ..., -) -> ndarray: ... + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[bool_]: ... +@overload +def tensordot( + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def tensordot( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def tensordot( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[floating[Any]]: ... +@overload +def tensordot( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def tensordot( + a: _ArrayLikeTD64_co, + b: _ArrayLikeTD64_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[timedelta64]: ... +@overload +def tensordot( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + axes: int | Tuple[_ShapeLike, _ShapeLike] = ..., +) -> NDArray[object_]: ... +@overload +def roll( + a: _ArrayLike[_SCT], + shift: _ShapeLike, + axis: None | _ShapeLike = ..., +) -> NDArray[_SCT]: ... +@overload def roll( a: ArrayLike, shift: _ShapeLike, - axis: Optional[_ShapeLike] = ..., -) -> ndarray: ... + axis: None | _ShapeLike = ..., +) -> NDArray[Any]: ... -def rollaxis(a: ndarray, axis: int, start: int = ...) -> ndarray: ... +def rollaxis( + a: NDArray[_SCT], + axis: int, + start: int = ..., +) -> NDArray[_SCT]: ... def moveaxis( - a: ndarray, + a: NDArray[_SCT], source: _ShapeLike, destination: _ShapeLike, -) -> ndarray: ... +) -> NDArray[_SCT]: ... +@overload def cross( - a: ArrayLike, - b: ArrayLike, + a: _ArrayLikeBool_co, + b: _ArrayLikeBool_co, axisa: int = ..., axisb: int = ..., axisc: int = ..., - axis: Optional[int] = ..., -) -> ndarray: ... + axis: None | int = ..., +) -> NoReturn: ... +@overload +def cross( + a: _ArrayLikeUInt_co, + b: _ArrayLikeUInt_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[unsignedinteger[Any]]: ... +@overload +def cross( + a: _ArrayLikeInt_co, + b: _ArrayLikeInt_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[signedinteger[Any]]: ... +@overload +def cross( + a: _ArrayLikeFloat_co, + b: _ArrayLikeFloat_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[floating[Any]]: ... +@overload +def cross( + a: _ArrayLikeComplex_co, + b: _ArrayLikeComplex_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[complexfloating[Any, Any]]: ... +@overload +def cross( + a: _ArrayLikeObject_co, + b: _ArrayLikeObject_co, + axisa: int = ..., + axisb: int = ..., + axisc: int = ..., + axis: None | int = ..., +) -> NDArray[object_]: ... @overload def indices( dimensions: Sequence[int], - dtype: DTypeLike = ..., + dtype: Type[int] = ..., sparse: Literal[False] = ..., -) -> ndarray: ... +) -> NDArray[int_]: ... @overload def indices( dimensions: Sequence[int], - dtype: DTypeLike = ..., + dtype: Type[int] = ..., sparse: Literal[True] = ..., -) -> Tuple[ndarray, ...]: ... +) -> Tuple[NDArray[int_], ...]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: _DTypeLike[_SCT], + sparse: Literal[False] = ..., +) -> NDArray[_SCT]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: _DTypeLike[_SCT], + sparse: Literal[True], +) -> Tuple[NDArray[_SCT], ...]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: DTypeLike, + sparse: Literal[False] = ..., +) -> NDArray[Any]: ... +@overload +def indices( + dimensions: Sequence[int], + dtype: DTypeLike, + sparse: Literal[True], +) -> Tuple[NDArray[Any], ...]: ... def fromfunction( function: Callable[..., _T], @@ -187,18 +571,39 @@ def fromfunction( **kwargs: Any, ) -> _T: ... -def isscalar(element: Any) -> bool: ... +def isscalar(element: object) -> TypeGuard[ + generic | bool | int | float | complex | str | bytes | memoryview +]: ... -def binary_repr(num: int, width: Optional[int] = ...) -> str: ... +def binary_repr(num: int, width: None | int = ...) -> str: ... -def base_repr(number: int, base: int = ..., padding: int = ...) -> str: ... +def base_repr( + number: SupportsAbs[float], + base: float = ..., + padding: SupportsIndex = ..., +) -> str: ... +@overload def identity( n: int, - dtype: DTypeLike = ..., + dtype: None = ..., + *, + like: ArrayLike = ..., +) -> NDArray[float64]: ... +@overload +def identity( + n: int, + dtype: _DTypeLike[_SCT], + *, + like: ArrayLike = ..., +) -> NDArray[_SCT]: ... +@overload +def identity( + n: int, + dtype: DTypeLike, *, like: ArrayLike = ..., -) -> ndarray: ... +) -> NDArray[Any]: ... def allclose( a: ArrayLike, @@ -208,13 +613,22 @@ def allclose( equal_nan: bool = ..., ) -> bool: ... +@overload +def isclose( + a: _ScalarLike_co, + b: _ScalarLike_co, + rtol: float = ..., + atol: float = ..., + equal_nan: bool = ..., +) -> bool_: ... +@overload def isclose( a: ArrayLike, b: ArrayLike, rtol: float = ..., atol: float = ..., equal_nan: bool = ..., -) -> Any: ... +) -> NDArray[bool_]: ... def array_equal(a1: ArrayLike, a2: ArrayLike, equal_nan: bool = ...) -> bool: ... diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 12f424fd4..8e5de852b 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -80,12 +80,10 @@ Exported symbols include: """ import numbers -import warnings from numpy.core.multiarray import ( - typeinfo, ndarray, array, empty, dtype, datetime_data, - datetime_as_string, busday_offset, busday_count, is_busday, - busdaycalendar + ndarray, array, dtype, datetime_data, datetime_as_string, + busday_offset, busday_count, is_busday, busdaycalendar ) from numpy.core.overrides import set_module diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 2b0e33244..1ec178445 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -672,16 +672,38 @@ def configuration(parent_package='',top_path=None): # but we cannot use add_installed_pkg_config here either, so we only # update the substitution dictionary during npymath build config_cmd = config.get_config_cmd() - # Check that the toolchain works, to fail early if it doesn't # (avoid late errors with MATHLIB which are confusing if the # compiler does not work). - st = config_cmd.try_link('int main(void) { return 0;}') - if not st: - # rerun the failing command in verbose mode - config_cmd.compiler.verbose = True - config_cmd.try_link('int main(void) { return 0;}') - raise RuntimeError("Broken toolchain: cannot link a simple C program") + for lang, test_code, note in ( + ('c', 'int main(void) { return 0;}', ''), + ('c++', ( + 'int main(void)' + '{ auto x = 0.0; return static_cast<int>(x); }' + ), ( + 'note: A compiler with support for C++11 language ' + 'features is required.' + ) + ), + ): + is_cpp = lang == 'c++' + if is_cpp: + # this a workround to get rid of invalid c++ flags + # without doing big changes to config. + # c tested first, compiler should be here + bk_c = config_cmd.compiler + config_cmd.compiler = bk_c.cxx_compiler() + st = config_cmd.try_link(test_code, lang=lang) + if not st: + # rerun the failing command in verbose mode + config_cmd.compiler.verbose = True + config_cmd.try_link(test_code, lang=lang) + raise RuntimeError( + f"Broken toolchain: cannot link a simple {lang.upper()} " + f"program. {note}" + ) + if is_cpp: + config_cmd.compiler = bk_c mlibs = check_mathlib(config_cmd) posix_mlib = ' '.join(['-l%s' % l for l in mlibs]) @@ -696,16 +718,24 @@ def configuration(parent_package='',top_path=None): join('src', 'npymath', 'halffloat.c') ] - # Must be true for CRT compilers but not MinGW/cygwin. See gh-9977. - # Intel and Clang also don't seem happy with /GL - is_msvc = (platform.platform().startswith('Windows') and - platform.python_compiler().startswith('MS')) + def gl_if_msvc(build_cmd): + """ Add flag if we are using MSVC compiler + + We can't see this in our scope, because we have not initialized the + distutils build command, so use this deferred calculation to run when + we are building the library. + """ + if build_cmd.compiler.compiler_type == 'msvc': + # explicitly disable whole-program optimization + return ['/GL-'] + return [] + config.add_installed_library('npymath', sources=npymath_sources + [get_mathlib_info], install_dir='lib', build_info={ 'include_dirs' : [], # empty list required for creating npy_math_internal.h - 'extra_compiler_args' : (['/GL-'] if is_msvc else []), + 'extra_compiler_args': [gl_if_msvc], }) config.add_npy_pkg_config("npymath.ini.in", "lib/npy-pkg-config", subst_dict) @@ -732,6 +762,7 @@ def configuration(parent_package='',top_path=None): ####################################################################### common_deps = [ + join('src', 'common', 'dlpack', 'dlpack.h'), join('src', 'common', 'array_assign.h'), join('src', 'common', 'binop_override.h'), join('src', 'common', 'cblasfuncs.h'), @@ -741,6 +772,7 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'npy_cblas.h'), join('src', 'common', 'npy_config.h'), join('src', 'common', 'npy_ctypes.h'), + join('src', 'common', 'npy_dlpack.h'), join('src', 'common', 'npy_extint128.h'), join('src', 'common', 'npy_import.h'), join('src', 'common', 'npy_hashtable.h'), @@ -873,6 +905,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'datetime_busday.c'), join('src', 'multiarray', 'datetime_busdaycal.c'), join('src', 'multiarray', 'descriptor.c'), + join('src', 'multiarray', 'dlpack.c'), join('src', 'multiarray', 'dtypemeta.c'), join('src', 'multiarray', 'dragon4.c'), join('src', 'multiarray', 'dtype_transfer.c'), @@ -909,7 +942,7 @@ def configuration(parent_package='',top_path=None): join('src', 'npysort', 'mergesort.c.src'), join('src', 'npysort', 'timsort.c.src'), join('src', 'npysort', 'heapsort.c.src'), - join('src', 'npysort', 'radixsort.c.src'), + join('src', 'npysort', 'radixsort.cpp'), join('src', 'common', 'npy_partition.h.src'), join('src', 'npysort', 'selection.c.src'), join('src', 'common', 'npy_binsearch.h.src'), @@ -949,8 +982,8 @@ def configuration(parent_package='',top_path=None): join('src', 'umath', 'loops_exponent_log.dispatch.c.src'), join('src', 'umath', 'matmul.h.src'), join('src', 'umath', 'matmul.c.src'), - join('src', 'umath', 'clip.h.src'), - join('src', 'umath', 'clip.c.src'), + join('src', 'umath', 'clip.h'), + join('src', 'umath', 'clip.cpp'), join('src', 'umath', 'dispatching.c'), join('src', 'umath', 'legacy_array_method.c'), join('src', 'umath', 'ufunc_object.c'), @@ -980,6 +1013,9 @@ def configuration(parent_package='',top_path=None): svml_objs = glob.glob(svml_path + '/**/*.s', recursive=True) config.add_extension('_multiarray_umath', + # Forcing C language even though we have C++ sources. + # It forces the C linker and don't link C++ runtime. + language = 'c', sources=multiarray_src + umath_src + common_src + [generate_config_h, @@ -994,7 +1030,11 @@ def configuration(parent_package='',top_path=None): common_deps, libraries=['npymath'], extra_objects=svml_objs, - extra_info=extra_info) + extra_info=extra_info, + extra_cxx_compile_args=['-std=c++11', + '-D__STDC_VERSION__=0', + '-fno-exceptions', + '-fno-rtti']) ####################################################################### # umath_tests module # diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 85c8f16d1..772c87c96 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -43,8 +43,9 @@ C_ABI_VERSION = 0x01000009 # 0x0000000d - 1.19.x # 0x0000000e - 1.20.x # 0x0000000e - 1.21.x -# 0x0000000e - 1.22.x -C_API_VERSION = 0x0000000e +# 0x0000000f - 1.22.x +# 0x0000000f - 1.23.x +C_API_VERSION = 0x0000000f class MismatchCAPIWarning(Warning): pass diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index 54770959c..84de9a059 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -381,7 +381,7 @@ SIMD_IMPL_INTRIN_1(sumup_@sfx@, @esfx@, v@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square# + * #intrin = sqrt, recip, abs, square, ceil, trunc# */ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@) /**end repeat1**/ @@ -615,7 +615,7 @@ SIMD_INTRIN_DEF(sumup_@sfx@) ***************************/ #if @fp_only@ /**begin repeat1 - * #intrin = sqrt, recip, abs, square# + * #intrin = sqrt, recip, abs, square, ceil, trunc# */ SIMD_INTRIN_DEF(@intrin@_@sfx@) /**end repeat1**/ diff --git a/numpy/core/src/common/dlpack/dlpack.h b/numpy/core/src/common/dlpack/dlpack.h new file mode 100644 index 000000000..29209aee1 --- /dev/null +++ b/numpy/core/src/common/dlpack/dlpack.h @@ -0,0 +1,201 @@ +// Taken from: +// https://github.com/dmlc/dlpack/blob/9b6176fdecb55e9bf39b16f08b96913ed3f275b4/include/dlpack/dlpack.h +/*! + * Copyright (c) 2017 by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current version of dlpack */ +#define DLPACK_VERSION 050 + +/*! \brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include <stdint.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif +/*! + * \brief The device type in DLDevice. + */ +typedef enum { + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! + * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, +} DLDeviceType; + +/*! + * \brief A Device for Tensor and operator. + */ +typedef struct { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. + */ + int device_id; +} DLDevice; + +/*! + * \brief The type code options DLDataType. + */ +typedef enum { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to be well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! + * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. + * + * Examples + * - float: type_code = 2, bits = 32, lanes=1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4 + * - int8: type_code = 0, bits = 8, lanes=1 + * - std::complex<float>: type_code = 5, bits = 64, lanes = 1 + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The opaque data pointer points to the allocated data. This will be + * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always + * aligned to 256 bytes as in CUDA. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void * manager_ctx; + /*! \brief Destructor signature void (*)(void*) - this should be called + * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL + * if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor * self); +} DLManagedTensor; +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/numpy/core/src/common/npy_dlpack.h b/numpy/core/src/common/npy_dlpack.h new file mode 100644 index 000000000..14ca352c0 --- /dev/null +++ b/numpy/core/src/common/npy_dlpack.h @@ -0,0 +1,28 @@ +#include "Python.h" +#include "dlpack/dlpack.h" + +#ifndef NPY_DLPACK_H +#define NPY_DLPACK_H + +// Part of the Array API specification. +#define NPY_DLPACK_CAPSULE_NAME "dltensor" +#define NPY_DLPACK_USED_CAPSULE_NAME "used_dltensor" + +// Used internally by NumPy to store a base object +// as it has to release a reference to the original +// capsule. +#define NPY_DLPACK_INTERNAL_CAPSULE_NAME "numpy_dltensor" + +PyObject * +array_dlpack(PyArrayObject *self, PyObject *const *args, Py_ssize_t len_args, + PyObject *kwnames); + + +PyObject * +array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args)); + + +NPY_NO_EXPORT PyObject * +_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj); + +#endif diff --git a/numpy/core/src/common/npy_sort.h.src b/numpy/core/src/common/npy_sort.h.src index ddbde0c9b..b4a1e9b0c 100644 --- a/numpy/core/src/common/npy_sort.h.src +++ b/numpy/core/src/common/npy_sort.h.src @@ -49,9 +49,14 @@ NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void * * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, * longlong, ulonglong# */ - +#ifdef __cplusplus +extern "C" { +#endif NPY_NO_EXPORT int radixsort_@suff@(void *vec, npy_intp cnt, void *null); NPY_NO_EXPORT int aradixsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); +#ifdef __cplusplus +} +#endif /**end repeat**/ diff --git a/numpy/core/src/common/numpy_tag.h b/numpy/core/src/common/numpy_tag.h new file mode 100644 index 000000000..dc8d5286b --- /dev/null +++ b/numpy/core/src/common/numpy_tag.h @@ -0,0 +1,78 @@ +#ifndef _NPY_COMMON_TAG_H_ +#define _NPY_COMMON_TAG_H_ + +namespace npy { + +struct integral_tag { +}; +struct floating_point_tag { +}; +struct complex_tag { +}; +struct date_tag { +}; + +struct bool_tag : integral_tag { + using type = npy_bool; +}; +struct byte_tag : integral_tag { + using type = npy_byte; +}; +struct ubyte_tag : integral_tag { + using type = npy_ubyte; +}; +struct short_tag : integral_tag { + using type = npy_short; +}; +struct ushort_tag : integral_tag { + using type = npy_ushort; +}; +struct int_tag : integral_tag { + using type = npy_int; +}; +struct uint_tag : integral_tag { + using type = npy_uint; +}; +struct long_tag : integral_tag { + using type = npy_long; +}; +struct ulong_tag : integral_tag { + using type = npy_ulong; +}; +struct longlong_tag : integral_tag { + using type = npy_longlong; +}; +struct ulonglong_tag : integral_tag { + using type = npy_ulonglong; +}; +struct half_tag { + using type = npy_half; +}; +struct float_tag : floating_point_tag { + using type = npy_float; +}; +struct double_tag : floating_point_tag { + using type = npy_double; +}; +struct longdouble_tag : floating_point_tag { + using type = npy_longdouble; +}; +struct cfloat_tag : complex_tag { + using type = npy_cfloat; +}; +struct cdouble_tag : complex_tag { + using type = npy_cdouble; +}; +struct clongdouble_tag : complex_tag { + using type = npy_clongdouble; +}; +struct datetime_tag : date_tag { + using type = npy_datetime; +}; +struct timedelta_tag : date_tag { + using type = npy_timedelta; +}; + +} // namespace npy + +#endif diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h index 9460183df..ec15e50e1 100644 --- a/numpy/core/src/common/simd/avx2/math.h +++ b/numpy/core/src/common/simd/avx2/math.h @@ -105,4 +105,12 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b)); } +// ceil +#define npyv_ceil_f32 _mm256_ceil_ps +#define npyv_ceil_f64 _mm256_ceil_pd + +// trunc +#define npyv_trunc_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_ZERO) +#define npyv_trunc_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_ZERO) + #endif // _NPY_SIMD_AVX2_MATH_H diff --git a/numpy/core/src/common/simd/avx2/memory.h b/numpy/core/src/common/simd/avx2/memory.h index e27bf15fe..5891a270a 100644 --- a/numpy/core/src/common/simd/avx2/memory.h +++ b/numpy/core/src/common/simd/avx2/memory.h @@ -87,7 +87,7 @@ NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) #if 0 // slower NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) { - const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride); + const __m256i idx = npyv_set_s64(0, 1*stride, 2*stride, 3*stride); return _mm256_i64gather_epi64((const void*)ptr, idx, 8); } NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride) @@ -170,9 +170,9 @@ NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) { assert(nlane > 0); - const __m256i vfill = _mm256_set1_epi64x(fill); - const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); - __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); + const __m256i vfill = npyv_setall_s64(fill); + const __m256i steps = npyv_set_s64(0, 1, 2, 3); + __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane); __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); __m256i payload = _mm256_maskload_epi64((const void*)ptr, mask); return _mm256_blendv_epi8(vfill, payload, mask); @@ -181,8 +181,8 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane) { assert(nlane > 0); - const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); - __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); + const __m256i steps = npyv_set_s64(0, 1, 2, 3); + __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane); __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); return _mm256_maskload_epi64((const void*)ptr, mask); } @@ -211,10 +211,10 @@ NPY_FINLINE npyv_s64 npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) { assert(nlane > 0); - const __m256i vfill = _mm256_set1_epi64x(fill); - const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride); - const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); - __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane); + const __m256i vfill = npyv_setall_s64(fill); + const __m256i idx = npyv_set_s64(0, 1*stride, 2*stride, 3*stride); + const __m256i steps = npyv_set_s64(0, 1, 2, 3); + __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane); __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 8); } @@ -238,8 +238,8 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a) { assert(nlane > 0); - const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3); - __m256i vnlane = _mm256_set1_epi64x(nlane > 8 ? 8 : (int)nlane); + const __m256i steps = npyv_set_s64(0, 1, 2, 3); + __m256i vnlane = npyv_setall_s64(nlane > 8 ? 8 : (int)nlane); __m256i mask = _mm256_cmpgt_epi64(vnlane, steps); _mm256_maskstore_epi64((void*)ptr, mask, a); } diff --git a/numpy/core/src/common/simd/avx2/misc.h b/numpy/core/src/common/simd/avx2/misc.h index e96696dc9..5e91e91b3 100644 --- a/numpy/core/src/common/simd/avx2/misc.h +++ b/numpy/core/src/common/simd/avx2/misc.h @@ -24,11 +24,27 @@ #define npyv_setall_s16(VAL) _mm256_set1_epi16((short)VAL) #define npyv_setall_u32(VAL) _mm256_set1_epi32((int)VAL) #define npyv_setall_s32(VAL) _mm256_set1_epi32(VAL) -#define npyv_setall_u64(VAL) _mm256_set1_epi64x(VAL) -#define npyv_setall_s64(VAL) _mm256_set1_epi64x(VAL) #define npyv_setall_f32(VAL) _mm256_set1_ps(VAL) #define npyv_setall_f64(VAL) _mm256_set1_pd(VAL) +NPY_FINLINE __m256i npyv__setr_epi64(npy_int64, npy_int64, npy_int64, npy_int64); +NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a) +{ + npy_int64 ai = (npy_int64)a; +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64(ai, ai, ai, ai); +#else + return _mm256_set1_epi64x(ai); +#endif +} +NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a) +{ +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64(a, a, a, a); +#else + return _mm256_set1_epi64x(a); +#endif +} /* * vector with specific values set to each lane and * set a specific value to all remained lanes @@ -59,7 +75,14 @@ NPY_FINLINE __m256i npyv__setr_epi32(int i0, int i1, int i2, int i3, int i4, int } NPY_FINLINE __m256i npyv__setr_epi64(npy_int64 i0, npy_int64 i1, npy_int64 i2, npy_int64 i3) { +#if defined(_MSC_VER) && defined(_M_IX86) + return _mm256_setr_epi32( + (int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32), + (int)i2, (int)(i2 >> 32), (int)i3, (int)(i3 >> 32) + ); +#else return _mm256_setr_epi64x(i0, i1, i2, i3); +#endif } NPY_FINLINE __m256 npyv__setr_ps(float i0, float i1, float i2, float i3, float i4, float i5, diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h index 0141396d0..f30e50ad0 100644 --- a/numpy/core/src/common/simd/avx512/math.h +++ b/numpy/core/src/common/simd/avx512/math.h @@ -35,7 +35,7 @@ NPY_FINLINE npyv_f64 npyv_abs_f64(npyv_f64 a) return _mm512_range_pd(a, a, 8); #else return npyv_and_f64( - a, _mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffffLL)) + a, _mm512_castsi512_pd(npyv_setall_s64(0x7fffffffffffffffLL)) ); #endif } @@ -112,4 +112,12 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b) #define npyv_min_u64 _mm512_min_epu64 #define npyv_min_s64 _mm512_min_epi64 +// ceil +#define npyv_ceil_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_POS_INF) +#define npyv_ceil_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_POS_INF) + +// trunc +#define npyv_trunc_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_ZERO) +#define npyv_trunc_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_ZERO) + #endif // _NPY_SIMD_AVX512_MATH_H diff --git a/numpy/core/src/common/simd/avx512/memory.h b/numpy/core/src/common/simd/avx512/memory.h index bffd6e907..47095bf72 100644 --- a/numpy/core/src/common/simd/avx512/memory.h +++ b/numpy/core/src/common/simd/avx512/memory.h @@ -110,7 +110,7 @@ NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride) //// 64 NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride) { - const __m512i idx = _mm512_setr_epi64( + const __m512i idx = npyv_set_s64( 0*stride, 1*stride, 2*stride, 3*stride, 4*stride, 5*stride, 6*stride, 7*stride ); @@ -140,7 +140,7 @@ NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a) //// 64 NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a) { - const __m512i idx = _mm512_setr_epi64( + const __m512i idx = npyv_set_s64( 0*stride, 1*stride, 2*stride, 3*stride, 4*stride, 5*stride, 6*stride, 7*stride ); @@ -173,7 +173,7 @@ NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane) NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill) { assert(nlane > 0); - const __m512i vfill = _mm512_set1_epi64(fill); + const __m512i vfill = npyv_setall_s64(fill); const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; return _mm512_mask_loadu_epi64(vfill, mask, (const __m512i*)ptr); } @@ -210,11 +210,11 @@ NPY_FINLINE npyv_s64 npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill) { assert(nlane > 0); - const __m512i idx = _mm512_setr_epi64( + const __m512i idx = npyv_set_s64( 0*stride, 1*stride, 2*stride, 3*stride, 4*stride, 5*stride, 6*stride, 7*stride ); - const __m512i vfill = _mm512_set1_epi64(fill); + const __m512i vfill = npyv_setall_s64(fill); const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1; return _mm512_mask_i64gather_epi64(vfill, mask, idx, (const __m512i*)ptr, 8); } @@ -258,7 +258,7 @@ NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a) { assert(nlane > 0); - const __m512i idx = _mm512_setr_epi64( + const __m512i idx = npyv_set_s64( 0*stride, 1*stride, 2*stride, 3*stride, 4*stride, 5*stride, 6*stride, 7*stride ); diff --git a/numpy/core/src/common/simd/avx512/misc.h b/numpy/core/src/common/simd/avx512/misc.h index 4b6729b05..c3039ecfe 100644 --- a/numpy/core/src/common/simd/avx512/misc.h +++ b/numpy/core/src/common/simd/avx512/misc.h @@ -24,11 +24,30 @@ #define npyv_setall_s16(VAL) _mm512_set1_epi16((short)VAL) #define npyv_setall_u32(VAL) _mm512_set1_epi32((int)VAL) #define npyv_setall_s32(VAL) _mm512_set1_epi32(VAL) -#define npyv_setall_u64(VAL) _mm512_set1_epi64(VAL) -#define npyv_setall_s64(VAL) _mm512_set1_epi64(VAL) #define npyv_setall_f32(VAL) _mm512_set1_ps(VAL) #define npyv_setall_f64(VAL) _mm512_set1_pd(VAL) +NPY_FINLINE __m512i npyv__setr_epi64( + npy_int64, npy_int64, npy_int64, npy_int64, + npy_int64, npy_int64, npy_int64, npy_int64 +); +NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a) +{ + npy_int64 ai = (npy_int64)a; +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64(ai, ai, ai, ai, ai, ai, ai, ai); +#else + return _mm512_set1_epi64(ai); +#endif +} +NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a) +{ +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64(a, a, a, a, a, a, a, a); +#else + return _mm512_set1_epi64(a); +#endif +} /** * vector with specific values set to each lane and * set a specific value to all remained lanes @@ -76,7 +95,16 @@ NPY_FINLINE __m512i npyv__setr_epi32( NPY_FINLINE __m512i npyv__setr_epi64(npy_int64 i0, npy_int64 i1, npy_int64 i2, npy_int64 i3, npy_int64 i4, npy_int64 i5, npy_int64 i6, npy_int64 i7) { +#if defined(_MSC_VER) && defined(_M_IX86) + return _mm512_setr_epi32( + (int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32), + (int)i2, (int)(i2 >> 32), (int)i3, (int)(i3 >> 32), + (int)i4, (int)(i4 >> 32), (int)i5, (int)(i5 >> 32), + (int)i6, (int)(i6 >> 32), (int)i7, (int)(i7 >> 32) + ); +#else return _mm512_setr_epi64(i0, i1, i2, i3, i4, i5, i6, i7); +#endif } NPY_FINLINE __m512 npyv__setr_ps( diff --git a/numpy/core/src/common/simd/avx512/utils.h b/numpy/core/src/common/simd/avx512/utils.h index 8066283c6..c3079283f 100644 --- a/numpy/core/src/common/simd/avx512/utils.h +++ b/numpy/core/src/common/simd/avx512/utils.h @@ -26,7 +26,7 @@ #define npyv512_combine_ps256(A, B) _mm512_insertf32x8(_mm512_castps256_ps512(A), B, 1) #else #define npyv512_combine_ps256(A, B) \ - _mm512_castsi512_ps(npyv512_combine_si256(_mm512_castps_si512(A), _mm512_castps_si512(B))) + _mm512_castsi512_ps(npyv512_combine_si256(_mm256_castps_si256(A), _mm256_castps_si256(B))) #endif #define NPYV_IMPL_AVX512_FROM_AVX2_1ARG(FN_NAME, INTRIN) \ @@ -39,6 +39,26 @@ return npyv512_combine_si256(l_a, h_a); \ } +#define NPYV_IMPL_AVX512_FROM_AVX2_PS_1ARG(FN_NAME, INTRIN) \ + NPY_FINLINE __m512 FN_NAME(__m512 a) \ + { \ + __m256 l_a = npyv512_lower_ps256(a); \ + __m256 h_a = npyv512_higher_ps256(a); \ + l_a = INTRIN(l_a); \ + h_a = INTRIN(h_a); \ + return npyv512_combine_ps256(l_a, h_a); \ + } + +#define NPYV_IMPL_AVX512_FROM_AVX2_PD_1ARG(FN_NAME, INTRIN) \ + NPY_FINLINE __m512d FN_NAME(__m512d a) \ + { \ + __m256d l_a = npyv512_lower_pd256(a); \ + __m256d h_a = npyv512_higher_pd256(a); \ + l_a = INTRIN(l_a); \ + h_a = INTRIN(h_a); \ + return npyv512_combine_pd256(l_a, h_a); \ + } + #define NPYV_IMPL_AVX512_FROM_AVX2_2ARG(FN_NAME, INTRIN) \ NPY_FINLINE __m512i FN_NAME(__m512i a, __m512i b) \ { \ diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h index 5d2ab2906..a7a461721 100644 --- a/numpy/core/src/common/simd/intdiv.h +++ b/numpy/core/src/common/simd/intdiv.h @@ -162,11 +162,12 @@ NPY_FINLINE npy_uint64 npyv__divh128_u64(npy_uint64 high, npy_uint64 divisor) npy_uint32 divisor_hi = divisor >> 32; npy_uint32 divisor_lo = divisor & 0xFFFFFFFF; // compute high quotient digit - npy_uint32 quotient_hi = (npy_uint32)(high / divisor_hi); + npy_uint64 quotient_hi = high / divisor_hi; npy_uint64 remainder = high - divisor_hi * quotient_hi; npy_uint64 base32 = 1ULL << 32; while (quotient_hi >= base32 || quotient_hi*divisor_lo > base32*remainder) { - remainder += --divisor_hi; + --quotient_hi; + remainder += divisor_hi; if (remainder >= base32) { break; } @@ -200,7 +201,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d) default: l = npyv__bitscan_revnz_u32(d - 1) + 1; // ceil(log2(d)) l2 = (npy_uint8)(1 << l); // 2^l, overflow to 0 if l = 8 - m = ((l2 - d) << 8) / d + 1; // multiplier + m = ((npy_uint16)((l2 - d) << 8)) / d + 1; // multiplier sh1 = 1; sh2 = l - 1; // shift counts } npyv_u8x3 divisor; diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h index 19ea6f22f..19e5cd846 100644 --- a/numpy/core/src/common/simd/neon/math.h +++ b/numpy/core/src/common/simd/neon/math.h @@ -88,16 +88,16 @@ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a) #define npyv_max_f64 vmaxq_f64 // Maximum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. #ifdef NPY_HAVE_ASIMD #define npyv_maxp_f32 vmaxnmq_f32 #else NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) - { + { npyv_u32 nn_a = vceqq_f32(a, a); npyv_u32 nn_b = vceqq_f32(b, b); return vmaxq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a)); - } + } #endif #if NPY_SIMD_F64 #define npyv_maxp_f64 vmaxnmq_f64 @@ -123,16 +123,16 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b) #define npyv_min_f64 vminq_f64 // Minimum, supports IEEE floating-point arithmetic (IEC 60559), // - If one of the two vectors contains NaN, the equivalent element of the other vector is set -// - Only if both corresponded elements are NaN, NaN is set. +// - Only if both corresponded elements are NaN, NaN is set. #ifdef NPY_HAVE_ASIMD #define npyv_minp_f32 vminnmq_f32 #else NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b) - { + { npyv_u32 nn_a = vceqq_f32(a, a); npyv_u32 nn_b = vceqq_f32(b, b); return vminq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a)); - } + } #endif #if NPY_SIMD_F64 #define npyv_minp_f64 vminnmq_f64 @@ -153,4 +153,74 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return vbslq_s64(npyv_cmplt_s64(a, b), a, b); } +// ceil +#ifdef NPY_HAVE_ASIMD + #define npyv_ceil_f32 vrndpq_f32 +#else + NPY_FINLINE npyv_f32 npyv_ceil_f32(npyv_f32 a) + { + const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_u32 one = vreinterpretq_u32_f32(vdupq_n_f32(1.0f)); + const npyv_s32 max_int = vdupq_n_s32(0x7fffffff); + /** + * On armv7, vcvtq.f32 handles special cases as follows: + * NaN return 0 + * +inf or +outrange return 0x80000000(-0.0f) + * -inf or -outrange return 0x7fffffff(nan) + */ + npyv_s32 roundi = vcvtq_s32_f32(a); + npyv_f32 round = vcvtq_f32_s32(roundi); + npyv_f32 ceil = vaddq_f32(round, vreinterpretq_f32_u32( + vandq_u32(vcltq_f32(round, a), one)) + ); + // respect signed zero, e.g. -0.5 -> -0.0 + npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(ceil), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + // if nan or overflow return a + npyv_u32 nnan = npyv_notnan_f32(a); + npyv_u32 overflow = vorrq_u32( + vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int) + ); + return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a); + } +#endif +#if NPY_SIMD_F64 + #define npyv_ceil_f64 vrndpq_f64 +#endif // NPY_SIMD_F64 + +// trunc +#ifdef NPY_HAVE_ASIMD + #define npyv_trunc_f32 vrndq_f32 +#else + NPY_FINLINE npyv_f32 npyv_trunc_f32(npyv_f32 a) + { + const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f)); + const npyv_s32 max_int = vdupq_n_s32(0x7fffffff); + /** + * On armv7, vcvtq.f32 handles special cases as follows: + * NaN return 0 + * +inf or +outrange return 0x80000000(-0.0f) + * -inf or -outrange return 0x7fffffff(nan) + */ + npyv_s32 roundi = vcvtq_s32_f32(a); + npyv_f32 round = vcvtq_f32_s32(roundi); + // respect signed zero, e.g. -0.5 -> -0.0 + npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32( + vreinterpretq_s32_f32(round), + vandq_s32(vreinterpretq_s32_f32(a), szero) + )); + // if nan or overflow return a + npyv_u32 nnan = npyv_notnan_f32(a); + npyv_u32 overflow = vorrq_u32( + vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int) + ); + return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a); + } +#endif +#if NPY_SIMD_F64 + #define npyv_trunc_f64 vrndq_f64 +#endif // NPY_SIMD_F64 + #endif // _NPY_SIMD_NEON_MATH_H diff --git a/numpy/core/src/common/simd/simd.h b/numpy/core/src/common/simd/simd.h index a3e2b95de..08b2a7d00 100644 --- a/numpy/core/src/common/simd/simd.h +++ b/numpy/core/src/common/simd/simd.h @@ -27,6 +27,25 @@ typedef npy_int64 npyv_lanetype_s64; typedef float npyv_lanetype_f32; typedef double npyv_lanetype_f64; +#if defined(_MSC_VER) && defined(_M_IX86) +/* + * Avoid using any of the following intrinsics with MSVC 32-bit, + * even if they are apparently work on newer versions. + * They had bad impact on the generated instructions, + * sometimes the compiler deal with them without the respect + * of 32-bit mode which lead to crush due to execute 64-bit + * instructions and other times generate bad emulated instructions. + */ + #undef _mm512_set1_epi64 + #undef _mm256_set1_epi64x + #undef _mm_set1_epi64x + #undef _mm512_setr_epi64x + #undef _mm256_setr_epi64x + #undef _mm_setr_epi64x + #undef _mm512_set_epi64x + #undef _mm256_set_epi64x + #undef _mm_set_epi64x +#endif #if defined(NPY_HAVE_AVX512F) && !defined(NPY_SIMD_FORCE_256) && !defined(NPY_SIMD_FORCE_128) #include "avx512/avx512.h" #elif defined(NPY_HAVE_AVX2) && !defined(NPY_SIMD_FORCE_128) diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h index 97d35afc5..5daf7711e 100644 --- a/numpy/core/src/common/simd/sse/math.h +++ b/numpy/core/src/common/simd/sse/math.h @@ -143,4 +143,63 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b) return npyv_select_s64(npyv_cmplt_s64(a, b), a, b); } +// ceil +#ifdef NPY_HAVE_SSE41 + #define npyv_ceil_f32 _mm_ceil_ps + #define npyv_ceil_f64 _mm_ceil_pd +#else + NPY_FINLINE npyv_f32 npyv_ceil_f32(npyv_f32 a) + { + const npyv_f32 szero = _mm_set1_ps(-0.0f); + const npyv_f32 one = _mm_set1_ps(1.0f); + npyv_s32 roundi = _mm_cvttps_epi32(a); + npyv_f32 round = _mm_cvtepi32_ps(roundi); + npyv_f32 ceil = _mm_add_ps(round, _mm_and_ps(_mm_cmplt_ps(round, a), one)); + // respect signed zero, e.g. -0.5 -> -0.0 + npyv_f32 rzero = _mm_or_ps(ceil, _mm_and_ps(a, szero)); + // if overflow return a + return npyv_select_f32(_mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)), a, rzero); + } + NPY_FINLINE npyv_f64 npyv_ceil_f64(npyv_f64 a) + { + const npyv_f64 szero = _mm_set1_pd(-0.0); + const npyv_f64 one = _mm_set1_pd(1.0); + const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000); + npyv_f64 sign_two52 = _mm_or_pd(two_power_52, _mm_and_pd(a, szero)); + // round by add magic number 2^52 + npyv_f64 round = _mm_sub_pd(_mm_add_pd(a, sign_two52), sign_two52); + npyv_f64 ceil = _mm_add_pd(round, _mm_and_pd(_mm_cmplt_pd(round, a), one)); + // respect signed zero, e.g. -0.5 -> -0.0 + return _mm_or_pd(ceil, _mm_and_pd(a, szero)); + } +#endif + +// trunc +#ifdef NPY_HAVE_SSE41 + #define npyv_trunc_f32(A) _mm_round_ps(A, _MM_FROUND_TO_ZERO) + #define npyv_trunc_f64(A) _mm_round_pd(A, _MM_FROUND_TO_ZERO) +#else + NPY_FINLINE npyv_f32 npyv_trunc_f32(npyv_f32 a) + { + const npyv_f32 szero = _mm_set1_ps(-0.0f); + npyv_s32 roundi = _mm_cvttps_epi32(a); + npyv_f32 trunc = _mm_cvtepi32_ps(roundi); + // respect signed zero, e.g. -0.5 -> -0.0 + npyv_f32 rzero = _mm_or_ps(trunc, _mm_and_ps(a, szero)); + // if overflow return a + return npyv_select_f32(_mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)), a, rzero); + } + NPY_FINLINE npyv_f64 npyv_trunc_f64(npyv_f64 a) + { + const npyv_f64 szero = _mm_set1_pd(-0.0); + const npyv_f64 one = _mm_set1_pd(1.0); + const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000); + npyv_f64 abs_a = npyv_abs_f64(a); + // round by add magic number 2^52 + npyv_f64 abs_round = _mm_sub_pd(_mm_add_pd(abs_a, two_power_52), two_power_52); + npyv_f64 subtrahend = _mm_and_pd(_mm_cmpgt_pd(abs_round, abs_a), one); + return _mm_or_pd(_mm_sub_pd(abs_round, subtrahend), _mm_and_pd(a, szero)); + } +#endif + #endif // _NPY_SIMD_SSE_MATH_H diff --git a/numpy/core/src/common/simd/sse/misc.h b/numpy/core/src/common/simd/sse/misc.h index 1099c491d..7d13fbf55 100644 --- a/numpy/core/src/common/simd/sse/misc.h +++ b/numpy/core/src/common/simd/sse/misc.h @@ -24,11 +24,28 @@ #define npyv_setall_s16(VAL) _mm_set1_epi16((short)(VAL)) #define npyv_setall_u32(VAL) _mm_set1_epi32((int)(VAL)) #define npyv_setall_s32(VAL) _mm_set1_epi32((int)(VAL)) -#define npyv_setall_u64(VAL) _mm_set1_epi64x((npy_int64)(VAL)) -#define npyv_setall_s64(VAL) _mm_set1_epi64x((npy_int64)(VAL)) #define npyv_setall_f32 _mm_set1_ps #define npyv_setall_f64 _mm_set1_pd +NPY_FINLINE __m128i npyv__setr_epi64(npy_int64 i0, npy_int64 i1); + +NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a) +{ +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64((npy_int64)a, (npy_int64)a); +#else + return _mm_set1_epi64x((npy_int64)a); +#endif +} +NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a) +{ +#if defined(_MSC_VER) && defined(_M_IX86) + return npyv__setr_epi64(a, a); +#else + return _mm_set1_epi64x((npy_int64)a); +#endif +} + /** * vector with specific values set to each lane and * set a specific value to all remained lanes @@ -53,7 +70,11 @@ NPY_FINLINE __m128i npyv__setr_epi32(int i0, int i1, int i2, int i3) } NPY_FINLINE __m128i npyv__setr_epi64(npy_int64 i0, npy_int64 i1) { +#if defined(_MSC_VER) && defined(_M_IX86) + return _mm_setr_epi32((int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32)); +#else return _mm_set_epi64x(i1, i0); +#endif } NPY_FINLINE __m128 npyv__setr_ps(float i0, float i1, float i2, float i3) { diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h index b2e393c7c..d138cae8a 100644 --- a/numpy/core/src/common/simd/vsx/math.h +++ b/numpy/core/src/common/simd/vsx/math.h @@ -69,4 +69,12 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) #define npyv_min_u64 vec_min #define npyv_min_s64 vec_min +// ceil +#define npyv_ceil_f32 vec_ceil +#define npyv_ceil_f64 vec_ceil + +// trunc +#define npyv_trunc_f32 vec_trunc +#define npyv_trunc_f64 vec_trunc + #endif // _NPY_SIMD_VSX_MATH_H diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index e945d0771..9486b7cff 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -2193,7 +2193,7 @@ PrintFloat_Printf_g(PyObject *obj, int precision) } else if (PyArray_IsScalar(obj, LongDouble)) { npy_longdouble x = PyArrayScalar_VAL(obj, LongDouble); - PyOS_snprintf(str, sizeof(str), "%.*Lg", precision, x); + PyOS_snprintf(str, sizeof(str), "%.*" NPY_LONGDOUBLE_FMT, precision, x); } else{ double val = PyFloat_AsDouble(obj); @@ -2363,6 +2363,17 @@ run_intp_converter(PyObject* NPY_UNUSED(self), PyObject *args) return tup; } +/* used to test NPY_ARRAY_ENSURENOCOPY raises ValueError */ +static PyObject* +npy_ensurenocopy(PyObject* NPY_UNUSED(self), PyObject* args) +{ + int flags = NPY_ARRAY_ENSURENOCOPY; + if (!PyArray_CheckFromAny(args, NULL, 0, 0, flags, NULL)) { + return NULL; + } + Py_RETURN_NONE; +} + static PyMethodDef Multiarray_TestsMethods[] = { {"argparse_example_function", (PyCFunction)argparse_example_function, @@ -2424,6 +2435,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"npy_discard", npy_discard, METH_O, NULL}, + {"npy_ensurenocopy", + npy_ensurenocopy, + METH_O, NULL}, {"get_buffer_info", get_buffer_info, METH_VARARGS, NULL}, diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c index adb4ae128..94a7daa83 100644 --- a/numpy/core/src/multiarray/alloc.c +++ b/numpy/core/src/multiarray/alloc.c @@ -133,9 +133,10 @@ npy_alloc_cache(npy_uintp sz) /* zero initialized data, sz is number of bytes to allocate */ NPY_NO_EXPORT void * -npy_alloc_cache_zero(npy_uintp sz) +npy_alloc_cache_zero(size_t nmemb, size_t size) { void * p; + size_t sz = nmemb * size; NPY_BEGIN_THREADS_DEF; if (sz < NBUCKETS) { p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &PyDataMem_NEW); @@ -145,7 +146,7 @@ npy_alloc_cache_zero(npy_uintp sz) return p; } NPY_BEGIN_THREADS; - p = PyDataMem_NEW_ZEROED(sz, 1); + p = PyDataMem_NEW_ZEROED(nmemb, size); NPY_END_THREADS; return p; } @@ -185,10 +186,28 @@ npy_free_cache_dim(void * p, npy_uintp sz) &PyArray_free); } +/* Similar to array_dealloc in arrayobject.c */ +static NPY_INLINE void +WARN_NO_RETURN(PyObject* warning, const char * msg) { + if (PyErr_WarnEx(warning, msg, 1) < 0) { + PyObject * s; + + s = PyUnicode_FromString("PyDataMem_UserFREE"); + if (s) { + PyErr_WriteUnraisable(s); + Py_DECREF(s); + } + else { + PyErr_WriteUnraisable(Py_None); + } + } +} + + /* malloc/free/realloc hook */ -NPY_NO_EXPORT PyDataMem_EventHookFunc *_PyDataMem_eventhook; -NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data; +NPY_NO_EXPORT PyDataMem_EventHookFunc *_PyDataMem_eventhook = NULL; +NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data = NULL; /*NUMPY_API * Sets the allocation event hook for numpy array data. @@ -209,6 +228,8 @@ NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data; * operations that might cause new allocation events (such as the * creation/destruction numpy objects, or creating/destroying Python * objects which might cause a gc) + * + * Deprecated in 1.23 */ NPY_NO_EXPORT PyDataMem_EventHookFunc * PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook, @@ -217,6 +238,10 @@ PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook, PyDataMem_EventHookFunc *temp; NPY_ALLOW_C_API_DEF NPY_ALLOW_C_API + /* 2021-11-18, 1.23 */ + WARN_NO_RETURN(PyExc_DeprecationWarning, + "PyDataMem_SetEventHook is deprecated, use tracemalloc " + "and the 'np.lib.tracemalloc_domain' domain"); temp = _PyDataMem_eventhook; _PyDataMem_eventhook = newhook; if (old_data != NULL) { @@ -254,21 +279,21 @@ PyDataMem_NEW(size_t size) * Allocates zeroed memory for array data. */ NPY_NO_EXPORT void * -PyDataMem_NEW_ZEROED(size_t size, size_t elsize) +PyDataMem_NEW_ZEROED(size_t nmemb, size_t size) { void *result; - result = calloc(size, elsize); + result = calloc(nmemb, size); if (_PyDataMem_eventhook != NULL) { NPY_ALLOW_C_API_DEF NPY_ALLOW_C_API if (_PyDataMem_eventhook != NULL) { - (*_PyDataMem_eventhook)(NULL, result, size * elsize, + (*_PyDataMem_eventhook)(NULL, result, nmemb * size, _PyDataMem_eventhook_user_data); } NPY_DISABLE_C_API } - PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size); + PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, nmemb * size); return result; } @@ -316,3 +341,325 @@ PyDataMem_RENEW(void *ptr, size_t size) } return result; } + +// The default data mem allocator malloc routine does not make use of a ctx. +// It should be called only through PyDataMem_UserNEW +// since itself does not handle eventhook and tracemalloc logic. +static NPY_INLINE void * +default_malloc(void *NPY_UNUSED(ctx), size_t size) +{ + return _npy_alloc_cache(size, 1, NBUCKETS, datacache, &malloc); +} + +// The default data mem allocator calloc routine does not make use of a ctx. +// It should be called only through PyDataMem_UserNEW_ZEROED +// since itself does not handle eventhook and tracemalloc logic. +static NPY_INLINE void * +default_calloc(void *NPY_UNUSED(ctx), size_t nelem, size_t elsize) +{ + void * p; + size_t sz = nelem * elsize; + NPY_BEGIN_THREADS_DEF; + if (sz < NBUCKETS) { + p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &malloc); + if (p) { + memset(p, 0, sz); + } + return p; + } + NPY_BEGIN_THREADS; + p = calloc(nelem, elsize); + NPY_END_THREADS; + return p; +} + +// The default data mem allocator realloc routine does not make use of a ctx. +// It should be called only through PyDataMem_UserRENEW +// since itself does not handle eventhook and tracemalloc logic. +static NPY_INLINE void * +default_realloc(void *NPY_UNUSED(ctx), void *ptr, size_t new_size) +{ + return realloc(ptr, new_size); +} + +// The default data mem allocator free routine does not make use of a ctx. +// It should be called only through PyDataMem_UserFREE +// since itself does not handle eventhook and tracemalloc logic. +static NPY_INLINE void +default_free(void *NPY_UNUSED(ctx), void *ptr, size_t size) +{ + _npy_free_cache(ptr, size, NBUCKETS, datacache, &free); +} + +/* Memory handler global default */ +PyDataMem_Handler default_handler = { + "default_allocator", + 1, + { + NULL, /* ctx */ + default_malloc, /* malloc */ + default_calloc, /* calloc */ + default_realloc, /* realloc */ + default_free /* free */ + } +}; +/* singleton capsule of the default handler */ +PyObject *PyDataMem_DefaultHandler; + +#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600) +PyObject *current_handler; +#endif + +int uo_index=0; /* user_override index */ + +/* Wrappers for the default or any user-assigned PyDataMem_Handler */ + +NPY_NO_EXPORT void * +PyDataMem_UserNEW(size_t size, PyObject *mem_handler) +{ + void *result; + PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + return NULL; + } + assert(size != 0); + result = handler->allocator.malloc(handler->allocator.ctx, size); + if (_PyDataMem_eventhook != NULL) { + NPY_ALLOW_C_API_DEF + NPY_ALLOW_C_API + if (_PyDataMem_eventhook != NULL) { + (*_PyDataMem_eventhook)(NULL, result, size, + _PyDataMem_eventhook_user_data); + } + NPY_DISABLE_C_API + } + PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size); + return result; +} + +NPY_NO_EXPORT void * +PyDataMem_UserNEW_ZEROED(size_t nmemb, size_t size, PyObject *mem_handler) +{ + void *result; + PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + return NULL; + } + result = handler->allocator.calloc(handler->allocator.ctx, nmemb, size); + if (_PyDataMem_eventhook != NULL) { + NPY_ALLOW_C_API_DEF + NPY_ALLOW_C_API + if (_PyDataMem_eventhook != NULL) { + (*_PyDataMem_eventhook)(NULL, result, nmemb * size, + _PyDataMem_eventhook_user_data); + } + NPY_DISABLE_C_API + } + PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, nmemb * size); + return result; +} + + +NPY_NO_EXPORT void +PyDataMem_UserFREE(void *ptr, size_t size, PyObject *mem_handler) +{ + PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + WARN_NO_RETURN(PyExc_RuntimeWarning, + "Could not get pointer to 'mem_handler' from PyCapsule"); + return; + } + PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr); + handler->allocator.free(handler->allocator.ctx, ptr, size); + if (_PyDataMem_eventhook != NULL) { + NPY_ALLOW_C_API_DEF + NPY_ALLOW_C_API + if (_PyDataMem_eventhook != NULL) { + (*_PyDataMem_eventhook)(ptr, NULL, 0, + _PyDataMem_eventhook_user_data); + } + NPY_DISABLE_C_API + } +} + +NPY_NO_EXPORT void * +PyDataMem_UserRENEW(void *ptr, size_t size, PyObject *mem_handler) +{ + void *result; + PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + return NULL; + } + + assert(size != 0); + result = handler->allocator.realloc(handler->allocator.ctx, ptr, size); + if (result != ptr) { + PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr); + } + PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size); + if (_PyDataMem_eventhook != NULL) { + NPY_ALLOW_C_API_DEF + NPY_ALLOW_C_API + if (_PyDataMem_eventhook != NULL) { + (*_PyDataMem_eventhook)(ptr, result, size, + _PyDataMem_eventhook_user_data); + } + NPY_DISABLE_C_API + } + return result; +} + +/*NUMPY_API + * Set a new allocation policy. If the input value is NULL, will reset + * the policy to the default. Return the previous policy, or + * return NULL if an error has occurred. We wrap the user-provided + * functions so they will still call the python and numpy + * memory management callback hooks. + */ +NPY_NO_EXPORT PyObject * +PyDataMem_SetHandler(PyObject *handler) +{ + PyObject *old_handler; +#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600) + PyObject *token; + if (PyContextVar_Get(current_handler, NULL, &old_handler)) { + return NULL; + } + if (handler == NULL) { + handler = PyDataMem_DefaultHandler; + } + token = PyContextVar_Set(current_handler, handler); + if (token == NULL) { + Py_DECREF(old_handler); + return NULL; + } + Py_DECREF(token); + return old_handler; +#else + PyObject *p; + p = PyThreadState_GetDict(); + if (p == NULL) { + return NULL; + } + old_handler = PyDict_GetItemString(p, "current_allocator"); + if (old_handler == NULL) { + old_handler = PyDataMem_DefaultHandler + } + Py_INCREF(old_handler); + if (handler == NULL) { + handler = PyDataMem_DefaultHandler; + } + const int error = PyDict_SetItemString(p, "current_allocator", handler); + if (error) { + Py_DECREF(old_handler); + return NULL; + } + return old_handler; +#endif +} + +/*NUMPY_API + * Return the policy that will be used to allocate data + * for the next PyArrayObject. On failure, return NULL. + */ +NPY_NO_EXPORT PyObject * +PyDataMem_GetHandler() +{ + PyObject *handler; +#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600) + if (PyContextVar_Get(current_handler, NULL, &handler)) { + return NULL; + } + return handler; +#else + PyObject *p = PyThreadState_GetDict(); + if (p == NULL) { + return NULL; + } + handler = PyDict_GetItemString(p, "current_allocator"); + if (handler == NULL) { + handler = PyCapsule_New(&default_handler, "mem_handler", NULL); + if (handler == NULL) { + return NULL; + } + } + else { + Py_INCREF(handler); + } + return handler; +#endif +} + +NPY_NO_EXPORT PyObject * +get_handler_name(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *arr=NULL; + if (!PyArg_ParseTuple(args, "|O:get_handler_name", &arr)) { + return NULL; + } + if (arr != NULL && !PyArray_Check(arr)) { + PyErr_SetString(PyExc_ValueError, "if supplied, argument must be an ndarray"); + return NULL; + } + PyObject *mem_handler; + PyDataMem_Handler *handler; + PyObject *name; + if (arr != NULL) { + mem_handler = PyArray_HANDLER((PyArrayObject *) arr); + if (mem_handler == NULL) { + Py_RETURN_NONE; + } + Py_INCREF(mem_handler); + } + else { + mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + return NULL; + } + } + handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + Py_DECREF(mem_handler); + return NULL; + } + name = PyUnicode_FromString(handler->name); + Py_DECREF(mem_handler); + return name; +} + +NPY_NO_EXPORT PyObject * +get_handler_version(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *arr=NULL; + if (!PyArg_ParseTuple(args, "|O:get_handler_version", &arr)) { + return NULL; + } + if (arr != NULL && !PyArray_Check(arr)) { + PyErr_SetString(PyExc_ValueError, "if supplied, argument must be an ndarray"); + return NULL; + } + PyObject *mem_handler; + PyDataMem_Handler *handler; + PyObject *version; + if (arr != NULL) { + mem_handler = PyArray_HANDLER((PyArrayObject *) arr); + if (mem_handler == NULL) { + Py_RETURN_NONE; + } + Py_INCREF(mem_handler); + } + else { + mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + return NULL; + } + } + handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + Py_DECREF(mem_handler); + return NULL; + } + version = PyLong_FromLong(handler->version); + Py_DECREF(mem_handler); + return version; +} diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h index 1259abca5..13c828458 100644 --- a/numpy/core/src/multiarray/alloc.h +++ b/numpy/core/src/multiarray/alloc.h @@ -11,13 +11,16 @@ NPY_NO_EXPORT PyObject * _set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj); NPY_NO_EXPORT void * -npy_alloc_cache(npy_uintp sz); +PyDataMem_UserNEW(npy_uintp sz, PyObject *mem_handler); NPY_NO_EXPORT void * -npy_alloc_cache_zero(npy_uintp sz); +PyDataMem_UserNEW_ZEROED(size_t nmemb, size_t size, PyObject *mem_handler); NPY_NO_EXPORT void -npy_free_cache(void * p, npy_uintp sd); +PyDataMem_UserFREE(void * p, npy_uintp sd, PyObject *mem_handler); + +NPY_NO_EXPORT void * +PyDataMem_UserRENEW(void *ptr, size_t size, PyObject *mem_handler); NPY_NO_EXPORT void * npy_alloc_cache_dim(npy_uintp sz); @@ -37,4 +40,14 @@ npy_free_cache_dim_array(PyArrayObject * arr) npy_free_cache_dim(PyArray_DIMS(arr), PyArray_NDIM(arr)); } +extern PyDataMem_Handler default_handler; +#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600) +extern PyObject *current_handler; /* PyContextVar/PyCapsule */ +#endif + +NPY_NO_EXPORT PyObject * +get_handler_name(PyObject *NPY_UNUSED(self), PyObject *obj); +NPY_NO_EXPORT PyObject * +get_handler_version(PyObject *NPY_UNUSED(self), PyObject *obj); + #endif /* NUMPY_CORE_SRC_MULTIARRAY_ALLOC_H_ */ diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 847bdafc3..2598e4bde 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -555,6 +555,7 @@ npy_new_coercion_cache( cache = PyMem_Malloc(sizeof(coercion_cache_obj)); } if (cache == NULL) { + Py_DECREF(arr_or_sequence); PyErr_NoMemory(); return -1; } @@ -857,6 +858,7 @@ PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype) * (Initially it is a pointer to the user-provided head pointer). * @param fixed_DType User provided fixed DType class * @param flags Discovery flags (reporting and behaviour flags, see def.) + * @param never_copy Specifies if a copy is allowed during array creation. * @return The updated number of maximum dimensions (i.e. scalars will set * this to the current dimensions). */ @@ -865,7 +867,8 @@ PyArray_DiscoverDTypeAndShape_Recursive( PyObject *obj, int curr_dims, int max_dims, PyArray_Descr**out_descr, npy_intp out_shape[NPY_MAXDIMS], coercion_cache_obj ***coercion_cache_tail_ptr, - PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags) + PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags, + int never_copy) { PyArrayObject *arr = NULL; PyObject *seq; @@ -923,7 +926,7 @@ PyArray_DiscoverDTypeAndShape_Recursive( requested_descr = *out_descr; } arr = (PyArrayObject *)_array_from_array_like(obj, - requested_descr, 0, NULL); + requested_descr, 0, NULL, never_copy); if (arr == NULL) { return -1; } @@ -1117,7 +1120,7 @@ PyArray_DiscoverDTypeAndShape_Recursive( max_dims = PyArray_DiscoverDTypeAndShape_Recursive( objects[i], curr_dims + 1, max_dims, out_descr, out_shape, coercion_cache_tail_ptr, fixed_DType, - flags); + flags, never_copy); if (max_dims < 0) { return -1; @@ -1157,6 +1160,7 @@ PyArray_DiscoverDTypeAndShape_Recursive( * The result may be unchanged (remain NULL) when converting a * sequence with no elements. In this case it is callers responsibility * to choose a default. + * @param never_copy Specifies that a copy is not allowed. * @return dimensions of the discovered object or -1 on error. * WARNING: If (and only if) the output is a single array, the ndim * returned _can_ exceed the maximum allowed number of dimensions. @@ -1169,7 +1173,7 @@ PyArray_DiscoverDTypeAndShape( npy_intp out_shape[NPY_MAXDIMS], coercion_cache_obj **coercion_cache, PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr, - PyArray_Descr **out_descr) + PyArray_Descr **out_descr, int never_copy) { coercion_cache_obj **coercion_cache_head = coercion_cache; *coercion_cache = NULL; @@ -1214,7 +1218,7 @@ PyArray_DiscoverDTypeAndShape( int ndim = PyArray_DiscoverDTypeAndShape_Recursive( obj, 0, max_dims, out_descr, out_shape, &coercion_cache, - fixed_DType, &flags); + fixed_DType, &flags, never_copy); if (ndim < 0) { goto fail; } @@ -1499,7 +1503,7 @@ _discover_array_parameters(PyObject *NPY_UNUSED(self), int ndim = PyArray_DiscoverDTypeAndShape( obj, NPY_MAXDIMS, shape, &coercion_cache, - fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype); + fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype, 0); Py_XDECREF(fixed_DType); Py_XDECREF(fixed_descriptor); if (ndim < 0) { diff --git a/numpy/core/src/multiarray/array_coercion.h b/numpy/core/src/multiarray/array_coercion.h index db0e479fe..f2482cecc 100644 --- a/numpy/core/src/multiarray/array_coercion.h +++ b/numpy/core/src/multiarray/array_coercion.h @@ -31,7 +31,7 @@ PyArray_DiscoverDTypeAndShape( npy_intp out_shape[NPY_MAXDIMS], coercion_cache_obj **coercion_cache, PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr, - PyArray_Descr **out_descr); + PyArray_Descr **out_descr, int never_copy); NPY_NO_EXPORT int PyArray_ExtractDTypeAndDescriptor(PyObject *dtype, diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c index 406b0c6ff..d93dac506 100644 --- a/numpy/core/src/multiarray/array_method.c +++ b/numpy/core/src/multiarray/array_method.c @@ -780,6 +780,13 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata) * This function wraps a regular unmasked strided-loop as a * masked strided-loop, only calling the function for elements * where the mask is True. + * + * TODO: Reductions also use this code to implement masked reductions. + * Before consolidating them, reductions had a special case for + * broadcasts: when the mask stride was 0 the code does not check all + * elements as `npy_memchr` currently does. + * It may be worthwhile to add such an optimization again if broadcasted + * masks are common enough. */ static int generic_masked_strided_loop(PyArrayMethod_Context *context, diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h index b29c7c077..7b7372bd0 100644 --- a/numpy/core/src/multiarray/array_method.h +++ b/numpy/core/src/multiarray/array_method.h @@ -21,6 +21,17 @@ typedef enum { NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2, /* Whether the method supports unaligned access (not runtime) */ NPY_METH_SUPPORTS_UNALIGNED = 1 << 3, + /* + * Private flag for now for *logic* functions. The logical functions + * `logical_or` and `logical_and` can always cast the inputs to booleans + * "safely" (because that is how the cast to bool is defined). + * @seberg: I am not sure this is the best way to handle this, so its + * private for now (also it is very limited anyway). + * There is one "exception". NA aware dtypes cannot cast to bool + * (hopefully), so the `??->?` loop should error even with this flag. + * But a second NA fallback loop will be necessary. + */ + _NPY_METH_FORCE_CAST_INPUTS = 1 << 17, /* All flags which can change at runtime */ NPY_METH_RUNTIME_FLAGS = ( diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 9b9df08f2..1b197d0f2 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -263,7 +263,7 @@ PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object) */ ndim = PyArray_DiscoverDTypeAndShape(src_object, PyArray_NDIM(dest), dims, &cache, - NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype); + NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype, 0); if (ndim < 0) { return -1; } @@ -493,7 +493,28 @@ array_dealloc(PyArrayObject *self) if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) { PyArray_XDECREF(self); } - npy_free_cache(fa->data, PyArray_NBYTES(self)); + /* + * Allocation will never be 0, see comment in ctors.c + * line 820 + */ + size_t nbytes = PyArray_NBYTES(self); + if (nbytes == 0) { + nbytes = fa->descr->elsize ? fa->descr->elsize : 1; + } + if (fa->mem_handler == NULL) { + char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY"); + if ((env != NULL) && (strncmp(env, "1", 1) == 0)) { + char const * msg = "Trying to dealloc data, but a memory policy " + "is not set. If you take ownership of the data, you must " + "set a base owning the data (e.g. a PyCapsule)."; + WARN_IN_DEALLOC(PyExc_RuntimeWarning, msg); + } + // Guess at malloc/free ??? + free(fa->data); + } else { + PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler); + Py_DECREF(fa->mem_handler); + } } /* must match allocation in PyArray_NewFromDescr */ @@ -1705,22 +1726,6 @@ array_iter(PyArrayObject *arr) return PySeqIter_New((PyObject *)arr); } -static PyObject * -array_alloc(PyTypeObject *type, Py_ssize_t NPY_UNUSED(nitems)) -{ - /* nitems will always be 0 */ - PyObject *obj = PyObject_Malloc(type->tp_basicsize); - PyObject_Init(obj, type); - return obj; -} - -static void -array_free(PyObject * v) -{ - /* avoid same deallocator as PyBaseObject, see gentype_free */ - PyObject_Free(v); -} - NPY_NO_EXPORT PyTypeObject PyArray_Type = { PyVarObject_HEAD_INIT(NULL, 0) @@ -1741,7 +1746,5 @@ NPY_NO_EXPORT PyTypeObject PyArray_Type = { .tp_iter = (getiterfunc)array_iter, .tp_methods = array_methods, .tp_getset = array_getsetlist, - .tp_alloc = (allocfunc)array_alloc, .tp_new = (newfunc)array_new, - .tp_free = (freefunc)array_free, }; diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 15782a91b..71808cc48 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -2759,10 +2759,10 @@ VOID_nonzero (char *ip, PyArrayObject *ap) dummy_fields.descr = new; if ((new->alignment > 1) && !__ALIGNED(ip + offset, new->alignment)) { - PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED); + PyArray_CLEARFLAGS(dummy_arr, NPY_ARRAY_ALIGNED); } else { - PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED); + PyArray_ENABLEFLAGS(dummy_arr, NPY_ARRAY_ALIGNED); } if (new->f->nonzero(ip+offset, dummy_arr)) { nonz = NPY_TRUE; @@ -3093,6 +3093,10 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) if (!PyArray_HASFIELDS(ap)) { return STRING_compare(ip1, ip2, ap); } + PyObject *mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + goto finish; + } descr = PyArray_DESCR(ap); /* * Compare on the first-field. If equal, then @@ -3107,15 +3111,19 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) if (_unpack_field(tup, &new, &offset) < 0) { goto finish; } - /* descr is the only field checked by compare or copyswap */ + /* Set the fields needed by compare or copyswap */ dummy_struct.descr = new; + swap = PyArray_ISBYTESWAPPED(dummy); nip1 = ip1 + offset; nip2 = ip2 + offset; if (swap || new->alignment > 1) { if (swap || !npy_is_aligned(nip1, new->alignment)) { - /* create buffer and copy */ - nip1 = npy_alloc_cache(new->elsize); + /* + * create temporary buffer and copy, + * always use the current handler for internal allocations + */ + nip1 = PyDataMem_UserNEW(new->elsize, mem_handler); if (nip1 == NULL) { goto finish; } @@ -3124,11 +3132,15 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) new->f->copyswap(nip1, NULL, swap, dummy); } if (swap || !npy_is_aligned(nip2, new->alignment)) { - /* create buffer and copy */ - nip2 = npy_alloc_cache(new->elsize); + /* + * create temporary buffer and copy, + * always use the current handler for internal allocations + */ + nip2 = PyDataMem_UserNEW(new->elsize, mem_handler); if (nip2 == NULL) { if (nip1 != ip1 + offset) { - npy_free_cache(nip1, new->elsize); + /* destroy temporary buffer */ + PyDataMem_UserFREE(nip1, new->elsize, mem_handler); } goto finish; } @@ -3140,10 +3152,12 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) res = new->f->compare(nip1, nip2, dummy); if (swap || new->alignment > 1) { if (nip1 != ip1 + offset) { - npy_free_cache(nip1, new->elsize); + /* destroy temporary buffer */ + PyDataMem_UserFREE(nip1, new->elsize, mem_handler); } if (nip2 != ip2 + offset) { - npy_free_cache(nip2, new->elsize); + /* destroy temporary buffer */ + PyDataMem_UserFREE(nip2, new->elsize, mem_handler); } } if (res != 0) { @@ -3152,6 +3166,7 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap) } finish: + Py_XDECREF(mem_handler); return res; } diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 82d34193d..aa95d285a 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -119,7 +119,7 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype) int ndim; ndim = PyArray_DiscoverDTypeAndShape( - obj, maxdims, shape, &cache, NULL, NULL, out_dtype); + obj, maxdims, shape, &cache, NULL, NULL, out_dtype, 0); if (ndim < 0) { return -1; } diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 9910fffe6..5853e068b 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1393,7 +1393,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) { PyObject *obj; PyObject *str; - #if PY_VERSION_HEX >= 0x030700A2 && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300) + #if !defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300 const char *docstr; #else char *docstr; diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c index 6de764fb1..ef101a78b 100644 --- a/numpy/core/src/multiarray/conversion_utils.c +++ b/numpy/core/src/multiarray/conversion_utils.c @@ -163,6 +163,41 @@ PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq) return PyArray_IntpConverter(obj, seq); } +NPY_NO_EXPORT int +PyArray_CopyConverter(PyObject *obj, _PyArray_CopyMode *copymode) { + if (obj == Py_None) { + PyErr_SetString(PyExc_ValueError, + "NoneType copy mode not allowed."); + return NPY_FAIL; + } + + int int_copymode; + PyObject* numpy_CopyMode = NULL; + npy_cache_import("numpy", "_CopyMode", &numpy_CopyMode); + + if (numpy_CopyMode != NULL && (PyObject *)Py_TYPE(obj) == numpy_CopyMode) { + PyObject* mode_value = PyObject_GetAttrString(obj, "value"); + if (mode_value == NULL) { + return NPY_FAIL; + } + + int_copymode = (int)PyLong_AsLong(mode_value); + if (error_converting(int_copymode)) { + return NPY_FAIL; + } + } + else { + npy_bool bool_copymode; + if (!PyArray_BoolConverter(obj, &bool_copymode)) { + return NPY_FAIL; + } + int_copymode = (int)bool_copymode; + } + + *copymode = (_PyArray_CopyMode)int_copymode; + return NPY_SUCCEED; +} + /*NUMPY_API * Get buffer chunk from object * diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h index 89cf2ef27..4072841ee 100644 --- a/numpy/core/src/multiarray/conversion_utils.h +++ b/numpy/core/src/multiarray/conversion_utils.h @@ -9,6 +9,15 @@ PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq); NPY_NO_EXPORT int PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq); +typedef enum { + NPY_COPY_IF_NEEDED = 0, + NPY_COPY_ALWAYS = 1, + NPY_COPY_NEVER = 2, +} _PyArray_CopyMode; + +NPY_NO_EXPORT int +PyArray_CopyConverter(PyObject *obj, _PyArray_CopyMode *copyflag); + NPY_NO_EXPORT int PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf); diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index eeadad374..3135d6989 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -2119,7 +2119,7 @@ PyArray_ObjectType(PyObject *op, int minimum_type) * This function is only used in one place within NumPy and should * generally be avoided. It is provided mainly for backward compatibility. * - * The user of the function has to free the returned array. + * The user of the function has to free the returned array with PyDataMem_FREE. */ NPY_NO_EXPORT PyArrayObject ** PyArray_ConvertToCommonType(PyObject *op, int *retn) diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 9da75fb8a..b62426854 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -726,6 +726,7 @@ PyArray_NewFromDescr_int( fa->nd = nd; fa->dimensions = NULL; fa->data = NULL; + fa->mem_handler = NULL; if (data == NULL) { fa->flags = NPY_ARRAY_DEFAULT; @@ -805,12 +806,19 @@ PyArray_NewFromDescr_int( fa->flags |= NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_F_CONTIGUOUS; } + if (data == NULL) { + /* Store the handler in case the default is modified */ + fa->mem_handler = PyDataMem_GetHandler(); + if (fa->mem_handler == NULL) { + goto fail; + } /* * Allocate something even for zero-space arrays * e.g. shape=(0,) -- otherwise buffer exposure * (a.data) doesn't work as it should. * Could probably just allocate a few bytes here. -- Chuck + * Note: always sync this with calls to PyDataMem_UserFREE */ if (nbytes == 0) { nbytes = descr->elsize ? descr->elsize : 1; @@ -820,21 +828,23 @@ PyArray_NewFromDescr_int( * which could also be sub-fields of a VOID array */ if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) { - data = npy_alloc_cache_zero(nbytes); + data = PyDataMem_UserNEW_ZEROED(nbytes, 1, fa->mem_handler); } else { - data = npy_alloc_cache(nbytes); + data = PyDataMem_UserNEW(nbytes, fa->mem_handler); } if (data == NULL) { raise_memory_error(fa->nd, fa->dimensions, descr); goto fail; } + fa->flags |= NPY_ARRAY_OWNDATA; } else { + /* The handlers should never be called in this case */ + fa->mem_handler = NULL; /* - * If data is passed in, this object won't own it by default. - * Caller must arrange for this to be reset if truly desired + * If data is passed in, this object won't own it. */ fa->flags &= ~NPY_ARRAY_OWNDATA; } @@ -902,6 +912,7 @@ PyArray_NewFromDescr_int( return (PyObject *)fa; fail: + Py_XDECREF(fa->mem_handler); Py_DECREF(fa); return NULL; } @@ -1273,6 +1284,7 @@ fail: * DType may be used, but is not enforced. * @param writeable whether the result must be writeable. * @param context Unused parameter, must be NULL (should be removed later). + * @param never_copy Specifies that a copy is not allowed. * * @returns The array object, Py_NotImplemented if op is not array-like, * or NULL with an error set. (A new reference to Py_NotImplemented @@ -1280,7 +1292,8 @@ fail: */ NPY_NO_EXPORT PyObject * _array_from_array_like(PyObject *op, - PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context) { + PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context, + int never_copy) { PyObject* tmp; /* @@ -1336,7 +1349,7 @@ _array_from_array_like(PyObject *op, * this should be changed! */ if (!writeable && tmp == Py_NotImplemented) { - tmp = PyArray_FromArrayAttr(op, requested_dtype, context); + tmp = PyArray_FromArrayAttr_int(op, requested_dtype, never_copy); if (tmp == NULL) { return NULL; } @@ -1436,7 +1449,7 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s, } /* Try __array__ before using s as a sequence */ - PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL); + PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL, 0); if (tmp == NULL) { goto fail; } @@ -1564,7 +1577,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, Py_XDECREF(newtype); ndim = PyArray_DiscoverDTypeAndShape(op, - NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype); + NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype, + flags & NPY_ARRAY_ENSURENOCOPY); Py_XDECREF(fixed_descriptor); Py_XDECREF(fixed_DType); @@ -1689,7 +1703,17 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, ((PyVoidScalarObject *)op)->flags, NULL, op); } - else if (cache == 0 && newtype != NULL && + /* + * If we got this far, we definitely have to create a copy, since we are + * converting either from a scalar (cache == NULL) or a (nested) sequence. + */ + if (flags & NPY_ARRAY_ENSURENOCOPY ) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while creating an array."); + return NULL; + } + + if (cache == 0 && newtype != NULL && PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) { assert(ndim == 0); /* @@ -1790,7 +1814,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, * NPY_ARRAY_WRITEBACKIFCOPY, * NPY_ARRAY_FORCECAST, * NPY_ARRAY_ENSUREARRAY, - * NPY_ARRAY_ELEMENTSTRIDES + * NPY_ARRAY_ELEMENTSTRIDES, + * NPY_ARRAY_ENSURENOCOPY * * or'd (|) together * @@ -1851,9 +1876,15 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth, if (obj == NULL) { return NULL; } - if ((requires & NPY_ARRAY_ELEMENTSTRIDES) && - !PyArray_ElementStrides(obj)) { + + if ((requires & NPY_ARRAY_ELEMENTSTRIDES) + && !PyArray_ElementStrides(obj)) { PyObject *ret; + if (requires & NPY_ARRAY_ENSURENOCOPY) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while creating a new array."); + return NULL; + } ret = PyArray_NewCopy((PyArrayObject *)obj, NPY_ANYORDER); Py_DECREF(obj); obj = ret; @@ -1928,6 +1959,12 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) !PyArray_EquivTypes(oldtype, newtype); if (copy) { + if (flags & NPY_ARRAY_ENSURENOCOPY ) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while creating an array from given array."); + return NULL; + } + NPY_ORDER order = NPY_KEEPORDER; int subok = 1; @@ -2000,7 +2037,6 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags) if (flags & NPY_ARRAY_ENSUREARRAY) { subtype = &PyArray_Type; } - ret = (PyArrayObject *)PyArray_View(arr, NULL, subtype); if (ret == NULL) { return NULL; @@ -2425,18 +2461,30 @@ PyArray_FromInterface(PyObject *origin) return NULL; } -/*NUMPY_API + +/** + * Check for an __array__ attribute and call it when it exists. + * + * .. warning: + * If returned, `NotImplemented` is borrowed and must not be Decref'd + * + * @param op The Python object to convert to an array. + * @param descr The desired `arr.dtype`, passed into the `__array__` call, + * as information but is not checked/enforced! + * @param never_copy Specifies that a copy is not allowed. + * NOTE: Currently, this means an error is raised instead of calling + * `op.__array__()`. In the future we could call for example call + * `op.__array__(never_copy=True)` instead. + * @returns NotImplemented if `__array__` is not defined or a NumPy array + * (or subclass). On error, return NULL. */ NPY_NO_EXPORT PyObject * -PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context) +PyArray_FromArrayAttr_int( + PyObject *op, PyArray_Descr *descr, int never_copy) { PyObject *new; PyObject *array_meth; - if (context != NULL) { - PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL"); - return NULL; - } array_meth = PyArray_LookupSpecial_OnInstance(op, "__array__"); if (array_meth == NULL) { if (PyErr_Occurred()) { @@ -2452,6 +2500,16 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context) } return Py_NotImplemented; } + if (never_copy) { + /* Currently, we must always assume that `__array__` returns a copy */ + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while converting from an object " + "implementing the `__array__` protocol. NumPy cannot ensure " + "that no copy will be made."); + Py_DECREF(array_meth); + return NULL; + } + if (PyType_Check(op) && PyObject_HasAttrString(array_meth, "__get__")) { /* * If the input is a class `array_meth` may be a property-like object. @@ -2462,11 +2520,11 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context) Py_DECREF(array_meth); return Py_NotImplemented; } - if (typecode == NULL) { + if (descr == NULL) { new = PyObject_CallFunction(array_meth, NULL); } else { - new = PyObject_CallFunction(array_meth, "O", typecode); + new = PyObject_CallFunction(array_meth, "O", descr); } Py_DECREF(array_meth); if (new == NULL) { @@ -2482,6 +2540,21 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context) return new; } + +/*NUMPY_API + */ +NPY_NO_EXPORT PyObject * +PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context) +{ + if (context != NULL) { + PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL"); + return NULL; + } + + return PyArray_FromArrayAttr_int(op, typecode, 0); +} + + /*NUMPY_API * new reference -- accepts NULL for mintype */ @@ -3409,7 +3482,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre dptr += dtype->elsize; if (num < 0 && thisbuf == size) { totalbytes += bytes; - tmp = PyDataMem_RENEW(PyArray_DATA(r), totalbytes); + /* The handler is always valid */ + tmp = PyDataMem_UserRENEW(PyArray_DATA(r), totalbytes, + PyArray_HANDLER(r)); if (tmp == NULL) { err = 1; break; @@ -3431,7 +3506,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre const size_t nsize = PyArray_MAX(*nread,1)*dtype->elsize; if (nsize != 0) { - tmp = PyDataMem_RENEW(PyArray_DATA(r), nsize); + /* The handler is always valid */ + tmp = PyDataMem_UserRENEW(PyArray_DATA(r), nsize, + PyArray_HANDLER(r)); if (tmp == NULL) { err = 1; } @@ -3536,7 +3613,9 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep) const size_t nsize = PyArray_MAX(nread,1) * dtype->elsize; char *tmp; - if ((tmp = PyDataMem_RENEW(PyArray_DATA(ret), nsize)) == NULL) { + /* The handler is always valid */ + if((tmp = PyDataMem_UserRENEW(PyArray_DATA(ret), nsize, + PyArray_HANDLER(ret))) == NULL) { Py_DECREF(dtype); Py_DECREF(ret); return PyErr_NoMemory(); @@ -3820,7 +3899,9 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) */ elcount = (i >> 1) + (i < 4 ? 4 : 2) + i; if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) { - new_data = PyDataMem_RENEW(PyArray_DATA(ret), nbytes); + /* The handler is always valid */ + new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), nbytes, + PyArray_HANDLER(ret)); } else { new_data = NULL; @@ -3858,10 +3939,12 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count) * (assuming realloc is reasonably good about reusing space...) */ if (i == 0 || elsize == 0) { - /* The size cannot be zero for PyDataMem_RENEW. */ + /* The size cannot be zero for realloc. */ goto done; } - new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * elsize); + /* The handler is always valid */ + new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), i * elsize, + PyArray_HANDLER(ret)); if (new_data == NULL) { PyErr_SetString(PyExc_MemoryError, "cannot allocate array memory"); diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h index e59e86e8b..98160b1cc 100644 --- a/numpy/core/src/multiarray/ctors.h +++ b/numpy/core/src/multiarray/ctors.h @@ -32,7 +32,8 @@ PyArray_New( NPY_NO_EXPORT PyObject * _array_from_array_like(PyObject *op, - PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context); + PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context, + int never_copy); NPY_NO_EXPORT PyObject * PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth, @@ -52,6 +53,10 @@ NPY_NO_EXPORT PyObject * PyArray_FromInterface(PyObject *input); NPY_NO_EXPORT PyObject * +PyArray_FromArrayAttr_int( + PyObject *op, PyArray_Descr *descr, int never_copy); + +NPY_NO_EXPORT PyObject * PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context); diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 6a09f92ac..0c539053c 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1326,7 +1326,7 @@ _convert_from_dict(PyObject *obj, int align) goto fail; } /* If align is set, make sure the alignment divides into the size */ - if (align && itemsize % new->alignment != 0) { + if (align && new->alignment > 0 && itemsize % new->alignment != 0) { PyErr_Format(PyExc_ValueError, "NumPy dtype descriptor requires alignment of %d bytes, " "which is not divisible into the specified itemsize %d", @@ -2305,8 +2305,9 @@ arraydescr_new(PyTypeObject *subtype, { if (subtype != &PyArrayDescr_Type) { if (Py_TYPE(subtype) == &PyArrayDTypeMeta_Type && - !(PyType_GetFlags(Py_TYPE(subtype)) & Py_TPFLAGS_HEAPTYPE) && - (NPY_DT_SLOTS((PyArray_DTypeMeta *)subtype)) != NULL) { + (NPY_DT_SLOTS((PyArray_DTypeMeta *)subtype)) != NULL && + !NPY_DT_is_legacy((PyArray_DTypeMeta *)subtype) && + subtype->tp_new != PyArrayDescr_Type.tp_new) { /* * Appears to be a properly initialized user DType. Allocate * it and initialize the main part as best we can. @@ -2333,7 +2334,9 @@ arraydescr_new(PyTypeObject *subtype, } /* The DTypeMeta class should prevent this from happening. */ PyErr_Format(PyExc_SystemError, - "'%S' must not inherit np.dtype.__new__().", subtype); + "'%S' must not inherit np.dtype.__new__(). User DTypes should " + "currently call `PyArrayDescr_Type.tp_new` from their new.", + subtype); return NULL; } diff --git a/numpy/core/src/multiarray/dlpack.c b/numpy/core/src/multiarray/dlpack.c new file mode 100644 index 000000000..291e60a22 --- /dev/null +++ b/numpy/core/src/multiarray/dlpack.c @@ -0,0 +1,408 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <dlpack/dlpack.h> + +#include "numpy/arrayobject.h" +#include "common/npy_argparse.h" + +#include "common/dlpack/dlpack.h" +#include "common/npy_dlpack.h" + +static void +array_dlpack_deleter(DLManagedTensor *self) +{ + PyArrayObject *array = (PyArrayObject *)self->manager_ctx; + // This will also free the strides as it's one allocation. + PyMem_Free(self->dl_tensor.shape); + PyMem_Free(self); + Py_XDECREF(array); +} + +/* This is exactly as mandated by dlpack */ +static void dlpack_capsule_deleter(PyObject *self) { + if (PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) { + return; + } + + /* an exception may be in-flight, we must save it in case we create another one */ + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME); + if (managed == NULL) { + PyErr_WriteUnraisable(self); + goto done; + } + /* + * the spec says the deleter can be NULL if there is no way for the caller + * to provide a reasonable destructor. + */ + if (managed->deleter) { + managed->deleter(managed); + /* TODO: is the deleter allowed to set a python exception? */ + assert(!PyErr_Occurred()); + } + +done: + PyErr_Restore(type, value, traceback); +} + +/* used internally, almost identical to dlpack_capsule_deleter() */ +static void array_dlpack_internal_capsule_deleter(PyObject *self) +{ + /* an exception may be in-flight, we must save it in case we create another one */ + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_INTERNAL_CAPSULE_NAME); + if (managed == NULL) { + PyErr_WriteUnraisable(self); + goto done; + } + /* + * the spec says the deleter can be NULL if there is no way for the caller + * to provide a reasonable destructor. + */ + if (managed->deleter) { + managed->deleter(managed); + /* TODO: is the deleter allowed to set a python exception? */ + assert(!PyErr_Occurred()); + } + +done: + PyErr_Restore(type, value, traceback); +} + + +// This function cannot return NULL, but it can fail, +// So call PyErr_Occurred to check if it failed after +// calling it. +static DLDevice +array_get_dl_device(PyArrayObject *self) { + DLDevice ret; + ret.device_type = kDLCPU; + ret.device_id = 0; + PyObject *base = PyArray_BASE(self); + // The outer if is due to the fact that NumPy arrays are on the CPU + // by default (if not created from DLPack). + if (PyCapsule_IsValid(base, NPY_DLPACK_INTERNAL_CAPSULE_NAME)) { + DLManagedTensor *managed = PyCapsule_GetPointer( + base, NPY_DLPACK_INTERNAL_CAPSULE_NAME); + if (managed == NULL) { + return ret; + } + return managed->dl_tensor.device; + } + return ret; +} + + +PyObject * +array_dlpack(PyArrayObject *self, + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) +{ + PyObject *stream = Py_None; + NPY_PREPARE_ARGPARSER; + if (npy_parse_arguments("__dlpack__", args, len_args, kwnames, + "$stream", NULL, &stream, NULL, NULL, NULL)) { + return NULL; + } + + if (stream != Py_None) { + PyErr_SetString(PyExc_RuntimeError, "NumPy only supports " + "stream=None."); + return NULL; + } + + if ( !(PyArray_FLAGS(self) & NPY_ARRAY_WRITEABLE)) { + PyErr_SetString(PyExc_TypeError, "NumPy currently only supports " + "dlpack for writeable arrays"); + return NULL; + } + + npy_intp itemsize = PyArray_ITEMSIZE(self); + int ndim = PyArray_NDIM(self); + npy_intp *strides = PyArray_STRIDES(self); + npy_intp *shape = PyArray_SHAPE(self); + + if (!PyArray_IS_C_CONTIGUOUS(self) && PyArray_SIZE(self) != 1) { + for (int i = 0; i < ndim; ++i) { + if (strides[i] % itemsize != 0) { + PyErr_SetString(PyExc_RuntimeError, + "DLPack only supports strides which are a multiple of " + "itemsize."); + return NULL; + } + } + } + + DLDataType managed_dtype; + PyArray_Descr *dtype = PyArray_DESCR(self); + + if (PyDataType_ISBYTESWAPPED(dtype)) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports native " + "byte swapping."); + return NULL; + } + + managed_dtype.bits = 8 * itemsize; + managed_dtype.lanes = 1; + + if (PyDataType_ISSIGNED(dtype)) { + managed_dtype.code = kDLInt; + } + else if (PyDataType_ISUNSIGNED(dtype)) { + managed_dtype.code = kDLUInt; + } + else if (PyDataType_ISFLOAT(dtype)) { + // We can't be sure that the dtype is + // IEEE or padded. + if (itemsize > 8) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE " + "floating point types without padding."); + return NULL; + } + managed_dtype.code = kDLFloat; + } + else if (PyDataType_ISCOMPLEX(dtype)) { + // We can't be sure that the dtype is + // IEEE or padded. + if (itemsize > 16) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE " + "complex point types without padding."); + return NULL; + } + managed_dtype.code = kDLComplex; + } + else { + PyErr_SetString(PyExc_TypeError, + "DLPack only supports signed/unsigned integers, float " + "and complex dtypes."); + return NULL; + } + + DLDevice device = array_get_dl_device(self); + if (PyErr_Occurred()) { + return NULL; + } + + DLManagedTensor *managed = PyMem_Malloc(sizeof(DLManagedTensor)); + if (managed == NULL) { + PyErr_NoMemory(); + return NULL; + } + + /* + * Note: the `dlpack.h` header suggests/standardizes that `data` must be + * 256-byte aligned. We ignore this intentionally, because `__dlpack__` + * standardizes that `byte_offset` must be 0 (for now) to not break pytorch: + * https://github.com/data-apis/array-api/issues/293#issuecomment-964111413 + * + * We further assume that exporting fully unaligned data is OK even without + * `byte_offset` since the standard does not reject it. + * Presumably, pytorch will support importing `byte_offset != 0` and NumPy + * can choose to use it starting about 2023. At that point, it may be + * that NumPy MUST use `byte_offset` to adhere to the standard (as + * specified in the header)! + */ + managed->dl_tensor.data = PyArray_DATA(self); + managed->dl_tensor.byte_offset = 0; + managed->dl_tensor.device = device; + managed->dl_tensor.dtype = managed_dtype; + + int64_t *managed_shape_strides = PyMem_Malloc(sizeof(int64_t) * ndim * 2); + if (managed_shape_strides == NULL) { + PyErr_NoMemory(); + PyMem_Free(managed); + return NULL; + } + + int64_t *managed_shape = managed_shape_strides; + int64_t *managed_strides = managed_shape_strides + ndim; + for (int i = 0; i < ndim; ++i) { + managed_shape[i] = shape[i]; + // Strides in DLPack are items; in NumPy are bytes. + managed_strides[i] = strides[i] / itemsize; + } + + managed->dl_tensor.ndim = ndim; + managed->dl_tensor.shape = managed_shape; + managed->dl_tensor.strides = NULL; + if (PyArray_SIZE(self) != 1 && !PyArray_IS_C_CONTIGUOUS(self)) { + managed->dl_tensor.strides = managed_strides; + } + managed->dl_tensor.byte_offset = 0; + managed->manager_ctx = self; + managed->deleter = array_dlpack_deleter; + + PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME, + dlpack_capsule_deleter); + if (capsule == NULL) { + PyMem_Free(managed); + PyMem_Free(managed_shape_strides); + return NULL; + } + + // the capsule holds a reference + Py_INCREF(self); + return capsule; +} + +PyObject * +array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args)) +{ + DLDevice device = array_get_dl_device(self); + if (PyErr_Occurred()) { + return NULL; + } + return Py_BuildValue("ii", device.device_type, device.device_id); +} + +NPY_NO_EXPORT PyObject * +_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj) { + PyObject *capsule = PyObject_CallMethod((PyObject *)obj->ob_type, + "__dlpack__", "O", obj); + if (capsule == NULL) { + return NULL; + } + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(capsule, + NPY_DLPACK_CAPSULE_NAME); + + if (managed == NULL) { + Py_DECREF(capsule); + return NULL; + } + + const int ndim = managed->dl_tensor.ndim; + if (ndim > NPY_MAXDIMS) { + PyErr_SetString(PyExc_RuntimeError, + "maxdims of DLPack tensor is higher than the supported " + "maxdims."); + Py_DECREF(capsule); + return NULL; + } + + DLDeviceType device_type = managed->dl_tensor.device.device_type; + if (device_type != kDLCPU && + device_type != kDLCUDAHost && + device_type != kDLROCMHost && + device_type != kDLCUDAManaged) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported device in DLTensor."); + Py_DECREF(capsule); + return NULL; + } + + if (managed->dl_tensor.dtype.lanes != 1) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported lanes in DLTensor dtype."); + Py_DECREF(capsule); + return NULL; + } + + int typenum = -1; + const uint8_t bits = managed->dl_tensor.dtype.bits; + const npy_intp itemsize = bits / 8; + switch (managed->dl_tensor.dtype.code) { + case kDLInt: + switch (bits) + { + case 8: typenum = NPY_INT8; break; + case 16: typenum = NPY_INT16; break; + case 32: typenum = NPY_INT32; break; + case 64: typenum = NPY_INT64; break; + } + break; + case kDLUInt: + switch (bits) + { + case 8: typenum = NPY_UINT8; break; + case 16: typenum = NPY_UINT16; break; + case 32: typenum = NPY_UINT32; break; + case 64: typenum = NPY_UINT64; break; + } + break; + case kDLFloat: + switch (bits) + { + case 16: typenum = NPY_FLOAT16; break; + case 32: typenum = NPY_FLOAT32; break; + case 64: typenum = NPY_FLOAT64; break; + } + break; + case kDLComplex: + switch (bits) + { + case 64: typenum = NPY_COMPLEX64; break; + case 128: typenum = NPY_COMPLEX128; break; + } + break; + } + + if (typenum == -1) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported dtype in DLTensor."); + Py_DECREF(capsule); + return NULL; + } + + npy_intp shape[NPY_MAXDIMS]; + npy_intp strides[NPY_MAXDIMS]; + + for (int i = 0; i < ndim; ++i) { + shape[i] = managed->dl_tensor.shape[i]; + // DLPack has elements as stride units, NumPy has bytes. + if (managed->dl_tensor.strides != NULL) { + strides[i] = managed->dl_tensor.strides[i] * itemsize; + } + } + + char *data = (char *)managed->dl_tensor.data + + managed->dl_tensor.byte_offset; + + PyArray_Descr *descr = PyArray_DescrFromType(typenum); + if (descr == NULL) { + Py_DECREF(capsule); + return NULL; + } + + PyObject *ret = PyArray_NewFromDescr(&PyArray_Type, descr, ndim, shape, + managed->dl_tensor.strides != NULL ? strides : NULL, data, 0, NULL); + if (ret == NULL) { + Py_DECREF(capsule); + return NULL; + } + + PyObject *new_capsule = PyCapsule_New(managed, + NPY_DLPACK_INTERNAL_CAPSULE_NAME, + array_dlpack_internal_capsule_deleter); + if (new_capsule == NULL) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + if (PyArray_SetBaseObject((PyArrayObject *)ret, new_capsule) < 0) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + if (PyCapsule_SetName(capsule, NPY_DLPACK_USED_CAPSULE_NAME) < 0) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + Py_DECREF(capsule); + return ret; +} + + diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h index 05e9e2394..2a61fe39d 100644 --- a/numpy/core/src/multiarray/dtypemeta.h +++ b/numpy/core/src/multiarray/dtypemeta.h @@ -74,9 +74,9 @@ typedef struct { #define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr)) #define NPY_DT_SLOTS(dtype) ((NPY_DType_Slots *)(dtype)->dt_slots) -#define NPY_DT_is_legacy(dtype) ((dtype)->flags & NPY_DT_LEGACY) -#define NPY_DT_is_abstract(dtype) ((dtype)->flags & NPY_DT_ABSTRACT) -#define NPY_DT_is_parametric(dtype) ((dtype)->flags & NPY_DT_PARAMETRIC) +#define NPY_DT_is_legacy(dtype) (((dtype)->flags & NPY_DT_LEGACY) != 0) +#define NPY_DT_is_abstract(dtype) (((dtype)->flags & NPY_DT_ABSTRACT) != 0) +#define NPY_DT_is_parametric(dtype) (((dtype)->flags & NPY_DT_PARAMETRIC) != 0) /* * Macros for convenient classmethod calls, since these require diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src index 29ceabd71..3114a5896 100644 --- a/numpy/core/src/multiarray/einsum_sumprod.c.src +++ b/numpy/core/src/multiarray/einsum_sumprod.c.src @@ -337,13 +337,13 @@ static NPY_GCC_OPT_3 void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ b@i@ = @from@(data[@i@]); - const @type@ c@i@ = @from@(data_out[@i@]); + const @temptype@ b@i@ = @from@(data[@i@]); + const @temptype@ c@i@ = @from@(data_out[@i@]); /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ abc@i@ = scalar * b@i@ + c@i@; + const @temptype@ abc@i@ = scalar * b@i@ + c@i@; /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# @@ -353,8 +353,8 @@ static NPY_GCC_OPT_3 void } #endif // !NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data, ++data_out) { - const @type@ b = @from@(*data); - const @type@ c = @from@(*data_out); + const @temptype@ b = @from@(*data); + const @temptype@ c = @from@(*data_out); *data_out = @to@(scalar * b + c); } #endif // NPYV check for @type@ @@ -417,14 +417,14 @@ static void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ a@i@ = @from@(data0[@i@]); - const @type@ b@i@ = @from@(data1[@i@]); - const @type@ c@i@ = @from@(data_out[@i@]); + const @temptype@ a@i@ = @from@(data0[@i@]); + const @temptype@ b@i@ = @from@(data1[@i@]); + const @temptype@ c@i@ = @from@(data_out[@i@]); /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ abc@i@ = a@i@ * b@i@ + c@i@; + const @temptype@ abc@i@ = a@i@ * b@i@ + c@i@; /**end repeat2**/ /**begin repeat2 * #i = 0, 1, 2, 3# @@ -434,9 +434,9 @@ static void } #endif // !NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data0, ++data1, ++data_out) { - const @type@ a = @from@(*data0); - const @type@ b = @from@(*data1); - const @type@ c = @from@(*data_out); + const @temptype@ a = @from@(*data0); + const @temptype@ b = @from@(*data1); + const @temptype@ c = @from@(*data_out); *data_out = @to@(a * b + c); } #endif // NPYV check for @type@ @@ -521,14 +521,14 @@ static NPY_GCC_OPT_3 void /**begin repeat2 * #i = 0, 1, 2, 3# */ - const @type@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]); + const @temptype@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]); /**end repeat2**/ accum += ab0 + ab1 + ab2 + ab3; } #endif // !NPY_DISABLE_OPTIMIZATION for (; count > 0; --count, ++data0, ++data1) { - const @type@ a = @from@(*data0); - const @type@ b = @from@(*data1); + const @temptype@ a = @from@(*data0); + const @temptype@ b = @from@(*data1); accum += a * b; } #endif // NPYV check for @type@ diff --git a/numpy/core/src/multiarray/experimental_public_dtype_api.c b/numpy/core/src/multiarray/experimental_public_dtype_api.c index 1e8abe9d6..4b9c7199b 100644 --- a/numpy/core/src/multiarray/experimental_public_dtype_api.c +++ b/numpy/core/src/multiarray/experimental_public_dtype_api.c @@ -13,9 +13,10 @@ #include "dtypemeta.h" #include "array_coercion.h" #include "convert_datatype.h" +#include "common_dtype.h" -#define EXPERIMENTAL_DTYPE_API_VERSION 1 +#define EXPERIMENTAL_DTYPE_API_VERSION 2 typedef struct{ @@ -130,6 +131,14 @@ PyArrayInitDTypeMeta_FromSpec( return -1; } + if (((PyTypeObject *)DType)->tp_repr == PyArrayDescr_Type.tp_repr + || ((PyTypeObject *)DType)->tp_str == PyArrayDescr_Type.tp_str) { + PyErr_SetString(PyExc_TypeError, + "A custom DType must implement `__repr__` and `__str__` since " + "the default inherited version (currently) fails."); + return -1; + } + if (spec->typeobj == NULL || !PyType_Check(spec->typeobj)) { PyErr_SetString(PyExc_TypeError, "Not giving a type object is currently not supported, but " @@ -324,13 +333,41 @@ PyUFunc_AddLoopFromSpec(PyObject *ufunc, PyArrayMethod_Spec *spec) } +static int +PyUFunc_AddPromoter( + PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter) +{ + if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) { + PyErr_SetString(PyExc_TypeError, + "ufunc object passed is not a ufunc!"); + return -1; + } + if (!PyCapsule_CheckExact(promoter)) { + PyErr_SetString(PyExc_TypeError, + "promoter must (currently) be a PyCapsule."); + return -1; + } + if (PyCapsule_GetPointer(promoter, "numpy._ufunc_promoter") == NULL) { + return -1; + } + PyObject *info = PyTuple_Pack(2, DType_tuple, promoter); + if (info == NULL) { + return -1; + } + return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0); +} + + NPY_NO_EXPORT PyObject * _get_experimental_dtype_api(PyObject *NPY_UNUSED(mod), PyObject *arg) { static void *experimental_api_table[] = { &PyUFunc_AddLoopFromSpec, + &PyUFunc_AddPromoter, &PyArrayDTypeMeta_Type, &PyArrayInitDTypeMeta_FromSpec, + &PyArray_CommonDType, + &PyArray_PromoteDTypeSequence, NULL, }; diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 2c8d1b3b4..e81ca2947 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -384,7 +384,23 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored)) } if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) { PyArray_XDECREF(self); - PyDataMem_FREE(PyArray_DATA(self)); + size_t nbytes = PyArray_NBYTES(self); + /* + * Allocation will never be 0, see comment in ctors.c + * line 820 + */ + if (nbytes == 0) { + PyArray_Descr *dtype = PyArray_DESCR(self); + nbytes = dtype->elsize ? dtype->elsize : 1; + } + PyObject *handler = PyArray_HANDLER(self); + if (handler == NULL) { + /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */ + PyErr_SetString(PyExc_RuntimeError, + "no memory handler found but OWNDATA flag set"); + return -1; + } + PyDataMem_UserFREE(PyArray_DATA(self), nbytes, handler); } if (PyArray_BASE(self)) { if ((PyArray_FLAGS(self) & NPY_ARRAY_WRITEBACKIFCOPY) || diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index ee66378a9..086b674c8 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -776,6 +776,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis) return NULL; } + /*NUMPY_API */ NPY_NO_EXPORT PyObject * @@ -907,7 +908,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, Py_XDECREF(mps[i]); } Py_DECREF(ap); - npy_free_cache(mps, n * sizeof(mps[0])); + PyDataMem_FREE(mps); if (out != NULL && out != obj) { Py_INCREF(out); PyArray_ResolveWritebackIfCopy(obj); @@ -922,7 +923,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out, Py_XDECREF(mps[i]); } Py_XDECREF(ap); - npy_free_cache(mps, n * sizeof(mps[0])); + PyDataMem_FREE(mps); PyArray_DiscardWritebackIfCopy(obj); Py_XDECREF(obj); return NULL; @@ -962,14 +963,19 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, return 0; } + PyObject *mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + return -1; + } it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)op, &axis); if (it == NULL) { + Py_DECREF(mem_handler); return -1; } size = it->size; if (needcopy) { - buffer = npy_alloc_cache(N * elsize); + buffer = PyDataMem_UserNEW(N * elsize, mem_handler); if (buffer == NULL) { ret = -1; goto fail; @@ -1053,12 +1059,14 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort, fail: NPY_END_THREADS_DESCR(PyArray_DESCR(op)); - npy_free_cache(buffer, N * elsize); + /* cleanup internal buffer */ + PyDataMem_UserFREE(buffer, N * elsize, mem_handler); if (ret < 0 && !PyErr_Occurred()) { /* Out of memory during sorting or buffer creation */ PyErr_NoMemory(); } Py_DECREF(it); + Py_DECREF(mem_handler); return ret; } @@ -1090,11 +1098,16 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, NPY_BEGIN_THREADS_DEF; + PyObject *mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + return NULL; + } rop = (PyArrayObject *)PyArray_NewFromDescr( Py_TYPE(op), PyArray_DescrFromType(NPY_INTP), PyArray_NDIM(op), PyArray_DIMS(op), NULL, NULL, 0, (PyObject *)op); if (rop == NULL) { + Py_DECREF(mem_handler); return NULL; } rstride = PyArray_STRIDE(rop, axis); @@ -1102,6 +1115,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, /* Check if there is any argsorting to do */ if (N <= 1 || PyArray_SIZE(op) == 0) { + Py_DECREF(mem_handler); memset(PyArray_DATA(rop), 0, PyArray_NBYTES(rop)); return (PyObject *)rop; } @@ -1115,7 +1129,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, size = it->size; if (needcopy) { - valbuffer = npy_alloc_cache(N * elsize); + valbuffer = PyDataMem_UserNEW(N * elsize, mem_handler); if (valbuffer == NULL) { ret = -1; goto fail; @@ -1123,7 +1137,8 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, } if (needidxbuffer) { - idxbuffer = (npy_intp *)npy_alloc_cache(N * sizeof(npy_intp)); + idxbuffer = (npy_intp *)PyDataMem_UserNEW(N * sizeof(npy_intp), + mem_handler); if (idxbuffer == NULL) { ret = -1; goto fail; @@ -1212,8 +1227,9 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort, fail: NPY_END_THREADS_DESCR(PyArray_DESCR(op)); - npy_free_cache(valbuffer, N * elsize); - npy_free_cache(idxbuffer, N * sizeof(npy_intp)); + /* cleanup internal buffers */ + PyDataMem_UserFREE(valbuffer, N * elsize, mem_handler); + PyDataMem_UserFREE(idxbuffer, N * sizeof(npy_intp), mem_handler); if (ret < 0) { if (!PyErr_Occurred()) { /* Out of memory during sorting or buffer creation */ @@ -1224,6 +1240,7 @@ fail: } Py_XDECREF(it); Py_XDECREF(rit); + Py_DECREF(mem_handler); return (PyObject *)rop; } @@ -2398,19 +2415,14 @@ PyArray_CountNonzero(PyArrayObject *self) npy_intp *strideptr, *innersizeptr; NPY_BEGIN_THREADS_DEF; - // Special low-overhead version specific to the boolean/int types dtype = PyArray_DESCR(self); - switch(dtype->kind) { - case 'u': - case 'i': - case 'b': - if (dtype->elsize > 8) { - break; - } - return count_nonzero_int( - PyArray_NDIM(self), PyArray_BYTES(self), PyArray_DIMS(self), - PyArray_STRIDES(self), dtype->elsize - ); + /* Special low-overhead version specific to the boolean/int types */ + if (PyArray_ISALIGNED(self) && ( + PyDataType_ISBOOL(dtype) || PyDataType_ISINTEGER(dtype))) { + return count_nonzero_int( + PyArray_NDIM(self), PyArray_BYTES(self), PyArray_DIMS(self), + PyArray_STRIDES(self), dtype->elsize + ); } nonzero = PyArray_DESCR(self)->f->nonzero; diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 391e65f6a..b0b6f42f1 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -26,6 +26,7 @@ #include "shape.h" #include "strfuncs.h" #include "array_assign.h" +#include "npy_dlpack.h" #include "methods.h" #include "alloc.h" @@ -833,15 +834,15 @@ array_astype(PyArrayObject *self, */ NPY_CASTING casting = NPY_UNSAFE_CASTING; NPY_ORDER order = NPY_KEEPORDER; - int forcecopy = 1, subok = 1; + _PyArray_CopyMode forcecopy = 1; + int subok = 1; NPY_PREPARE_ARGPARSER; - if (npy_parse_arguments("astype", args, len_args, kwnames, "dtype", &PyArray_DescrConverter, &dtype, "|order", &PyArray_OrderConverter, &order, "|casting", &PyArray_CastingConverter, &casting, "|subok", &PyArray_PythonPyIntFromInt, &subok, - "|copy", &PyArray_PythonPyIntFromInt, &forcecopy, + "|copy", &PyArray_CopyConverter, &forcecopy, NULL, NULL, NULL) < 0) { Py_XDECREF(dtype); return NULL; @@ -858,20 +859,29 @@ array_astype(PyArrayObject *self, * and it's not a subtype if subok is False, then we * can skip the copy. */ - if (!forcecopy && (order == NPY_KEEPORDER || - (order == NPY_ANYORDER && - (PyArray_IS_C_CONTIGUOUS(self) || - PyArray_IS_F_CONTIGUOUS(self))) || - (order == NPY_CORDER && - PyArray_IS_C_CONTIGUOUS(self)) || - (order == NPY_FORTRANORDER && - PyArray_IS_F_CONTIGUOUS(self))) && - (subok || PyArray_CheckExact(self)) && - PyArray_EquivTypes(dtype, PyArray_DESCR(self))) { + if (forcecopy != NPY_COPY_ALWAYS && + (order == NPY_KEEPORDER || + (order == NPY_ANYORDER && + (PyArray_IS_C_CONTIGUOUS(self) || + PyArray_IS_F_CONTIGUOUS(self))) || + (order == NPY_CORDER && + PyArray_IS_C_CONTIGUOUS(self)) || + (order == NPY_FORTRANORDER && + PyArray_IS_F_CONTIGUOUS(self))) && + (subok || PyArray_CheckExact(self)) && + PyArray_EquivTypes(dtype, PyArray_DESCR(self))) { Py_DECREF(dtype); Py_INCREF(self); return (PyObject *)self; } + + if (forcecopy == NPY_COPY_NEVER) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while casting in never copy mode."); + Py_DECREF(dtype); + return NULL; + } + if (!PyArray_CanCastArrayTo(self, dtype, casting)) { PyErr_Clear(); npy_set_invalid_cast_error( @@ -1821,22 +1831,8 @@ array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol) descr = PyArray_DESCR(self); - /* if the python version is below 3.8, the pickle module does not provide - * built-in support for protocol 5. We try importing the pickle5 - * backport instead */ -#if PY_VERSION_HEX >= 0x03080000 /* we expect protocol 5 to be available in Python 3.8 */ pickle_module = PyImport_ImportModule("pickle"); -#else - pickle_module = PyImport_ImportModule("pickle5"); - if (pickle_module == NULL) { - /* for protocol 5, raise a clear ImportError if pickle5 is not found - */ - PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 " - "requires the pickle5 module for Python >=3.6 and <3.8"); - return NULL; - } -#endif if (pickle_module == NULL){ return NULL; } @@ -1975,6 +1971,16 @@ array_setstate(PyArrayObject *self, PyObject *args) return NULL; } + /* + * Reassigning fa->descr messes with the reallocation strategy, + * since fa could be a 0-d or scalar, and then + * PyDataMem_UserFREE will be confused + */ + size_t n_tofree = PyArray_NBYTES(self); + if (n_tofree == 0) { + PyArray_Descr *dtype = PyArray_DESCR(self); + n_tofree = dtype->elsize ? dtype->elsize : 1; + } Py_XDECREF(PyArray_DESCR(self)); fa->descr = typecode; Py_INCREF(typecode); @@ -2041,7 +2047,18 @@ array_setstate(PyArrayObject *self, PyObject *args) } if ((PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA)) { - PyDataMem_FREE(PyArray_DATA(self)); + /* + * Allocation will never be 0, see comment in ctors.c + * line 820 + */ + PyObject *handler = PyArray_HANDLER(self); + if (handler == NULL) { + /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */ + PyErr_SetString(PyExc_RuntimeError, + "no memory handler found but OWNDATA flag set"); + return NULL; + } + PyDataMem_UserFREE(PyArray_DATA(self), n_tofree, handler); PyArray_CLEARFLAGS(self, NPY_ARRAY_OWNDATA); } Py_XDECREF(PyArray_BASE(self)); @@ -2077,7 +2094,6 @@ array_setstate(PyArrayObject *self, PyObject *args) if (!PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) { int swap = PyArray_ISBYTESWAPPED(self); - fa->data = datastr; /* Bytes should always be considered immutable, but we just grab the * pointer if they are large, to save memory. */ if (!IsAligned(self) || swap || (len <= 1000)) { @@ -2086,8 +2102,16 @@ array_setstate(PyArrayObject *self, PyObject *args) Py_DECREF(rawdata); Py_RETURN_NONE; } - fa->data = PyDataMem_NEW(num); + /* Store the handler in case the default is modified */ + Py_XDECREF(fa->mem_handler); + fa->mem_handler = PyDataMem_GetHandler(); + if (fa->mem_handler == NULL) { + Py_DECREF(rawdata); + return NULL; + } + fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self)); if (PyArray_DATA(self) == NULL) { + Py_DECREF(fa->mem_handler); Py_DECREF(rawdata); return PyErr_NoMemory(); } @@ -2123,7 +2147,12 @@ array_setstate(PyArrayObject *self, PyObject *args) Py_DECREF(rawdata); } else { + /* The handlers should never be called in this case */ + Py_XDECREF(fa->mem_handler); + fa->mem_handler = NULL; + fa->data = datastr; if (PyArray_SetBaseObject(self, rawdata) < 0) { + Py_DECREF(rawdata); return NULL; } } @@ -2134,8 +2163,15 @@ array_setstate(PyArrayObject *self, PyObject *args) if (num == 0 || elsize == 0) { Py_RETURN_NONE; } - fa->data = PyDataMem_NEW(num); + /* Store the functions in case the default handler is modified */ + Py_XDECREF(fa->mem_handler); + fa->mem_handler = PyDataMem_GetHandler(); + if (fa->mem_handler == NULL) { + return NULL; + } + fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self)); if (PyArray_DATA(self) == NULL) { + Py_DECREF(fa->mem_handler); return PyErr_NoMemory(); } if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) { @@ -2144,6 +2180,7 @@ array_setstate(PyArrayObject *self, PyObject *args) PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA); fa->base = NULL; if (_setlist_pkl(self, rawdata) < 0) { + Py_DECREF(fa->mem_handler); return NULL; } } @@ -2209,7 +2246,7 @@ array_dumps(PyArrayObject *self, PyObject *args, PyObject *kwds) static PyObject * -array_sizeof(PyArrayObject *self) +array_sizeof(PyArrayObject *self, PyObject *NPY_UNUSED(args)) { /* object + dimension and strides */ Py_ssize_t nbytes = Py_TYPE(self)->tp_basicsize + @@ -2948,5 +2985,13 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { {"view", (PyCFunction)array_view, METH_FASTCALL | METH_KEYWORDS, NULL}, + // For data interchange between libraries + {"__dlpack__", + (PyCFunction)array_dlpack, + METH_FASTCALL | METH_KEYWORDS, NULL}, + + {"__dlpack_device__", + (PyCFunction)array_dlpack_device, + METH_NOARGS, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index d211f01bc..cf0160a2b 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -70,6 +70,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "get_attr_string.h" #include "experimental_public_dtype_api.h" /* _get_experimental_dtype_api */ +#include "npy_dlpack.h" + /* ***************************************************************************** ** INCLUDE GENERATED CODE ** @@ -1560,7 +1562,7 @@ _prepend_ones(PyArrayObject *arr, int nd, int ndmin, NPY_ORDER order) static NPY_INLINE PyObject * _array_fromobject_generic( - PyObject *op, PyArray_Descr *type, npy_bool copy, NPY_ORDER order, + PyObject *op, PyArray_Descr *type, _PyArray_CopyMode copy, NPY_ORDER order, npy_bool subok, int ndmin) { PyArrayObject *oparr = NULL, *ret = NULL; @@ -1577,12 +1579,17 @@ _array_fromobject_generic( if (PyArray_CheckExact(op) || (subok && PyArray_Check(op))) { oparr = (PyArrayObject *)op; if (type == NULL) { - if (!copy && STRIDING_OK(oparr, order)) { + if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) { ret = oparr; Py_INCREF(ret); goto finish; } else { + if (copy == NPY_COPY_NEVER) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while creating a new array."); + return NULL; + } ret = (PyArrayObject *)PyArray_NewCopy(oparr, order); goto finish; } @@ -1590,12 +1597,17 @@ _array_fromobject_generic( /* One more chance */ oldtype = PyArray_DESCR(oparr); if (PyArray_EquivTypes(oldtype, type)) { - if (!copy && STRIDING_OK(oparr, order)) { + if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) { Py_INCREF(op); ret = oparr; goto finish; } else { + if (copy == NPY_COPY_NEVER) { + PyErr_SetString(PyExc_ValueError, + "Unable to avoid copy while creating a new array."); + return NULL; + } ret = (PyArrayObject *)PyArray_NewCopy(oparr, order); if (oldtype == type || ret == NULL) { goto finish; @@ -1608,9 +1620,12 @@ _array_fromobject_generic( } } - if (copy) { + if (copy == NPY_COPY_ALWAYS) { flags = NPY_ARRAY_ENSURECOPY; } + else if (copy == NPY_COPY_NEVER ) { + flags = NPY_ARRAY_ENSURENOCOPY; + } if (order == NPY_CORDER) { flags |= NPY_ARRAY_C_CONTIGUOUS; } @@ -1654,7 +1669,7 @@ array_array(PyObject *NPY_UNUSED(ignored), { PyObject *op; npy_bool subok = NPY_FALSE; - npy_bool copy = NPY_TRUE; + _PyArray_CopyMode copy = NPY_COPY_ALWAYS; int ndmin = 0; PyArray_Descr *type = NULL; NPY_ORDER order = NPY_KEEPORDER; @@ -1665,7 +1680,7 @@ array_array(PyObject *NPY_UNUSED(ignored), if (npy_parse_arguments("array", args, len_args, kwnames, "object", NULL, &op, "|dtype", &PyArray_DescrConverter2, &type, - "$copy", &PyArray_BoolConverter, ©, + "$copy", &PyArray_CopyConverter, ©, "$order", &PyArray_OrderConverter, &order, "$subok", &PyArray_BoolConverter, &subok, "$ndmin", &PyArray_PythonPyIntFromInt, &ndmin, @@ -4197,7 +4212,7 @@ normalize_axis_index(PyObject *NPY_UNUSED(self), static PyObject * -_reload_guard(PyObject *NPY_UNUSED(self)) { +_reload_guard(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) { static int initialized = 0; #if !defined(PYPY_VERSION) @@ -4231,7 +4246,6 @@ _reload_guard(PyObject *NPY_UNUSED(self)) { Py_RETURN_NONE; } - static struct PyMethodDef array_module_methods[] = { {"_get_implementing_args", (PyCFunction)array__get_implementing_args, @@ -4433,6 +4447,12 @@ static struct PyMethodDef array_module_methods[] = { {"geterrobj", (PyCFunction) ufunc_geterr, METH_VARARGS, NULL}, + {"get_handler_name", + (PyCFunction) get_handler_name, + METH_VARARGS, NULL}, + {"get_handler_version", + (PyCFunction) get_handler_version, + METH_VARARGS, NULL}, {"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc, METH_VARARGS, NULL}, {"_get_sfloat_dtype", @@ -4442,6 +4462,8 @@ static struct PyMethodDef array_module_methods[] = { {"_reload_guard", (PyCFunction)_reload_guard, METH_NOARGS, "Give a warning on reload and big warning in sub-interpreters."}, + {"_from_dlpack", (PyCFunction)_from_dlpack, + METH_O, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; @@ -4672,14 +4694,14 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { PyObject *m, *d, *s; PyObject *c_api; - /* Initialize CPU features */ - if (npy_cpu_init() < 0) { - goto err; - } - /* Create the module and add the functions */ m = PyModule_Create(&moduledef); if (!m) { + return NULL; + } + + /* Initialize CPU features */ + if (npy_cpu_init() < 0) { goto err; } @@ -4910,6 +4932,23 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { if (initumath(m) != 0) { goto err; } + /* + * Initialize the default PyDataMem_Handler capsule singleton. + */ + PyDataMem_DefaultHandler = PyCapsule_New(&default_handler, "mem_handler", NULL); + if (PyDataMem_DefaultHandler == NULL) { + goto err; + } +#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600) + /* + * Initialize the context-local current handler + * with the default PyDataMem_Handler capsule. + */ + current_handler = PyContextVar_New("current_allocator", PyDataMem_DefaultHandler); + if (current_handler == NULL) { + goto err; + } +#endif return m; err: @@ -4917,5 +4956,6 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { PyErr_SetString(PyExc_RuntimeError, "cannot load multiarray module."); } + Py_DECREF(m); return NULL; } diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index 8e072d5f4..2675496ab 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -1190,7 +1190,7 @@ npyiter_resetbasepointers(NewNpyArrayIterObject *self) } static PyObject * -npyiter_reset(NewNpyArrayIterObject *self) +npyiter_reset(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter == NULL) { PyErr_SetString(PyExc_ValueError, @@ -1227,7 +1227,7 @@ npyiter_reset(NewNpyArrayIterObject *self) * copied. */ static PyObject * -npyiter_copy(NewNpyArrayIterObject *self) +npyiter_copy(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { NewNpyArrayIterObject *iter; @@ -1263,7 +1263,7 @@ npyiter_copy(NewNpyArrayIterObject *self) } static PyObject * -npyiter_iternext(NewNpyArrayIterObject *self) +npyiter_iternext(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter != NULL && self->iternext != NULL && !self->finished && self->iternext(self->iter)) { @@ -1320,7 +1320,8 @@ npyiter_remove_axis(NewNpyArrayIterObject *self, PyObject *args) } static PyObject * -npyiter_remove_multi_index(NewNpyArrayIterObject *self) +npyiter_remove_multi_index( + NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter == NULL) { PyErr_SetString(PyExc_ValueError, @@ -1345,7 +1346,8 @@ npyiter_remove_multi_index(NewNpyArrayIterObject *self) } static PyObject * -npyiter_enable_external_loop(NewNpyArrayIterObject *self) +npyiter_enable_external_loop( + NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter == NULL) { PyErr_SetString(PyExc_ValueError, @@ -1370,7 +1372,7 @@ npyiter_enable_external_loop(NewNpyArrayIterObject *self) } static PyObject * -npyiter_debug_print(NewNpyArrayIterObject *self) +npyiter_debug_print(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter != NULL) { NpyIter_DebugPrint(self->iter); @@ -2315,7 +2317,7 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op, } static PyObject * -npyiter_enter(NewNpyArrayIterObject *self) +npyiter_enter(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { if (self->iter == NULL) { PyErr_SetString(PyExc_RuntimeError, "operation on non-initialized iterator"); @@ -2326,7 +2328,7 @@ npyiter_enter(NewNpyArrayIterObject *self) } static PyObject * -npyiter_close(NewNpyArrayIterObject *self) +npyiter_close(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { NpyIter *iter = self->iter; int ret; @@ -2347,7 +2349,7 @@ static PyObject * npyiter_exit(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args)) { /* even if called via exception handling, writeback any data */ - return npyiter_close(self); + return npyiter_close(self, NULL); } static PyMethodDef npyiter_methods[] = { diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index e409e9874..564352f1f 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -233,8 +233,12 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr, PyArray_VectorUnaryFunc* castfunc; descr = PyArray_DescrFromScalar(scalar); + if (descr == NULL) { + return -1; + } castfunc = PyArray_GetCastFunc(descr, outcode->type_num); if (castfunc == NULL) { + Py_DECREF(descr); return -1; } if (PyTypeNum_ISEXTENDED(descr->type_num) || @@ -254,6 +258,7 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr, NPY_ARRAY_CARRAY, NULL); if (aout == NULL) { Py_DECREF(ain); + Py_DECREF(descr); return -1; } castfunc(PyArray_DATA(ain), PyArray_DATA(aout), 1, ain, aout); diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index 56f17431a..db1e49db8 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -34,6 +34,16 @@ #include "binop_override.h" +/* + * used for allocating a single scalar, so use the default numpy + * memory allocators instead of the (maybe) user overrides + */ +NPY_NO_EXPORT void * +npy_alloc_cache_zero(size_t nmemb, size_t size); + +NPY_NO_EXPORT void +npy_free_cache(void * p, npy_uintp sz); + NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[] = { {PyObject_HEAD_INIT(&PyBoolArrType_Type) 0}, {PyObject_HEAD_INIT(&PyBoolArrType_Type) 1}, @@ -209,6 +219,27 @@ gentype_multiply(PyObject *m1, PyObject *m2) } /**begin repeat + * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT, + * LONG, ULONG, LONGLONG, ULONGLONG# + * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, + * npy_long, npy_ulong, npy_longlong, npy_ulonglong# + * #c = hh, uhh, h, uh,, u, l, ul, ll, ull# + * #Name = Byte, UByte, Short, UShort, Int, UInt, + * Long, ULong, LongLong, ULongLong# + * #convert = Long*8, LongLong*2# + */ +static PyObject * +@type@_bit_count(PyObject *self, PyObject *NPY_UNUSED(args)) +{ + @type@ scalar = PyArrayScalar_VAL(self, @Name@); + uint8_t count = npy_popcount@c@(scalar); + PyObject *result = PyLong_From@convert@(count); + + return result; +} +/**end repeat**/ + +/**begin repeat * * #name = positive, negative, absolute, invert, int, float# */ @@ -1129,7 +1160,7 @@ gentype_size_get(PyObject *NPY_UNUSED(self), void *NPY_UNUSED(ignored)) } static PyObject * -gentype_sizeof(PyObject *self) +gentype_sizeof(PyObject *self, PyObject *NPY_UNUSED(args)) { Py_ssize_t nbytes; PyObject * isz = gentype_itemsize_get(self, NULL); @@ -1321,7 +1352,7 @@ gentype_imag_get(PyObject *self, void *NPY_UNUSED(ignored)) int elsize; typecode = PyArray_DescrFromScalar(self); elsize = typecode->elsize; - temp = npy_alloc_cache_zero(elsize); + temp = npy_alloc_cache_zero(1, elsize); ret = PyArray_Scalar(temp, typecode, NULL); npy_free_cache(temp, elsize); } @@ -1887,7 +1918,7 @@ static PyObject * */ /* Heavily copied from the builtin float.as_integer_ratio */ static PyObject * -@name@_as_integer_ratio(PyObject *self) +@name@_as_integer_ratio(PyObject *self, PyObject *NPY_UNUSED(args)) { #if @is_half@ npy_double val = npy_half_to_double(PyArrayScalar_VAL(self, @Name@)); @@ -1968,7 +1999,7 @@ error: * #c = f, f, , l# */ static PyObject * -@name@_is_integer(PyObject *self) +@name@_is_integer(PyObject *self, PyObject *NPY_UNUSED(args)) { #if @is_half@ npy_double val = npy_half_to_double(PyArrayScalar_VAL(self, @Name@)); @@ -1991,7 +2022,7 @@ static PyObject * /**end repeat**/ static PyObject * -integer_is_integer(PyObject *self) { +integer_is_integer(PyObject *self, PyObject *NPY_UNUSED(args)) { Py_RETURN_TRUE; } @@ -2306,8 +2337,7 @@ static PyMethodDef @name@type_methods[] = { /**end repeat**/ /**begin repeat - * #name = byte, short, int, long, longlong, ubyte, ushort, - * uint, ulong, ulonglong, timedelta, cdouble# + * #name = timedelta, cdouble# */ static PyMethodDef @name@type_methods[] = { /* for typing; requires python >= 3.9 */ @@ -2318,6 +2348,23 @@ static PyMethodDef @name@type_methods[] = { }; /**end repeat**/ +/**begin repeat + * #name = byte, ubyte, short, ushort, int, uint, + * long, ulong, longlong, ulonglong# + */ +static PyMethodDef @name@type_methods[] = { + /* for typing; requires python >= 3.9 */ + {"__class_getitem__", + (PyCFunction)numbertype_class_getitem, + METH_CLASS | METH_O, NULL}, + {"bit_count", + (PyCFunction)npy_@name@_bit_count, + METH_NOARGS, NULL}, + {NULL, NULL, 0, NULL} /* sentinel */ +}; +/**end repeat**/ + + /************* As_mapping functions for void array scalar ************/ static Py_ssize_t @@ -3151,7 +3198,10 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) (int) NPY_MAX_INT); return NULL; } - destptr = npy_alloc_cache_zero(memu); + if (memu == 0) { + memu = 1; + } + destptr = npy_alloc_cache_zero(memu, 1); if (destptr == NULL) { return PyErr_NoMemory(); } @@ -4092,6 +4142,17 @@ initialize_numeric_types(void) /**end repeat**/ /**begin repeat + * #name = byte, short, int, long, longlong, + * ubyte, ushort, uint, ulong, ulonglong# + * #Name = Byte, Short, Int, Long, LongLong, + * UByte, UShort, UInt, ULong, ULongLong# + */ + + Py@Name@ArrType_Type.tp_methods = @name@type_methods; + + /**end repeat**/ + + /**begin repeat * #name = half, float, double, longdouble# * #Name = Half, Float, Double, LongDouble# */ diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index 5a4e8c0f3..162abd6a4 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -121,8 +121,16 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck, } /* Reallocate space if needed - allocating 0 is forbidden */ - new_data = PyDataMem_RENEW( - PyArray_DATA(self), newnbytes == 0 ? elsize : newnbytes); + PyObject *handler = PyArray_HANDLER(self); + if (handler == NULL) { + /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */ + PyErr_SetString(PyExc_RuntimeError, + "no memory handler found but OWNDATA flag set"); + return NULL; + } + new_data = PyDataMem_UserRENEW(PyArray_DATA(self), + newnbytes == 0 ? elsize : newnbytes, + handler); if (new_data == NULL) { PyErr_SetString(PyExc_MemoryError, "cannot allocate memory for array"); diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src index cae84befe..5b418342f 100644 --- a/numpy/core/src/npymath/npy_math_internal.h.src +++ b/numpy/core/src/npymath/npy_math_internal.h.src @@ -55,6 +55,29 @@ */ #include "npy_math_private.h" +/* Magic binary numbers used by bit_count + * For type T, the magic numbers are computed as follows: + * Magic[0]: 01 01 01 01 01 01... = (T)~(T)0/3 + * Magic[1]: 0011 0011 0011... = (T)~(T)0/15 * 3 + * Magic[2]: 00001111 00001111... = (T)~(T)0/255 * 15 + * Magic[3]: 00000001 00000001... = (T)~(T)0/255 + * + * Counting bits set, in parallel + * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + * + * Generic Algorithm for type T: + * a = a - ((a >> 1) & (T)~(T)0/3); + * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3); + * a = (a + (a >> 4)) & (T)~(T)0/255*15; + * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; +*/ + +static const npy_uint8 MAGIC8[] = {0x55u, 0x33u, 0x0Fu, 0x01u}; +static const npy_uint16 MAGIC16[] = {0x5555u, 0x3333u, 0x0F0Fu, 0x0101u}; +static const npy_uint32 MAGIC32[] = {0x55555555ul, 0x33333333ul, 0x0F0F0F0Ful, 0x01010101ul}; +static const npy_uint64 MAGIC64[] = {0x5555555555555555ull, 0x3333333333333333ull, 0x0F0F0F0F0F0F0F0Full, 0x0101010101010101ull}; + + /* ***************************************************************************** ** BASIC MATH FUNCTIONS ** @@ -454,10 +477,16 @@ NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp) /**begin repeat * #type = npy_longdouble, npy_double, npy_float# + * #TYPE = LONGDOUBLE, DOUBLE, FLOAT# * #c = l,,f# * #C = L,,F# */ - +#undef NPY__FP_SFX +#if NPY_SIZEOF_@TYPE@ == NPY_SIZEOF_DOUBLE + #define NPY__FP_SFX(X) X +#else + #define NPY__FP_SFX(X) NPY_CAT(X, @c@) +#endif /* * On arm64 macOS, there's a bug with sin, cos, and tan where they don't * raise "invalid" when given INFINITY as input. @@ -483,7 +512,7 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) return (x - x); } #endif - return @kind@@c@(x); + return NPY__FP_SFX(@kind@)(x); } #endif @@ -498,7 +527,7 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x) #ifdef HAVE_@KIND@@C@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) { - return @kind@@c@(x, y); + return NPY__FP_SFX(@kind@)(x, y); } #endif /**end repeat1**/ @@ -506,21 +535,21 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y) #ifdef HAVE_MODF@C@ NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr) { - return modf@c@(x, iptr); + return NPY__FP_SFX(modf)(x, iptr); } #endif #ifdef HAVE_LDEXP@C@ NPY_INPLACE @type@ npy_ldexp@c@(@type@ x, int exp) { - return ldexp@c@(x, exp); + return NPY__FP_SFX(ldexp)(x, exp); } #endif #ifdef HAVE_FREXP@C@ NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp) { - return frexp@c@(x, exp); + return NPY__FP_SFX(frexp)(x, exp); } #endif @@ -543,10 +572,10 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) #else NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) { - return cbrt@c@(x); + return NPY__FP_SFX(cbrt)(x); } #endif - +#undef NPY__FP_SFX /**end repeat**/ @@ -556,10 +585,16 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) /**begin repeat * #type = npy_float, npy_double, npy_longdouble# + * #TYPE = FLOAT, DOUBLE, LONGDOUBLE# * #c = f, ,l# * #C = F, ,L# */ - +#undef NPY__FP_SFX +#if NPY_SIZEOF_@TYPE@ == NPY_SIZEOF_DOUBLE + #define NPY__FP_SFX(X) X +#else + #define NPY__FP_SFX(X) NPY_CAT(X, @c@) +#endif @type@ npy_heaviside@c@(@type@ x, @type@ h0) { if (npy_isnan(x)) { @@ -576,10 +611,10 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x) } } -#define LOGE2 NPY_LOGE2@c@ -#define LOG2E NPY_LOG2E@c@ -#define RAD2DEG (180.0@c@/NPY_PI@c@) -#define DEG2RAD (NPY_PI@c@/180.0@c@) +#define LOGE2 NPY__FP_SFX(NPY_LOGE2) +#define LOG2E NPY__FP_SFX(NPY_LOG2E) +#define RAD2DEG (NPY__FP_SFX(180.0)/NPY__FP_SFX(NPY_PI)) +#define DEG2RAD (NPY__FP_SFX(NPY_PI)/NPY__FP_SFX(180.0)) NPY_INPLACE @type@ npy_rad2deg@c@(@type@ x) { @@ -733,7 +768,7 @@ npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus) #undef LOG2E #undef RAD2DEG #undef DEG2RAD - +#undef NPY__FP_SFX /**end repeat**/ /**begin repeat @@ -814,3 +849,66 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b) } /**end repeat1**/ /**end repeat**/ + + +#define __popcnt32 __popcnt +/**begin repeat + * + * #type = ubyte, ushort, uint, ulong, ulonglong# + * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG# + * #c = hh, h, , l, ll# + */ +#undef TO_BITS_LEN +#if 0 +/**begin repeat1 + * #len = 8, 16, 32, 64# + */ +#elif NPY_BITSOF_@STYPE@ == @len@ + #define TO_BITS_LEN(X) X##@len@ +/**end repeat1**/ +#endif + + +NPY_INPLACE uint8_t +npy_popcount_parallel@c@(npy_@type@ a) +{ + a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]); + a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]); + a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2]; + return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT); +} + +NPY_INPLACE uint8_t +npy_popcountu@c@(npy_@type@ a) +{ +/* use built-in popcount if present, else use our implementation */ +#if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32 + return __builtin_popcount@c@(a); +#elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16 + /* no builtin __popcnt64 for 32 bits */ + #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64) + return TO_BITS_LEN(__popcnt)(a); + /* split 64 bit number into two 32 bit ints and return sum of counts */ + #elif (defined(_WIN32) && NPY_BITSOF_@STYPE@ == 64) + npy_uint32 left = (npy_uint32) (a>>32); + npy_uint32 right = (npy_uint32) a; + return __popcnt32(left) + __popcnt32(right); + #endif +#else + return npy_popcount_parallel@c@(a); +#endif +} +/**end repeat**/ + +/**begin repeat + * + * #type = byte, short, int, long, longlong# + * #c = hh, h, , l, ll# + */ +NPY_INPLACE uint8_t +npy_popcount@c@(npy_@type@ a) +{ + /* Return popcount of abs(a) */ + return npy_popcountu@c@(a < 0 ? -a : a); +} +/**end repeat**/ diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h index 212d11a0b..7ca0c5ba0 100644 --- a/numpy/core/src/npymath/npy_math_private.h +++ b/numpy/core/src/npymath/npy_math_private.h @@ -19,7 +19,13 @@ #define _NPY_MATH_PRIVATE_H_ #include <Python.h> +#ifdef __cplusplus +#include <cmath> +using std::isgreater; +using std::isless; +#else #include <math.h> +#endif #include "npy_config.h" #include "npy_fpmath.h" @@ -507,17 +513,29 @@ typedef union { #else /* !_MSC_VER */ typedef union { npy_cdouble npy_z; +#ifdef __cplusplus + std::complex<double> c99z; +#else complex double c99_z; +#endif } __npy_cdouble_to_c99_cast; typedef union { npy_cfloat npy_z; +#ifdef __cplusplus + std::complex<float> c99z; +#else complex float c99_z; +#endif } __npy_cfloat_to_c99_cast; typedef union { npy_clongdouble npy_z; +#ifdef __cplusplus + std::complex<long double> c99_z; +#else complex long double c99_z; +#endif } __npy_clongdouble_to_c99_cast; #endif /* !_MSC_VER */ diff --git a/numpy/core/src/npysort/radixsort.c.src b/numpy/core/src/npysort/radixsort.c.src deleted file mode 100644 index 99d8ed42a..000000000 --- a/numpy/core/src/npysort/radixsort.c.src +++ /dev/null @@ -1,231 +0,0 @@ -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "npy_sort.h" -#include "npysort_common.h" -#include <stdlib.h> - -/* - ***************************************************************************** - ** INTEGER SORTS ** - ***************************************************************************** - */ - - -/**begin repeat - * - * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, - * LONGLONG, ULONGLONG# - * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, - * longlong, ulonglong# - * #type = npy_ubyte, npy_ubyte, npy_ubyte, npy_ushort, npy_ushort, npy_uint, - * npy_uint, npy_ulong, npy_ulong, npy_ulonglong, npy_ulonglong# - * #sign = 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0# - * #floating = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0# - */ - -// Reference: https://github.com/eloj/radix-sorting#-key-derivation -#if @sign@ - // Floating-point is currently disabled. - // Floating-point tests succeed for double and float on macOS but not on Windows/Linux. - // Basic sorting tests succeed but others relying on sort fail. - // Possibly related to floating-point normalisation or multiple NaN reprs? Not sure. - #if @floating@ - // For floats, we invert the key if the sign bit is set, else we invert the sign bit. - #define KEY_OF(x) ((x) ^ (-((x) >> (sizeof(@type@) * 8 - 1)) | ((@type@)1 << (sizeof(@type@) * 8 - 1)))) - #else - // For signed ints, we flip the sign bit so the negatives are below the positives. - #define KEY_OF(x) ((x) ^ ((@type@)1 << (sizeof(@type@) * 8 - 1))) - #endif -#else - // For unsigned ints, the key is as-is - #define KEY_OF(x) (x) -#endif - -static inline npy_ubyte -nth_byte_@suff@(@type@ key, npy_intp l) { - return (key >> (l << 3)) & 0xFF; -} - -static @type@* -radixsort0_@suff@(@type@ *arr, @type@ *aux, npy_intp num) -{ - npy_intp cnt[sizeof(@type@)][1 << 8] = { { 0 } }; - npy_intp i; - size_t l; - @type@ key0 = KEY_OF(arr[0]); - size_t ncols = 0; - npy_ubyte cols[sizeof(@type@)]; - - for (i = 0; i < num; i++) { - @type@ k = KEY_OF(arr[i]); - - for (l = 0; l < sizeof(@type@); l++) { - cnt[l][nth_byte_@suff@(k, l)]++; - } - } - - for (l = 0; l < sizeof(@type@); l++) { - if (cnt[l][nth_byte_@suff@(key0, l)] != num) { - cols[ncols++] = l; - } - } - - for (l = 0; l < ncols; l++) { - npy_intp a = 0; - for (i = 0; i < 256; i++) { - npy_intp b = cnt[cols[l]][i]; - cnt[cols[l]][i] = a; - a += b; - } - } - - for (l = 0; l < ncols; l++) { - @type@* temp; - for (i = 0; i < num; i++) { - @type@ k = KEY_OF(arr[i]); - npy_intp dst = cnt[cols[l]][nth_byte_@suff@(k, cols[l])]++; - aux[dst] = arr[i]; - } - - temp = aux; - aux = arr; - arr = temp; - } - - return arr; -} - -NPY_NO_EXPORT int -radixsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr)) -{ - void *sorted; - @type@ *aux; - @type@ *arr = start; - @type@ k1, k2; - npy_bool all_sorted = 1; - - if (num < 2) { - return 0; - } - - k1 = KEY_OF(arr[0]); - for (npy_intp i = 1; i < num; i++) { - k2 = KEY_OF(arr[i]); - if (k1 > k2) { - all_sorted = 0; - break; - } - k1 = k2; - } - - if (all_sorted) { - return 0; - } - - aux = malloc(num * sizeof(@type@)); - if (aux == NULL) { - return -NPY_ENOMEM; - } - - sorted = radixsort0_@suff@(start, aux, num); - if (sorted != start) { - memcpy(start, sorted, num * sizeof(@type@)); - } - - free(aux); - return 0; -} - -static npy_intp* -aradixsort0_@suff@(@type@ *arr, npy_intp *aux, npy_intp *tosort, npy_intp num) -{ - npy_intp cnt[sizeof(@type@)][1 << 8] = { { 0 } }; - npy_intp i; - size_t l; - @type@ key0 = KEY_OF(arr[0]); - size_t ncols = 0; - npy_ubyte cols[sizeof(@type@)]; - - for (i = 0; i < num; i++) { - @type@ k = KEY_OF(arr[i]); - - for (l = 0; l < sizeof(@type@); l++) { - cnt[l][nth_byte_@suff@(k, l)]++; - } - } - - for (l = 0; l < sizeof(@type@); l++) { - if (cnt[l][nth_byte_@suff@(key0, l)] != num) { - cols[ncols++] = l; - } - } - - for (l = 0; l < ncols; l++) { - npy_intp a = 0; - for (i = 0; i < 256; i++) { - npy_intp b = cnt[cols[l]][i]; - cnt[cols[l]][i] = a; - a += b; - } - } - - for (l = 0; l < ncols; l++) { - npy_intp* temp; - for (i = 0; i < num; i++) { - @type@ k = KEY_OF(arr[tosort[i]]); - npy_intp dst = cnt[cols[l]][nth_byte_@suff@(k, cols[l])]++; - aux[dst] = tosort[i]; - } - - temp = aux; - aux = tosort; - tosort = temp; - } - - return tosort; -} - -NPY_NO_EXPORT int -aradixsort_@suff@(void *start, npy_intp* tosort, npy_intp num, void *NPY_UNUSED(varr)) -{ - npy_intp *sorted; - npy_intp *aux; - @type@ *arr = start; - @type@ k1, k2; - npy_bool all_sorted = 1; - - if (num < 2) { - return 0; - } - - k1 = KEY_OF(arr[tosort[0]]); - for (npy_intp i = 1; i < num; i++) { - k2 = KEY_OF(arr[tosort[i]]); - if (k1 > k2) { - all_sorted = 0; - break; - } - k1 = k2; - } - - if (all_sorted) { - return 0; - } - - aux = malloc(num * sizeof(npy_intp)); - if (aux == NULL) { - return -NPY_ENOMEM; - } - - sorted = aradixsort0_@suff@(start, aux, tosort, num); - if (sorted != tosort) { - memcpy(tosort, sorted, num * sizeof(npy_intp)); - } - - free(aux); - return 0; -} - -#undef KEY_OF - -/**end repeat**/ diff --git a/numpy/core/src/npysort/radixsort.cpp b/numpy/core/src/npysort/radixsort.cpp new file mode 100644 index 000000000..017ea43b6 --- /dev/null +++ b/numpy/core/src/npysort/radixsort.cpp @@ -0,0 +1,354 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#include "npy_sort.h" +#include "npysort_common.h" + +#include "../common/numpy_tag.h" +#include <stdlib.h> +#include <type_traits> + +/* + ***************************************************************************** + ** INTEGER SORTS ** + ***************************************************************************** + */ + +// Reference: https://github.com/eloj/radix-sorting#-key-derivation +template <class T> +T +KEY_OF(T x) +{ + // Floating-point is currently disabled. + // Floating-point tests succeed for double and float on macOS but not on + // Windows/Linux. Basic sorting tests succeed but others relying on sort + // fail. Possibly related to floating-point normalisation or multiple NaN + // reprs? Not sure. + if (std::is_floating_point<T>::value) { + // For floats, we invert the key if the sign bit is set, else we invert + // the sign bit. + return ((x) ^ (-((x) >> (sizeof(T) * 8 - 1)) | + ((T)1 << (sizeof(T) * 8 - 1)))); + } + else if (std::is_signed<T>::value) { + // For signed ints, we flip the sign bit so the negatives are below the + // positives. + return ((x) ^ ((T)1 << (sizeof(T) * 8 - 1))); + } + else { + return x; + } +} + +template <class T> +static inline npy_ubyte +nth_byte(T key, npy_intp l) +{ + return (key >> (l << 3)) & 0xFF; +} + +template <class T> +static T * +radixsort0(T *start, T *aux, npy_intp num) +{ + npy_intp cnt[sizeof(T)][1 << 8] = {{0}}; + T key0 = KEY_OF(start[0]); + + for (npy_intp i = 0; i < num; i++) { + T k = KEY_OF(start[i]); + + for (size_t l = 0; l < sizeof(T); l++) { + cnt[l][nth_byte(k, l)]++; + } + } + + size_t ncols = 0; + npy_ubyte cols[sizeof(T)]; + for (size_t l = 0; l < sizeof(T); l++) { + if (cnt[l][nth_byte(key0, l)] != num) { + cols[ncols++] = l; + } + } + + for (size_t l = 0; l < ncols; l++) { + npy_intp a = 0; + for (npy_intp i = 0; i < 256; i++) { + npy_intp b = cnt[cols[l]][i]; + cnt[cols[l]][i] = a; + a += b; + } + } + + for (size_t l = 0; l < ncols; l++) { + T *temp; + for (npy_intp i = 0; i < num; i++) { + T k = KEY_OF(start[i]); + npy_intp dst = cnt[cols[l]][nth_byte(k, cols[l])]++; + aux[dst] = start[i]; + } + + temp = aux; + aux = start; + start = temp; + } + + return start; +} + +template <class T> +static int +radixsort_(T *start, npy_intp num) +{ + if (num < 2) { + return 0; + } + + npy_bool all_sorted = 1; + T k1 = KEY_OF(start[0]), k2; + for (npy_intp i = 1; i < num; i++) { + k2 = KEY_OF(start[i]); + if (k1 > k2) { + all_sorted = 0; + break; + } + k1 = k2; + } + + if (all_sorted) { + return 0; + } + + T *aux = (T *)malloc(num * sizeof(T)); + if (aux == nullptr) { + return -NPY_ENOMEM; + } + + T *sorted = radixsort0(start, aux, num); + if (sorted != start) { + memcpy(start, sorted, num * sizeof(T)); + } + + free(aux); + return 0; +} + +template <class T> +static int +radixsort(void *start, npy_intp num) +{ + return radixsort_((T *)start, num); +} + +template <class T> +static npy_intp * +aradixsort0(T *start, npy_intp *aux, npy_intp *tosort, npy_intp num) +{ + npy_intp cnt[sizeof(T)][1 << 8] = {{0}}; + T key0 = KEY_OF(start[0]); + + for (npy_intp i = 0; i < num; i++) { + T k = KEY_OF(start[i]); + + for (size_t l = 0; l < sizeof(T); l++) { + cnt[l][nth_byte(k, l)]++; + } + } + + size_t ncols = 0; + npy_ubyte cols[sizeof(T)]; + for (size_t l = 0; l < sizeof(T); l++) { + if (cnt[l][nth_byte(key0, l)] != num) { + cols[ncols++] = l; + } + } + + for (size_t l = 0; l < ncols; l++) { + npy_intp a = 0; + for (npy_intp i = 0; i < 256; i++) { + npy_intp b = cnt[cols[l]][i]; + cnt[cols[l]][i] = a; + a += b; + } + } + + for (size_t l = 0; l < ncols; l++) { + npy_intp *temp; + for (npy_intp i = 0; i < num; i++) { + T k = KEY_OF(start[tosort[i]]); + npy_intp dst = cnt[cols[l]][nth_byte(k, cols[l])]++; + aux[dst] = tosort[i]; + } + + temp = aux; + aux = tosort; + tosort = temp; + } + + return tosort; +} + +template <class T> +static int +aradixsort_(T *start, npy_intp *tosort, npy_intp num) +{ + npy_intp *sorted; + npy_intp *aux; + T k1, k2; + npy_bool all_sorted = 1; + + if (num < 2) { + return 0; + } + + k1 = KEY_OF(start[tosort[0]]); + for (npy_intp i = 1; i < num; i++) { + k2 = KEY_OF(start[tosort[i]]); + if (k1 > k2) { + all_sorted = 0; + break; + } + k1 = k2; + } + + if (all_sorted) { + return 0; + } + + aux = (npy_intp *)malloc(num * sizeof(npy_intp)); + if (aux == NULL) { + return -NPY_ENOMEM; + } + + sorted = aradixsort0(start, aux, tosort, num); + if (sorted != tosort) { + memcpy(tosort, sorted, num * sizeof(npy_intp)); + } + + free(aux); + return 0; +} + +template <class T> +static int +aradixsort(void *start, npy_intp *tosort, npy_intp num) +{ + return aradixsort_((T *)start, tosort, num); +} + +extern "C" { +NPY_NO_EXPORT int +radixsort_bool(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_bool>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_byte(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_byte>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_ubyte(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_ubyte>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_short(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_short>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_ushort(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_ushort>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_int(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_int>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_uint(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_uint>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_long(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_long>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_ulong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_ulong>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_longlong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_longlong>(vec, cnt); +} +NPY_NO_EXPORT int +radixsort_ulonglong(void *vec, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return radixsort<npy_ulonglong>(vec, cnt); +} +NPY_NO_EXPORT int +aradixsort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return aradixsort<npy_bool>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return aradixsort<npy_byte>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_ubyte(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_ubyte>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_short(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_short>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_ushort(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_ushort>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_int(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return aradixsort<npy_int>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return aradixsort<npy_uint>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_long(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null)) +{ + return aradixsort<npy_long>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_ulong(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_ulong>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_longlong(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_longlong>(vec, ind, cnt); +} +NPY_NO_EXPORT int +aradixsort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt, + void *NPY_UNUSED(null)) +{ + return aradixsort<npy_ulonglong>(vec, ind, cnt); +} +} diff --git a/numpy/core/src/umath/_scaled_float_dtype.c b/numpy/core/src/umath/_scaled_float_dtype.c index eeef33a3d..b6c19362a 100644 --- a/numpy/core/src/umath/_scaled_float_dtype.c +++ b/numpy/core/src/umath/_scaled_float_dtype.c @@ -398,6 +398,42 @@ float_to_from_sfloat_resolve_descriptors( } +/* + * Cast to boolean (for testing the logical functions a bit better). + */ +static int +cast_sfloat_to_bool(PyArrayMethod_Context *NPY_UNUSED(context), + char *const data[], npy_intp const dimensions[], + npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in = data[0]; + char *out = data[1]; + for (npy_intp i = 0; i < N; i++) { + *(npy_bool *)out = *(double *)in != 0; + in += strides[0]; + out += strides[1]; + } + return 0; +} + +static NPY_CASTING +sfloat_to_bool_resolve_descriptors( + PyArrayMethodObject *NPY_UNUSED(self), + PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]), + PyArray_Descr *given_descrs[2], + PyArray_Descr *loop_descrs[2]) +{ + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + if (loop_descrs[0] == NULL) { + return -1; + } + loop_descrs[1] = PyArray_DescrFromType(NPY_BOOL); /* cannot fail */ + return NPY_UNSAFE_CASTING; +} + + static int init_casts(void) { @@ -453,6 +489,22 @@ init_casts(void) return -1; } + slots[0].slot = NPY_METH_resolve_descriptors; + slots[0].pfunc = &sfloat_to_bool_resolve_descriptors; + slots[1].slot = NPY_METH_strided_loop; + slots[1].pfunc = &cast_sfloat_to_bool; + slots[2].slot = 0; + slots[2].pfunc = NULL; + + spec.name = "sfloat_to_bool_cast"; + dtypes[0] = &PyArray_SFloatDType; + dtypes[1] = PyArray_DTypeFromTypeNum(NPY_BOOL); + Py_DECREF(dtypes[1]); /* immortal anyway */ + + if (PyArray_AddCastingImplementation_FromSpec(&spec, 0)) { + return -1; + } + return 0; } diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src index 33d8539d5..ce42fc271 100644 --- a/numpy/core/src/umath/_umath_tests.c.src +++ b/numpy/core/src/umath/_umath_tests.c.src @@ -400,6 +400,16 @@ addUfuncs(PyObject *dictionary) { } PyDict_SetItemString(dictionary, "always_error", f); Py_DECREF(f); + f = PyUFunc_FromFuncAndDataAndSignature(always_error_functions, + always_error_data, always_error_signatures, 1, 2, 1, PyUFunc_None, + "always_error_gufunc", + "simply, broken, gufunc that sets an error (but releases the GIL).", + 0, "(i),()->()"); + if (f == NULL) { + return -1; + } + PyDict_SetItemString(dictionary, "always_error_gufunc", f); + Py_DECREF(f); f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data, inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d", "inner on the last dimension and broadcast on the rest \n" diff --git a/numpy/core/src/umath/clip.c.src b/numpy/core/src/umath/clip.c.src deleted file mode 100644 index bc966b7ac..000000000 --- a/numpy/core/src/umath/clip.c.src +++ /dev/null @@ -1,120 +0,0 @@ -/** - * This module provides the inner loops for the clip ufunc - */ -#define PY_SSIZE_T_CLEAN -#include <Python.h> - -#define _UMATHMODULE -#define _MULTIARRAYMODULE -#define NPY_NO_DEPRECATED_API NPY_API_VERSION - -#include "numpy/halffloat.h" -#include "numpy/npy_math.h" -#include "numpy/ndarraytypes.h" -#include "numpy/npy_common.h" -#include "numpy/utils.h" -#include "fast_loop_macros.h" - -/* - * Produce macros that perform nan/nat-propagating min and max - */ - -/**begin repeat - * #name = BOOL, - * BYTE, UBYTE, SHORT, USHORT, INT, UINT, - * LONG, ULONG, LONGLONG, ULONGLONG# - */ -#define _NPY_@name@_MIN(a, b) PyArray_MIN(a, b) -#define _NPY_@name@_MAX(a, b) PyArray_MAX(a, b) -/**end repeat**/ - -#define _NPY_HALF_MIN(a, b) (npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b)) -#define _NPY_HALF_MAX(a, b) (npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b)) - -/**begin repeat - * #name = FLOAT, DOUBLE, LONGDOUBLE# - */ -#define _NPY_@name@_MIN(a, b) (npy_isnan(a) ? (a) : PyArray_MIN(a, b)) -#define _NPY_@name@_MAX(a, b) (npy_isnan(a) ? (a) : PyArray_MAX(a, b)) -/**end repeat**/ - -/**begin repeat - * #name = CFLOAT, CDOUBLE, CLONGDOUBLE# - */ -#define _NPY_@name@_MIN(a, b) (npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CLT(a, b) ? (a) : (b)) -#define _NPY_@name@_MAX(a, b) (npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CGT(a, b) ? (a) : (b)) -/**end repeat**/ - -/**begin repeat - * #name = DATETIME, TIMEDELTA# - */ -#define _NPY_@name@_MIN(a, b) ( \ - (a) == NPY_DATETIME_NAT ? (a) : \ - (b) == NPY_DATETIME_NAT ? (b) : \ - (a) < (b) ? (a) : (b) \ -) -#define _NPY_@name@_MAX(a, b) ( \ - (a) == NPY_DATETIME_NAT ? (a) : \ - (b) == NPY_DATETIME_NAT ? (b) : \ - (a) > (b) ? (a) : (b) \ -) -/**end repeat**/ - -/**begin repeat - * - * #name = BOOL, - * BYTE, UBYTE, SHORT, USHORT, INT, UINT, - * LONG, ULONG, LONGLONG, ULONGLONG, - * HALF, FLOAT, DOUBLE, LONGDOUBLE, - * CFLOAT, CDOUBLE, CLONGDOUBLE, - * DATETIME, TIMEDELTA# - * #type = npy_bool, - * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint, - * npy_long, npy_ulong, npy_longlong, npy_ulonglong, - * npy_half, npy_float, npy_double, npy_longdouble, - * npy_cfloat, npy_cdouble, npy_clongdouble, - * npy_datetime, npy_timedelta# - */ - -#define _NPY_CLIP(x, min, max) \ - _NPY_@name@_MIN(_NPY_@name@_MAX((x), (min)), (max)) - -NPY_NO_EXPORT void -@name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) -{ - if (steps[1] == 0 && steps[2] == 0) { - /* min and max are constant throughout the loop, the most common case */ - /* NOTE: it may be possible to optimize these checks for nan */ - @type@ min_val = *(@type@ *)args[1]; - @type@ max_val = *(@type@ *)args[2]; - - char *ip1 = args[0], *op1 = args[3]; - npy_intp is1 = steps[0], os1 = steps[3]; - npy_intp n = dimensions[0]; - - /* contiguous, branch to let the compiler optimize */ - if (is1 == sizeof(@type@) && os1 == sizeof(@type@)) { - for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val); - } - } - else { - for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val); - } - } - } - else { - TERNARY_LOOP { - *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, *(@type@ *)ip2, *(@type@ *)ip3); - } - } - npy_clear_floatstatus_barrier((char*)dimensions); -} - -// clean up the macros we defined above -#undef _NPY_CLIP -#undef _NPY_@name@_MAX -#undef _NPY_@name@_MIN - -/**end repeat**/ diff --git a/numpy/core/src/umath/clip.cpp b/numpy/core/src/umath/clip.cpp new file mode 100644 index 000000000..19d05c848 --- /dev/null +++ b/numpy/core/src/umath/clip.cpp @@ -0,0 +1,282 @@ +/** + * This module provides the inner loops for the clip ufunc + */ +#define _UMATHMODULE +#define _MULTIARRAYMODULE +#define NPY_NO_DEPRECATED_API NPY_API_VERSION + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include "numpy/halffloat.h" +#include "numpy/ndarraytypes.h" +#include "numpy/npy_common.h" +#include "numpy/npy_math.h" +#include "numpy/utils.h" + +#include "fast_loop_macros.h" + +#include "../common/numpy_tag.h" + +template <class T> +T +_NPY_MIN(T a, T b, npy::integral_tag const &) +{ + return PyArray_MIN(a, b); +} +template <class T> +T +_NPY_MAX(T a, T b, npy::integral_tag const &) +{ + return PyArray_MAX(a, b); +} + +npy_half +_NPY_MIN(npy_half a, npy_half b, npy::half_tag const &) +{ + return npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b); +} +npy_half +_NPY_MAX(npy_half a, npy_half b, npy::half_tag const &) +{ + return npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b); +} + +template <class T> +T +_NPY_MIN(T a, T b, npy::floating_point_tag const &) +{ + return npy_isnan(a) ? (a) : PyArray_MIN(a, b); +} +template <class T> +T +_NPY_MAX(T a, T b, npy::floating_point_tag const &) +{ + return npy_isnan(a) ? (a) : PyArray_MAX(a, b); +} + +template <class T> +T +_NPY_MIN(T a, T b, npy::complex_tag const &) +{ + return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CLT(a, b) + ? (a) + : (b); +} +template <class T> +T +_NPY_MAX(T a, T b, npy::complex_tag const &) +{ + return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CGT(a, b) + ? (a) + : (b); +} + +template <class T> +T +_NPY_MIN(T a, T b, npy::date_tag const &) +{ + return (a) == NPY_DATETIME_NAT ? (a) + : (b) == NPY_DATETIME_NAT ? (b) + : (a) < (b) ? (a) + : (b); +} +template <class T> +T +_NPY_MAX(T a, T b, npy::date_tag const &) +{ + return (a) == NPY_DATETIME_NAT ? (a) + : (b) == NPY_DATETIME_NAT ? (b) + : (a) > (b) ? (a) + : (b); +} + +/* generic dispatcher */ +template <class Tag, class T = typename Tag::type> +T +_NPY_MIN(T const &a, T const &b) +{ + return _NPY_MIN(a, b, Tag{}); +} +template <class Tag, class T = typename Tag::type> +T +_NPY_MAX(T const &a, T const &b) +{ + return _NPY_MAX(a, b, Tag{}); +} + +template <class Tag, class T> +T +_NPY_CLIP(T x, T min, T max) +{ + return _NPY_MIN<Tag>(_NPY_MAX<Tag>((x), (min)), (max)); +} + +template <class Tag, class T = typename Tag::type> +static void +_npy_clip_(T **args, npy_intp const *dimensions, npy_intp const *steps) +{ + npy_intp n = dimensions[0]; + if (steps[1] == 0 && steps[2] == 0) { + /* min and max are constant throughout the loop, the most common case + */ + /* NOTE: it may be possible to optimize these checks for nan */ + T min_val = *args[1]; + T max_val = *args[2]; + + T *ip1 = args[0], *op1 = args[3]; + npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T); + + /* contiguous, branch to let the compiler optimize */ + if (is1 == 1 && os1 == 1) { + for (npy_intp i = 0; i < n; i++, ip1++, op1++) { + *op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val); + } + } + else { + for (npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) { + *op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val); + } + } + } + else { + T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3]; + npy_intp is1 = steps[0] / sizeof(T), is2 = steps[1] / sizeof(T), + is3 = steps[2] / sizeof(T), os1 = steps[3] / sizeof(T); + for (npy_intp i = 0; i < n; + i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1) + *op1 = _NPY_CLIP<Tag>(*ip1, *ip2, *ip3); + } + npy_clear_floatstatus_barrier((char *)dimensions); +} + +template <class Tag> +static void +_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps) +{ + using T = typename Tag::type; + return _npy_clip_<Tag>((T **)args, dimensions, steps); +} + +extern "C" { +NPY_NO_EXPORT void +BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::bool_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::byte_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::ubyte_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::short_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::ushort_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::int_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::uint_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::long_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::ulong_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::longlong_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::ulonglong_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::half_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::float_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::double_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::longdouble_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::cfloat_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::cdouble_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +CLONGDOUBLE_clip(char **args, npy_intp const *dimensions, + npy_intp const *steps, void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::clongdouble_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::datetime_tag>(args, dimensions, steps); +} +NPY_NO_EXPORT void +TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)) +{ + return _npy_clip<npy::timedelta_tag>(args, dimensions, steps); +} +} diff --git a/numpy/core/src/umath/clip.h b/numpy/core/src/umath/clip.h new file mode 100644 index 000000000..f69ebd1e3 --- /dev/null +++ b/numpy/core/src/umath/clip.h @@ -0,0 +1,73 @@ +#ifndef _NPY_UMATH_CLIP_H_ +#define _NPY_UMATH_CLIP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +NPY_NO_EXPORT void +BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +CLONGDOUBLE_clip(char **args, npy_intp const *dimensions, + npy_intp const *steps, void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); +NPY_NO_EXPORT void +TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, + void *NPY_UNUSED(func)); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/numpy/core/src/umath/clip.h.src b/numpy/core/src/umath/clip.h.src deleted file mode 100644 index f16856cdf..000000000 --- a/numpy/core/src/umath/clip.h.src +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _NPY_UMATH_CLIP_H_ -#define _NPY_UMATH_CLIP_H_ - - -/**begin repeat - * - * #name = BOOL, - * BYTE, UBYTE, SHORT, USHORT, INT, UINT, - * LONG, ULONG, LONGLONG, ULONGLONG, - * HALF, FLOAT, DOUBLE, LONGDOUBLE, - * CFLOAT, CDOUBLE, CLONGDOUBLE, - * DATETIME, TIMEDELTA# - */ -NPY_NO_EXPORT void -@name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); -/**end repeat**/ - -#endif diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c index 40de28754..8e99c0420 100644 --- a/numpy/core/src/umath/dispatching.c +++ b/numpy/core/src/umath/dispatching.c @@ -193,6 +193,10 @@ resolve_implementation_info(PyUFuncObject *ufunc, /* Unspecified out always matches (see below for inputs) */ continue; } + if (resolver_dtype == (PyArray_DTypeMeta *)Py_None) { + /* always matches */ + continue; + } if (given_dtype == resolver_dtype) { continue; } @@ -267,8 +271,39 @@ resolve_implementation_info(PyUFuncObject *ufunc, * the subclass should be considered a better match * (subclasses are always more specific). */ + /* Whether this (normally output) dtype was specified at all */ + if (op_dtypes[i] == NULL) { + /* + * When DType is completely unspecified, prefer abstract + * over concrete, assuming it will resolve. + * Furthermore, we cannot decide which abstract/None + * is "better", only concrete ones which are subclasses + * of Abstract ones are defined as worse. + */ + npy_bool prev_is_concrete = NPY_FALSE; + npy_bool new_is_concrete = NPY_FALSE; + if ((prev_dtype != Py_None) && + !NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) { + prev_is_concrete = NPY_TRUE; + } + if ((new_dtype != Py_None) && + !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) { + new_is_concrete = NPY_TRUE; + } + if (prev_is_concrete == new_is_concrete) { + best = -1; + } + else if (prev_is_concrete) { + unambiguously_equally_good = 0; + best = 1; + } + else { + unambiguously_equally_good = 0; + best = 0; + } + } /* If either is None, the other is strictly more specific */ - if (prev_dtype == Py_None) { + else if (prev_dtype == Py_None) { unambiguously_equally_good = 0; best = 1; } @@ -289,13 +324,29 @@ resolve_implementation_info(PyUFuncObject *ufunc, */ best = -1; } + else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) { + /* old is not abstract, so better (both not possible) */ + unambiguously_equally_good = 0; + best = 0; + } + else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) { + /* new is not abstract, so better (both not possible) */ + unambiguously_equally_good = 0; + best = 1; + } /* - * TODO: Unreachable, but we will need logic for abstract - * DTypes to decide if one is a subclass of the other - * (And their subclass relation is well defined.) + * TODO: This will need logic for abstract DTypes to decide if + * one is a subclass of the other (And their subclass + * relation is well defined). For now, we bail out + * in cas someone manages to get here. */ else { - assert(0); + PyErr_SetString(PyExc_NotImplementedError, + "deciding which one of two abstract dtypes is " + "a better match is not yet implemented. This " + "will pick the better (or bail) in the future."); + *out_info = NULL; + return -1; } if ((current_best != -1) && (current_best != best)) { @@ -612,6 +663,35 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc, } return info; } + else if (info == NULL && op_dtypes[0] == NULL) { + /* + * If we have a reduction, fill in the unspecified input/array + * assuming it should have the same dtype as the operand input + * (or the output one if given). + * Then, try again. In some cases, this will choose different + * paths, such as `ll->?` instead of an `??->?` loop for `np.equal` + * when the input is `.l->.` (`.` meaning undefined). This will + * then cause an error. But cast to `?` would always lose + * information, and in many cases important information: + * + * ```python + * from operator import eq + * from functools import reduce + * + * reduce(eq, [1, 2, 3]) != reduce(eq, [True, True, True]) + * ``` + * + * The special cases being `logical_(and|or|xor)` which can always + * cast to boolean ahead of time and still give the right answer + * (unsafe cast to bool is fine here). We special case these at + * the time of this comment (NumPy 1.21). + */ + assert(ufunc->nin == 2 && ufunc->nout == 1); + op_dtypes[0] = op_dtypes[2] != NULL ? op_dtypes[2] : op_dtypes[1]; + Py_INCREF(op_dtypes[0]); + return promote_and_get_info_and_ufuncimpl(ufunc, + ops, signature, op_dtypes, allow_legacy_promotion, 1); + } } /* @@ -743,3 +823,94 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc, return method; } + + +/* + * Special promoter for the logical ufuncs. The logical ufuncs can always + * use the ??->? and still get the correct output (as long as the output + * is not supposed to be `object`). + */ +static int +logical_ufunc_promoter(PyUFuncObject *NPY_UNUSED(ufunc), + PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[], + PyArray_DTypeMeta *new_op_dtypes[]) +{ + /* + * If we find any object DType at all, we currently force to object. + * However, if the output is specified and not object, there is no point, + * it should be just as well to cast the input rather than doing the + * unsafe out cast. + */ + int force_object = 0; + + for (int i = 0; i < 3; i++) { + PyArray_DTypeMeta *item; + if (signature[i] != NULL) { + item = signature[i]; + Py_INCREF(item); + if (item->type_num == NPY_OBJECT) { + force_object = 1; + } + } + else { + /* Always override to boolean */ + item = PyArray_DTypeFromTypeNum(NPY_BOOL); + if (op_dtypes[i] != NULL && op_dtypes[i]->type_num == NPY_OBJECT) { + force_object = 1; + } + } + new_op_dtypes[i] = item; + } + + if (!force_object || (op_dtypes[2] != NULL + && op_dtypes[2]->type_num != NPY_OBJECT)) { + return 0; + } + /* + * Actually, we have to use the OBJECT loop after all, set all we can + * to object (that might not work out, but try). + * + * NOTE: Change this to check for `op_dtypes[0] == NULL` to STOP + * returning `object` for `np.logical_and.reduce(obj_arr)` + * which will also affect `np.all` and `np.any`! + */ + for (int i = 0; i < 3; i++) { + if (signature[i] != NULL) { + continue; + } + Py_SETREF(new_op_dtypes[i], PyArray_DTypeFromTypeNum(NPY_OBJECT)); + } + return 0; +} + + +NPY_NO_EXPORT int +install_logical_ufunc_promoter(PyObject *ufunc) +{ + if (PyObject_Type(ufunc) != (PyObject *)&PyUFunc_Type) { + PyErr_SetString(PyExc_RuntimeError, + "internal numpy array, logical ufunc was not a ufunc?!"); + return -1; + } + PyObject *dtype_tuple = PyTuple_Pack(3, + &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type, NULL); + if (dtype_tuple == NULL) { + return -1; + } + PyObject *promoter = PyCapsule_New(&logical_ufunc_promoter, + "numpy._ufunc_promoter", NULL); + if (promoter == NULL) { + Py_DECREF(dtype_tuple); + return -1; + } + + PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter); + Py_DECREF(dtype_tuple); + Py_DECREF(promoter); + if (info == NULL) { + return -1; + } + + return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0); +} + diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h index 8d116873c..2f314615d 100644 --- a/numpy/core/src/umath/dispatching.h +++ b/numpy/core/src/umath/dispatching.h @@ -26,4 +26,8 @@ NPY_NO_EXPORT PyObject * add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc, PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate); +NPY_NO_EXPORT int +install_logical_ufunc_promoter(PyObject *ufunc); + + #endif /*_NPY_DISPATCHING_H */ diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c index 77b1b9013..a423823d4 100644 --- a/numpy/core/src/umath/legacy_array_method.c +++ b/numpy/core/src/umath/legacy_array_method.c @@ -217,6 +217,25 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc, */ int any_output_flexible = 0; NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufunc->nargs == 3 && + signature[0]->type_num == NPY_BOOL && + signature[1]->type_num == NPY_BOOL && + signature[2]->type_num == NPY_BOOL && ( + strcmp(ufunc->name, "logical_or") == 0 || + strcmp(ufunc->name, "logical_and") == 0 || + strcmp(ufunc->name, "logical_xor") == 0)) { + /* + * This is a logical ufunc, and the `??->?` loop`. It is always OK + * to cast any input to bool, because that cast is defined by + * truthiness. + * This allows to ensure two things: + * 1. `np.all`/`np.any` know that force casting the input is OK + * (they must do this since there are no `?l->?`, etc. loops) + * 2. The logical functions automatically work for any DType + * implementing a cast to boolean. + */ + flags = _NPY_METH_FORCE_CAST_INPUTS; + } for (int i = 0; i < ufunc->nin+ufunc->nout; i++) { if (signature[i]->singleton->flags & ( diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index fa7844014..6076e0b2d 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1506,8 +1506,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const * */ /**begin repeat - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ /**begin repeat1 @@ -1542,8 +1542,8 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void */ /**begin repeat2 - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ NPY_NO_EXPORT NPY_GCC_OPT_3 void diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index 90115006f..3eafbdf66 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -186,7 +186,7 @@ NPY_NO_EXPORT void * #TYPE = FLOAT, DOUBLE# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# + * #kind = ceil, sqrt, absolute, square, reciprocal# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))) @@ -227,7 +227,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, /**end repeat**/ /**begin repeat - * #func = sin, cos# + * #func = sin, cos# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@, @@ -274,7 +274,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, ( /**end repeat**/ /**begin repeat - * #func = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# */ /**begin repeat1 diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src index 8c2c83e7c..cd9b2ed54 100644 --- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src +++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src @@ -209,7 +209,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@) const npy_intp ssrc = steps[0] / lsize; const npy_intp sdst = steps[1] / lsize; npy_intp len = dimensions[0]; - assert(steps[0] % lsize == 0 && steps[1] % lsize == 0); + assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0)); #if NPY_SIMD_FMA3 if (is_mem_overlap(src, steps[0], dst, steps[1], len) || !npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst) diff --git a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src index 852604655..a8289fc51 100644 --- a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src +++ b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src @@ -96,7 +96,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@) const npy_intp ssrc = steps[0] / lsize; const npy_intp sdst = steps[1] / lsize; const npy_intp len = dimensions[0]; - assert(steps[0] % lsize == 0 && steps[1] % lsize == 0); + assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0)); if (!is_mem_overlap(src, steps[0], dst, steps[1], len) && npyv_loadable_stride_@sfx@(ssrc) && npyv_storable_stride_@sfx@(sdst)) { @@ -125,7 +125,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@) const npy_intp ssrc = steps[0] / lsize; const npy_intp sdst = steps[1] / lsize; const npy_intp len = dimensions[0]; - assert(steps[0] % lsize == 0 && steps[1] % lsize == 0); + assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0)); if (!is_mem_overlap(src, steps[0], dst, steps[1], len) && npyv_loadable_stride_f64(ssrc) && npyv_storable_stride_f64(sdst)) { diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src index 2d5917282..93761b98c 100644 --- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src +++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src @@ -1,6 +1,8 @@ /*@targets ** $maxopt baseline - ** sse2 vsx2 neon + ** sse2 sse41 + ** vsx2 + ** neon asimd **/ /** * Force use SSE only on x86, even if AVX2 or AVX512F are enabled @@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a) #define c_sqrt_f64 npy_sqrt #endif +#define c_ceil_f32 npy_ceilf +#define c_ceil_f64 npy_ceil + /******************************************************************************** ** Defining the SIMD kernels ********************************************************************************/ @@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a) */ #if @VCHK@ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #repl_0w1 = 0, 0, 0, 1# - * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #repl_0w1 = 0, 0, 0, 0, 1# + * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# */ /**begin repeat2 * #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG# @@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@ * #VCHK = NPY_SIMD, NPY_SIMD_F64# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #clear = 0, 1, 0, 0# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #clear = 0, 0, 1, 0, 0# */ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) @@ -258,7 +263,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@) npy_intp len = dimensions[0]; #if @VCHK@ const int lsize = sizeof(npyv_lanetype_@sfx@); - assert(src_step % lsize == 0 && dst_step % lsize == 0); + assert(len <= 1 || (src_step % lsize == 0 && dst_step % lsize == 0)); if (is_mem_overlap(src, src_step, dst, dst_step, len)) { goto no_unroll; } diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index d5a251368..c28c8abd8 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -145,14 +145,12 @@ PyArray_CopyInitialReduceValues( * boilerplate code, just calling the appropriate inner loop function where * necessary. * + * context : The ArrayMethod context (with ufunc, method, and descriptors). * operand : The array to be reduced. * out : NULL, or the array into which to place the result. * wheremask : NOT YET SUPPORTED, but this parameter is placed here * so that support can be added in the future without breaking * API compatibility. Pass in NULL. - * operand_dtype : The dtype the inner loop expects for the operand. - * result_dtype : The dtype the inner loop expects for the result. - * casting : The casting rule to apply to the operands. * axis_flags : Flags indicating the reduction axes of 'operand'. * reorderable : If True, the reduction being done is reorderable, which * means specifying multiple axes of reduction at once is ok, @@ -182,10 +180,8 @@ PyArray_CopyInitialReduceValues( * generalized ufuncs!) */ NPY_NO_EXPORT PyArrayObject * -PyUFunc_ReduceWrapper( +PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, - PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype, - NPY_CASTING casting, npy_bool *axis_flags, int reorderable, int keepdims, PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask) @@ -199,6 +195,8 @@ PyUFunc_ReduceWrapper( PyArrayObject *op[3]; PyArray_Descr *op_dtypes[3]; npy_uint32 it_flags, op_flags[3]; + /* Loop auxdata (must be freed on error) */ + NpyAuxData *auxdata = NULL; /* More than one axis means multiple orders are possible */ if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) { @@ -221,8 +219,8 @@ PyUFunc_ReduceWrapper( /* Set up the iterator */ op[0] = out; op[1] = operand; - op_dtypes[0] = result_dtype; - op_dtypes[1] = operand_dtype; + op_dtypes[0] = context->descriptors[0]; + op_dtypes[1] = context->descriptors[1]; it_flags = NPY_ITER_BUFFERED | NPY_ITER_EXTERNAL_LOOP | @@ -291,7 +289,7 @@ PyUFunc_ReduceWrapper( } iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags, - NPY_KEEPORDER, casting, + NPY_KEEPORDER, NPY_UNSAFE_CASTING, op_flags, op_dtypes, PyArray_NDIM(operand), op_axes, NULL, buffersize); @@ -301,9 +299,29 @@ PyUFunc_ReduceWrapper( result = NpyIter_GetOperandArray(iter)[0]; - int needs_api = NpyIter_IterationNeedsAPI(iter); - /* Start with the floating-point exception flags cleared */ - npy_clear_floatstatus_barrier((char*)&iter); + PyArrayMethod_StridedLoop *strided_loop; + NPY_ARRAYMETHOD_FLAGS flags = 0; + npy_intp fixed_strides[3]; + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + if (wheremask != NULL) { + if (PyArrayMethod_GetMaskedStridedLoop(context, + 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + else { + if (context->method->get_strided_loop(context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + } + + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + needs_api |= NpyIter_IterationNeedsAPI(iter); + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } /* * Initialize the result to the reduction unit if possible, @@ -345,16 +363,18 @@ PyUFunc_ReduceWrapper( strideptr = NpyIter_GetInnerStrideArray(iter); countptr = NpyIter_GetInnerLoopSizePtr(iter); - if (loop(iter, dataptr, strideptr, countptr, - iternext, needs_api, skip_first_count, data) < 0) { + if (loop(context, strided_loop, auxdata, + iter, dataptr, strideptr, countptr, iternext, + needs_api, skip_first_count) < 0) { goto fail; } } - /* Check whether any errors occurred during the loop */ - if (PyErr_Occurred() || - _check_ufunc_fperr(errormask, NULL, "reduce") < 0) { - goto fail; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even on error */ + if (_check_ufunc_fperr(errormask, NULL, "reduce") < 0) { + goto fail; + } } if (out != NULL) { @@ -369,6 +389,7 @@ PyUFunc_ReduceWrapper( return result; fail: + NPY_AUXDATA_FREE(auxdata); if (iter != NULL) { NpyIter_Deallocate(iter); } diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index 372605dba..2170e27a7 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -19,93 +19,17 @@ typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result, void *data); /* - * This is a function for the reduce loop. + * Inner definition of the reduce loop, only used for a static function. + * At some point around NumPy 1.6, there was probably an intention to make + * the reduce loop customizable at this level (per ufunc?). * - * The needs_api parameter indicates whether it's ok to release the GIL during - * the loop, such as when the iternext() function never calls - * a function which could raise a Python exception. - * - * The skip_first_count parameter indicates how many elements need to be - * skipped based on NpyIter_IsFirstVisit checks. This can only be positive - * when the 'assign_identity' parameter was NULL when calling - * PyArray_ReduceWrapper. - * - * The loop gets two data pointers and two strides, and should - * look roughly like this: - * { - * NPY_BEGIN_THREADS_DEF; - * if (!needs_api) { - * NPY_BEGIN_THREADS; - * } - * // This first-visit loop can be skipped if 'assign_identity' was non-NULL - * if (skip_first_count > 0) { - * do { - * char *data0 = dataptr[0], *data1 = dataptr[1]; - * npy_intp stride0 = strideptr[0], stride1 = strideptr[1]; - * npy_intp count = *countptr; - * - * // Skip any first-visit elements - * if (NpyIter_IsFirstVisit(iter, 0)) { - * if (stride0 == 0) { - * --count; - * --skip_first_count; - * data1 += stride1; - * } - * else { - * skip_first_count -= count; - * count = 0; - * } - * } - * - * while (count--) { - * *(result_t *)data0 = my_reduce_op(*(result_t *)data0, - * *(operand_t *)data1); - * data0 += stride0; - * data1 += stride1; - * } - * - * // Jump to the faster loop when skipping is done - * if (skip_first_count == 0) { - * if (iternext(iter)) { - * break; - * } - * else { - * goto finish_loop; - * } - * } - * } while (iternext(iter)); - * } - * do { - * char *data0 = dataptr[0], *data1 = dataptr[1]; - * npy_intp stride0 = strideptr[0], stride1 = strideptr[1]; - * npy_intp count = *countptr; - * - * while (count--) { - * *(result_t *)data0 = my_reduce_op(*(result_t *)data0, - * *(operand_t *)data1); - * data0 += stride0; - * data1 += stride1; - * } - * } while (iternext(iter)); - * finish_loop: - * if (!needs_api) { - * NPY_END_THREADS; - * } - * return (needs_api && PyErr_Occurred()) ? -1 : 0; - * } - * - * If needs_api is True, this function should call PyErr_Occurred() - * to check if an error occurred during processing, and return -1 for - * error, 0 for success. + * TODO: This should be refactored/removed. */ -typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, - char **dataptr, - npy_intp const *strideptr, - npy_intp const *countptr, - NpyIter_IterNextFunc *iternext, - int needs_api, - npy_intp skip_first_count, - void *data); +typedef int (PyArray_ReduceLoopFunc)(PyArrayMethod_Context *context, + PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata, + NpyIter *iter, char **dataptrs, npy_intp const *strides, + npy_intp const *countptr, NpyIter_IterNextFunc *iternext, + int needs_api, npy_intp skip_first_count); /* * This function executes all the standard NumPy reduction function @@ -138,16 +62,10 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, * errormask : forwarded from _get_bufsize_errmask */ NPY_NO_EXPORT PyArrayObject * -PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, - PyArrayObject *wheremask, - PyArray_Descr *operand_dtype, - PyArray_Descr *result_dtype, - NPY_CASTING casting, - npy_bool *axis_flags, int reorderable, - int keepdims, - PyObject *identity, - PyArray_ReduceLoopFunc *loop, - void *data, npy_intp buffersize, const char *funcname, - int errormask); +PyUFunc_ReduceWrapper(PyArrayMethod_Context *context, + PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask, + npy_bool *axis_flags, int reorderable, int keepdims, + PyObject *identity, PyArray_ReduceLoopFunc *loop, + void *data, npy_intp buffersize, const char *funcname, int errormask); #endif diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index d47be9a30..0e2c1ab8b 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -169,7 +169,7 @@ run_@func@_avx512_skx_@TYPE@(char **args, npy_intp const *dimensions, npy_intp c */ /**begin repeat2 - * #func = rint, floor, ceil, trunc# + * #func = rint, floor, trunc# */ #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS @@ -850,12 +850,6 @@ fma_floor_@vsub@(@vtype@ x) } NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@ -fma_ceil_@vsub@(@vtype@ x) -{ - return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF); -} - -NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@ fma_trunc_@vsub@(@vtype@ x) { return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO); @@ -988,12 +982,6 @@ avx512_floor_@vsub@(@vtype@ x) } NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@ -avx512_ceil_@vsub@(@vtype@ x) -{ - return _mm512_roundscale_@vsub@(x, 0x0A); -} - -NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@ avx512_trunc_@vsub@(@vtype@ x) { return _mm512_roundscale_@vsub@(x, 0x0B); @@ -1327,8 +1315,8 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s */ /**begin repeat1 - * #func = rint, ceil, floor, trunc# - * #vectorf = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# + * #vectorf = rint, floor, trunc# */ #if defined @CHK@ @@ -1398,8 +1386,8 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void */ /**begin repeat1 - * #func = rint, ceil, floor, trunc# - * #vectorf = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# + * #vectorf = rint, floor, trunc# */ #if defined @CHK@ diff --git a/numpy/core/src/umath/svml b/numpy/core/src/umath/svml -Subproject 9f8af767ed6c75455d9a382af829048f8dd1806 +Subproject 1c5260a61e7dce6be48073dfa96291edb0a11d7 diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 42290e8c9..186f18a62 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -616,9 +616,24 @@ _is_same_name(const char* s1, const char* s2) } /* - * Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets, - * and core_signature in PyUFuncObject "ufunc". Returns 0 unless an - * error occurred. + * Sets the following fields in the PyUFuncObject 'ufunc': + * + * Field Type Array Length + * core_enabled int (effectively bool) N/A + * core_num_dim_ix int N/A + * core_dim_flags npy_uint32 * core_num_dim_ix + * core_dim_sizes npy_intp * core_num_dim_ix + * core_num_dims int * nargs (i.e. nin+nout) + * core_offsets int * nargs + * core_dim_ixs int * sum(core_num_dims) + * core_signature char * strlen(signature) + 1 + * + * The function assumes that the values that are arrays have not + * been set already, and sets these pointers to memory allocated + * with PyArray_malloc. These are freed when the ufunc dealloc + * method is called. + * + * Returns 0 unless an error occurred. */ static int _parse_signature(PyUFuncObject *ufunc, const char *signature) @@ -990,6 +1005,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc, } /* Convert and fill in output arguments */ + memset(out_op_DTypes + nin, 0, nout * sizeof(*out_op_DTypes)); if (full_args.out != NULL) { for (int i = 0; i < nout; i++) { obj = PyTuple_GET_ITEM(full_args.out, i); @@ -1047,6 +1063,7 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, PyArrayObject **op, PyArray_Descr **dtypes, NPY_CASTING casting, npy_intp buffersize) { + int force_cast_input = ufuncimpl->flags & _NPY_METH_FORCE_CAST_INPUTS; int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout; for (i = 0; i < nop; ++i) { @@ -1070,7 +1087,13 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl, must_copy = 1; } - if (PyArray_MinCastSafety(safety, casting) != casting) { + if (force_cast_input && i < nin) { + /* + * ArrayMethod flagged to ignore casting (logical funcs + * can force cast to bool) + */ + } + else if (PyArray_MinCastSafety(safety, casting) != casting) { return 0; /* the cast is not safe enough */ } } @@ -1360,8 +1383,15 @@ validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc, */ return 0; } - if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) { - return -1; + if (method->flags & _NPY_METH_FORCE_CAST_INPUTS) { + if (PyUFunc_ValidateOutCasting(ufunc, casting, ops, descriptors) < 0) { + return -1; + } + } + else { + if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) { + return -1; + } } return 0; } @@ -2470,9 +2500,9 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { - .caller = (PyObject *)ufunc, - .method = ufuncimpl, - .descriptors = operation_descrs, + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, }; PyArrayMethod_StridedLoop *strided_loop; NPY_ARRAYMETHOD_FLAGS flags = 0; @@ -2527,7 +2557,7 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc, PyArray_free(inner_strides); NPY_AUXDATA_FREE(auxdata); - if (NpyIter_Deallocate(iter) < 0) { + if (!NpyIter_Deallocate(iter)) { retval = -1; } @@ -2592,9 +2622,9 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc, /* Final preparation of the arraymethod call */ PyArrayMethod_Context context = { - .caller = (PyObject *)ufunc, - .method = ufuncimpl, - .descriptors = operation_descrs, + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, }; /* Do the ufunc loop */ @@ -2661,195 +2691,129 @@ PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc), /* - * Given the output type, finds the specified binary op. The - * ufunc must have nin==2 and nout==1. The function may modify - * otype if the given type isn't found. + * Promote and resolve a reduction like operation. * - * Returns 0 on success, -1 on failure. + * @param ufunc + * @param arr The operation array + * @param out The output array or NULL if not provided. Note that NumPy always + * used out to mean the same as `dtype=out.dtype` and never passed + * the array itself to the type-resolution. + * @param signature The DType signature, which may already be set due to the + * dtype passed in by the user, or the special cases (add, multiply). + * (Contains strong references and may be modified.) + * @param enforce_uniform_args If `NPY_TRUE` fully uniform dtypes/descriptors + * are enforced as required for accumulate and (currently) reduceat. + * @param out_descrs New references to the resolved descriptors (on success). + * @param method The ufunc method, "reduce", "reduceat", or "accumulate". + + * @returns ufuncimpl The `ArrayMethod` implemention to use. Or NULL if an + * error occurred. */ -static int -get_binary_op_function(PyUFuncObject *ufunc, int *otype, - PyUFuncGenericFunction *out_innerloop, - void **out_innerloopdata) +static PyArrayMethodObject * +reducelike_promote_and_resolve(PyUFuncObject *ufunc, + PyArrayObject *arr, PyArrayObject *out, + PyArray_DTypeMeta *signature[3], + npy_bool enforce_uniform_args, PyArray_Descr *out_descrs[3], + char *method) { - int i; - - NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n", - *otype); - - /* If the type is custom and there are userloops, search for it here */ - if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) { - PyObject *key, *obj; - key = PyLong_FromLong(*otype); - if (key == NULL) { - return -1; - } - obj = PyDict_GetItemWithError(ufunc->userloops, key); - Py_DECREF(key); - if (obj == NULL && PyErr_Occurred()) { - return -1; - } - else if (obj != NULL) { - PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL); - if (funcdata == NULL) { - return -1; - } - while (funcdata != NULL) { - int *types = funcdata->arg_types; - - if (types[0] == *otype && types[1] == *otype && - types[2] == *otype) { - *out_innerloop = funcdata->func; - *out_innerloopdata = funcdata->data; - return 0; - } + /* + * Note that the `ops` is not realy correct. But legacy resolution + * cannot quite handle the correct ops (e.g. a NULL first item if `out` + * is NULL), and it should only matter in very strange cases. + */ + PyArrayObject *ops[3] = {arr, arr, NULL}; + /* + * TODO: If `out` is not provided, arguably `initial` could define + * the first DType (and maybe also the out one), that way + * `np.add.reduce([1, 2, 3], initial=3.4)` would return a float + * value. As of 1.20, it returned an integer, so that should + * probably go to an error/warning first. + */ + PyArray_DTypeMeta *operation_DTypes[3] = { + NULL, NPY_DTYPE(PyArray_DESCR(arr)), NULL}; + Py_INCREF(operation_DTypes[1]); - funcdata = funcdata->next; - } - } + if (out != NULL) { + operation_DTypes[0] = NPY_DTYPE(PyArray_DESCR(out)); + Py_INCREF(operation_DTypes[0]); + operation_DTypes[2] = operation_DTypes[0]; + Py_INCREF(operation_DTypes[2]); } - /* Search for a function with compatible inputs */ - for (i = 0; i < ufunc->ntypes; ++i) { - char *types = ufunc->types + i*ufunc->nargs; - - NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n", - types[0], types[1], types[2]); - - if (PyArray_CanCastSafely(*otype, types[0]) && - types[0] == types[1] && - (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) { - /* If the signature is "xx->x", we found the loop */ - if (types[2] == types[0]) { - *out_innerloop = ufunc->functions[i]; - *out_innerloopdata = ufunc->data[i]; - *otype = types[0]; - return 0; - } - /* - * Otherwise, we found the natural type of the reduction, - * replace otype and search again - */ - else { - *otype = types[2]; - break; - } - } + PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, + ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE); + Py_DECREF(operation_DTypes[1]); + if (out != NULL) { + Py_DECREF(operation_DTypes[0]); + Py_DECREF(operation_DTypes[2]); } - - /* Search for the exact function */ - for (i = 0; i < ufunc->ntypes; ++i) { - char *types = ufunc->types + i*ufunc->nargs; - - if (PyArray_CanCastSafely(*otype, types[0]) && - types[0] == types[1] && - types[1] == types[2] && - (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) { - /* Since the signature is "xx->x", we found the loop */ - *out_innerloop = ufunc->functions[i]; - *out_innerloopdata = ufunc->data[i]; - *otype = types[0]; - return 0; - } + if (ufuncimpl == NULL) { + return NULL; } - return -1; -} - -static int -reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, - PyArray_Descr *odtype, PyArray_Descr **out_dtype) -{ - int i, retcode; - PyArrayObject *op[3] = {arr, arr, NULL}; - PyArray_Descr *dtypes[3] = {NULL, NULL, NULL}; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); - PyObject *type_tup = NULL; - - *out_dtype = NULL; - /* - * If odtype is specified, make a type tuple for the type - * resolution. + * Find the correct descriptors for the operation. We use unsafe casting + * for historic reasons: The logic ufuncs required it to cast everything to + * boolean. However, we now special case the logical ufuncs, so that the + * casting safety could in principle be set to the default same-kind. + * (although this should possibly happen through a deprecation) */ - if (odtype != NULL) { - type_tup = PyTuple_Pack(3, odtype, odtype, Py_None); - if (type_tup == NULL) { - return -1; - } - } - - /* Use the type resolution function to find our loop */ - retcode = ufunc->type_resolver( - ufunc, NPY_UNSAFE_CASTING, - op, type_tup, dtypes); - Py_DECREF(type_tup); - if (retcode == -1) { - return -1; - } - else if (retcode == -2) { - PyErr_Format(PyExc_RuntimeError, - "type resolution returned NotImplemented to " - "reduce ufunc %s", ufunc_name); - return -1; + if (resolve_descriptors(3, ufunc, ufuncimpl, + ops, out_descrs, signature, NPY_UNSAFE_CASTING) < 0) { + return NULL; } /* - * The first two type should be equivalent. Because of how - * reduce has historically behaved in NumPy, the return type - * could be different, and it is the return type on which the - * reduction occurs. + * The first operand and output should be the same array, so they should + * be identical. The second argument can be different for reductions, + * but is checked to be identical for accumulate and reduceat. */ - if (!PyArray_EquivTypes(dtypes[0], dtypes[1])) { - for (i = 0; i < 3; ++i) { - Py_DECREF(dtypes[i]); - } - PyErr_Format(PyExc_RuntimeError, - "could not find a type resolution appropriate for " - "reduce ufunc %s", ufunc_name); - return -1; + if (out_descrs[0] != out_descrs[2] || ( + enforce_uniform_args && out_descrs[0] != out_descrs[1])) { + PyErr_Format(PyExc_TypeError, + "the resolved dtypes are not compatible with %s.%s", + ufunc_get_name_cstr(ufunc), method); + goto fail; + } + /* TODO: This really should _not_ be unsafe casting (same above)! */ + if (validate_casting(ufuncimpl, + ufunc, ops, out_descrs, NPY_UNSAFE_CASTING) < 0) { + goto fail; } - Py_DECREF(dtypes[0]); - Py_DECREF(dtypes[1]); - *out_dtype = dtypes[2]; + return ufuncimpl; - return 0; + fail: + for (int i = 0; i < 3; ++i) { + Py_DECREF(out_descrs[i]); + } + return NULL; } + static int -reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, - npy_intp const *countptr, NpyIter_IterNextFunc *iternext, - int needs_api, npy_intp skip_first_count, void *data) +reduce_loop(PyArrayMethod_Context *context, + PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata, + NpyIter *iter, char **dataptrs, npy_intp const *strides, + npy_intp const *countptr, NpyIter_IterNextFunc *iternext, + int needs_api, npy_intp skip_first_count) { - PyArray_Descr *dtypes[3], **iter_dtypes; - PyUFuncObject *ufunc = (PyUFuncObject *)data; - char *dataptrs_copy[3]; - npy_intp strides_copy[3]; + int retval; + char *dataptrs_copy[4]; + npy_intp strides_copy[4]; npy_bool masked; - /* The normal selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; - NPY_BEGIN_THREADS_DEF; /* Get the number of operands, to determine whether "where" is used */ masked = (NpyIter_GetNOp(iter) == 3); - /* Get the inner loop */ - iter_dtypes = NpyIter_GetDescrArray(iter); - dtypes[0] = iter_dtypes[0]; - dtypes[1] = iter_dtypes[1]; - dtypes[2] = iter_dtypes[0]; - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { - return -1; + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); } - NPY_BEGIN_THREADS_NDITER(iter); - if (skip_first_count > 0) { - do { + assert(!masked); /* Path currently not available for masked */ + while (1) { npy_intp count = *countptr; /* Skip any first-visit elements */ @@ -2872,27 +2836,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; - innerloop(dataptrs_copy, &count, - strides_copy, innerloopdata); - if (needs_api && PyErr_Occurred()) { + retval = strided_loop(context, + dataptrs_copy, &count, strides_copy, auxdata); + if (retval < 0) { goto finish_loop; } - /* Jump to the faster loop when skipping is done */ - if (skip_first_count == 0) { - if (iternext(iter)) { - break; - } - else { - goto finish_loop; - } + /* Advance loop, and abort on error (or finish) */ + if (!iternext(iter)) { + goto finish_loop; } - } while (iternext(iter)); - } - if (needs_api && PyErr_Occurred()) { - goto finish_loop; + /* When skipping is done break and continue with faster loop */ + if (skip_first_count == 0) { + break; + } + } } do { @@ -2903,42 +2863,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides, strides_copy[0] = strides[0]; strides_copy[1] = strides[1]; strides_copy[2] = strides[0]; - - if (!masked) { - innerloop(dataptrs_copy, countptr, - strides_copy, innerloopdata); + if (masked) { + dataptrs_copy[3] = dataptrs[2]; + strides_copy[3] = strides[2]; } - else { - npy_intp count = *countptr; - char *maskptr = dataptrs[2]; - npy_intp mask_stride = strides[2]; - /* Optimization for when the mask is broadcast */ - npy_intp n = mask_stride == 0 ? count : 1; - while (count) { - char mask = *maskptr; - maskptr += mask_stride; - while (n < count && mask == *maskptr) { - n++; - maskptr += mask_stride; - } - /* If mask set, apply inner loop on this contiguous region */ - if (mask) { - innerloop(dataptrs_copy, &n, - strides_copy, innerloopdata); - } - dataptrs_copy[0] += n * strides[0]; - dataptrs_copy[1] += n * strides[1]; - dataptrs_copy[2] = dataptrs_copy[0]; - count -= n; - n = 1; - } + + retval = strided_loop(context, + dataptrs_copy, countptr, strides_copy, auxdata); + if (retval < 0) { + goto finish_loop; } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + + } while (iternext(iter)); finish_loop: NPY_END_THREADS; - return (needs_api && PyErr_Occurred()) ? -1 : 0; + return retval; } /* @@ -2959,15 +2900,14 @@ finish_loop: * this function does not validate them. */ static PyArrayObject * -PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, - int naxes, int *axes, PyArray_Descr *odtype, int keepdims, +PyUFunc_Reduce(PyUFuncObject *ufunc, + PyArrayObject *arr, PyArrayObject *out, + int naxes, int *axes, PyArray_DTypeMeta *signature[3], int keepdims, PyObject *initial, PyArrayObject *wheremask) { int iaxes, ndim; npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; - PyArray_Descr *dtype; - PyArrayObject *result; PyObject *identity; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ @@ -2994,6 +2934,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } /* Get the identity */ + /* TODO: Both of these should be provided by the ArrayMethod! */ identity = _get_identity(ufunc, &reorderable); if (identity == NULL) { return NULL; @@ -3017,21 +2958,27 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, Py_INCREF(initial); /* match the reference count in the if above */ } - /* Get the reduction dtype */ - if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) { + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_FALSE, descrs, "reduce"); + if (ufuncimpl == NULL) { Py_DECREF(initial); return NULL; } - result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype, - NPY_UNSAFE_CASTING, - axis_flags, reorderable, - keepdims, - initial, - reduce_loop, - ufunc, buffersize, ufunc_name, errormask); + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; - Py_DECREF(dtype); + PyArrayObject *result = PyUFunc_ReduceWrapper(&context, + arr, out, wheremask, axis_flags, reorderable, keepdims, + initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); + + for (int i = 0; i < 3; i++) { + Py_DECREF(descrs[i]); + } Py_DECREF(initial); return result; } @@ -3039,23 +2986,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, static PyObject * PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, - int axis, int otype) + int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[2]; - PyArray_Descr *op_dtypes[2] = {NULL, NULL}; int op_axes_arrays[2][NPY_MAXDIMS]; int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]}; npy_uint32 op_flags[2]; - int idim, ndim, otype_final; + int idim, ndim; int needs_api, need_outer_iterator; - NpyIter *iter = NULL; + int res = 0; - /* The selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; - const char *ufunc_name = ufunc_get_name_cstr(ufunc); + NpyIter *iter = NULL; /* These parameters come from extobj= or from a TLS global */ int buffersize = 0, errormask = 0; @@ -3077,42 +3022,32 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, /* Take a reference to out for later returning */ Py_XINCREF(out); - otype_final = otype; - if (get_binary_op_function(ufunc, &otype_final, - &innerloop, &innerloopdata) < 0) { - PyArray_Descr *dtype = PyArray_DescrFromType(otype); - PyErr_Format(PyExc_ValueError, - "could not find a matching type for %s.accumulate, " - "requested type has type code '%c'", - ufunc_name, dtype ? dtype->type : '-'); - Py_XDECREF(dtype); - goto fail; + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_TRUE, descrs, "accumulate"); + if (ufuncimpl == NULL) { + return NULL; } - ndim = PyArray_NDIM(arr); + /* The below code assumes that all descriptors are identical: */ + assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]); - /* - * Set up the output data type, using the input's exact - * data type if the type number didn't change to preserve - * metadata - */ - if (PyArray_DESCR(arr)->type_num == otype_final) { - if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) { - op_dtypes[0] = PyArray_DESCR(arr); - Py_INCREF(op_dtypes[0]); - } - else { - op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr), - NPY_NATIVE); - } - } - else { - op_dtypes[0] = PyArray_DescrFromType(otype_final); - } - if (op_dtypes[0] == NULL) { + if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { + /* This can be removed, but the initial element copy needs fixing */ + PyErr_SetString(PyExc_TypeError, + "accumulation currently only supports `object` dtype with " + "references"); goto fail; } + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; + + ndim = PyArray_NDIM(arr); + #if NPY_UF_DBG_TRACING printf("Found %s.accumulate inner loop with dtype : ", ufunc_name); PyObject_Print((PyObject *)op_dtypes[0], stdout, 0); @@ -3138,9 +3073,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, need_outer_iterator = (ndim > 1); /* We can't buffer, so must do UPDATEIFCOPY */ if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) || - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr)) || + !PyArray_EquivTypes(descrs[1], PyArray_DESCR(arr)) || (out && - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) { + !PyArray_EquivTypes(descrs[0], PyArray_DESCR(out)))) { need_outer_iterator = 1; } /* If input and output overlap in memory, use iterator to figure it out */ @@ -3153,7 +3088,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_COPY_IF_OVERLAP; - PyArray_Descr **op_dtypes_param = NULL; /* * The way accumulate is set up, we can't do buffering, @@ -3170,13 +3104,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, */ op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - op_dtypes_param = op_dtypes; - op_dtypes[1] = op_dtypes[0]; + NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(2, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, - op_flags, - op_dtypes_param, + op_flags, descrs, ndim_iter, op_axes, NULL, 0); if (iter == NULL) { goto fail; @@ -3194,14 +3126,14 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, } } - /* Get the output */ + /* Get the output from the iterator if it was allocated */ if (out == NULL) { if (iter) { op[0] = out = NpyIter_GetOperandArray(iter)[0]; Py_INCREF(out); } else { - PyArray_Descr *dtype = op_dtypes[0]; + PyArray_Descr *dtype = descrs[0]; Py_INCREF(dtype); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( &PyArray_Type, dtype, @@ -3210,10 +3142,31 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, if (out == NULL) { goto fail; } - } } + npy_intp fixed_strides[3]; + if (need_outer_iterator) { + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + } + else { + fixed_strides[0] = PyArray_STRIDES(op[0])[axis]; + fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; + fixed_strides[2] = fixed_strides[0]; + } + + + NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufuncimpl->get_strided_loop(&context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } + /* * If the reduction axis has size zero, either return the reduction * unit for UFUNC_REDUCE, or return the zero-sized output array @@ -3234,7 +3187,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NpyIter_IterNextFunc *iternext; char **dataptr; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -3242,8 +3195,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, goto fail; } dataptr = NpyIter_GetDataPtrArray(iter); - needs_api = NpyIter_IterationNeedsAPI(iter); - + needs_api |= NpyIter_IterationNeedsAPI(iter); /* Execute the loop with just the outer iterator */ count_m1 = PyArray_DIM(op[1], axis)-1; @@ -3257,7 +3209,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, stride_copy[1] = stride1; stride_copy[2] = stride0; - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } do { dataptr_copy[0] = dataptr[0]; @@ -3270,7 +3224,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, * Output (dataptr[0]) and input (dataptr[1]) may point to * the same memory, e.g. np.add.accumulate(a, out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3290,18 +3244,17 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, dataptr_copy[2] += stride0; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count_m1); - innerloop(dataptr_copy, &count_m1, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count_m1, stride_copy, auxdata); } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; - npy_intp stride_copy[3]; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; /* Execute the loop with no iterators */ npy_intp count = PyArray_DIM(op[1], axis); @@ -3315,15 +3268,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, PyArray_NDIM(op[0]))) { PyErr_SetString(PyExc_ValueError, "provided out is the wrong size " - "for the reduction"); + "for the accumulation."); goto fail; } stride0 = PyArray_STRIDE(op[0], axis); - stride_copy[0] = stride0; - stride_copy[1] = stride1; - stride_copy[2] = stride0; - /* Turn the two items into three for the inner loop */ dataptr_copy[0] = PyArray_BYTES(op[0]); dataptr_copy[1] = PyArray_BYTES(op[1]); @@ -3335,7 +3284,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, * Output (dataptr[0]) and input (dataptr[1]) may point to the * same memory, e.g. np.add.accumulate(a, out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3356,25 +3305,34 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - needs_api = PyDataType_REFCHK(op_dtypes[0]); + needs_api = PyDataType_REFCHK(descrs[0]); if (!needs_api) { NPY_BEGIN_THREADS_THRESHOLDED(count); } - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, fixed_strides, auxdata); NPY_END_THREADS; } } finish: - Py_XDECREF(op_dtypes[0]); - int res = 0; + NPY_AUXDATA_FREE(auxdata); + Py_DECREF(descrs[0]); + Py_DECREF(descrs[1]); + Py_DECREF(descrs[2]); + if (!NpyIter_Deallocate(iter)) { res = -1; } + + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + res = _check_ufunc_fperr(errormask, NULL, "accumulate"); + } + if (res < 0) { Py_DECREF(out); return NULL; @@ -3384,7 +3342,11 @@ finish: fail: Py_XDECREF(out); - Py_XDECREF(op_dtypes[0]); + + NPY_AUXDATA_FREE(auxdata); + Py_XDECREF(descrs[0]); + Py_XDECREF(descrs[1]); + Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); @@ -3409,28 +3371,31 @@ fail: * indices[1::2] = range(1,len(array)) * * output shape is based on the size of indices + * + * TODO: Reduceat duplicates too much code from accumulate! */ static PyObject * PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, - PyArrayObject *out, int axis, int otype) + PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3]) { PyArrayObject *op[3]; - PyArray_Descr *op_dtypes[3] = {NULL, NULL, NULL}; int op_axes_arrays[3][NPY_MAXDIMS]; int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1], op_axes_arrays[2]}; npy_uint32 op_flags[3]; - int idim, ndim, otype_final; - int need_outer_iterator = 0; + int idim, ndim; + int needs_api, need_outer_iterator = 0; + + int res = 0; NpyIter *iter = NULL; + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; + /* The reduceat indices - ind must be validated outside this call */ npy_intp *reduceat_ind; npy_intp i, ind_size, red_axis_size; - /* The selected inner loop */ - PyUFuncGenericFunction innerloop = NULL; - void *innerloopdata = NULL; const char *ufunc_name = ufunc_get_name_cstr(ufunc); char *opname = "reduceat"; @@ -3470,42 +3435,32 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, /* Take a reference to out for later returning */ Py_XINCREF(out); - otype_final = otype; - if (get_binary_op_function(ufunc, &otype_final, - &innerloop, &innerloopdata) < 0) { - PyArray_Descr *dtype = PyArray_DescrFromType(otype); - PyErr_Format(PyExc_ValueError, - "could not find a matching type for %s.%s, " - "requested type has type code '%c'", - ufunc_name, opname, dtype ? dtype->type : '-'); - Py_XDECREF(dtype); - goto fail; + PyArray_Descr *descrs[3]; + PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc, + arr, out, signature, NPY_TRUE, descrs, "reduceat"); + if (ufuncimpl == NULL) { + return NULL; } - ndim = PyArray_NDIM(arr); + /* The below code assumes that all descriptors are identical: */ + assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]); - /* - * Set up the output data type, using the input's exact - * data type if the type number didn't change to preserve - * metadata - */ - if (PyArray_DESCR(arr)->type_num == otype_final) { - if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) { - op_dtypes[0] = PyArray_DESCR(arr); - Py_INCREF(op_dtypes[0]); - } - else { - op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr), - NPY_NATIVE); - } - } - else { - op_dtypes[0] = PyArray_DescrFromType(otype_final); - } - if (op_dtypes[0] == NULL) { + if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) { + /* This can be removed, but the initial element copy needs fixing */ + PyErr_SetString(PyExc_TypeError, + "reduceat currently only supports `object` dtype with " + "references"); goto fail; } + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = descrs, + }; + + ndim = PyArray_NDIM(arr); + #if NPY_UF_DBG_TRACING printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname); PyObject_Print((PyObject *)op_dtypes[0], stdout, 0); @@ -3532,11 +3487,13 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, op[2] = ind; if (out != NULL || ndim > 1 || !PyArray_ISALIGNED(arr) || - !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr))) { + !PyArray_EquivTypes(descrs[0], PyArray_DESCR(arr))) { need_outer_iterator = 1; } if (need_outer_iterator) { + PyArray_Descr *op_dtypes[3] = {descrs[0], descrs[1], NULL}; + npy_uint32 flags = NPY_ITER_ZEROSIZE_OK| NPY_ITER_REFS_OK| NPY_ITER_MULTI_INDEX| @@ -3565,8 +3522,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, NPY_UF_DBG_PRINT("Allocating outer iterator\n"); iter = NpyIter_AdvancedNew(3, op, flags, NPY_KEEPORDER, NPY_UNSAFE_CASTING, - op_flags, - op_dtypes, + op_flags, op_dtypes, ndim, op_axes, NULL, 0); if (iter == NULL) { goto fail; @@ -3590,11 +3546,15 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, Py_INCREF(out); } } - /* Allocate the output for when there's no outer iterator */ - else if (out == NULL) { - Py_INCREF(op_dtypes[0]); + else { + /* + * Allocate the output for when there's no outer iterator, we always + * use the outer_iteration path when `out` is passed. + */ + assert(out == NULL); + Py_INCREF(descrs[0]); op[0] = out = (PyArrayObject *)PyArray_NewFromDescr( - &PyArray_Type, op_dtypes[0], + &PyArray_Type, descrs[0], 1, &ind_size, NULL, NULL, 0, NULL); if (out == NULL) { @@ -3602,6 +3562,28 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } } + npy_intp fixed_strides[3]; + if (need_outer_iterator) { + NpyIter_GetInnerFixedStrideArray(iter, fixed_strides); + } + else { + fixed_strides[1] = PyArray_STRIDES(op[1])[axis]; + } + /* The reduce axis does not advance here in the strided-loop */ + fixed_strides[0] = 0; + fixed_strides[2] = 0; + + NPY_ARRAYMETHOD_FLAGS flags = 0; + if (ufuncimpl->get_strided_loop(&context, + 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } + /* * If the output has zero elements, return now. */ @@ -3619,8 +3601,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, npy_intp stride0, stride1; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); - int itemsize = op_dtypes[0]->elsize; - int needs_api = NpyIter_IterationNeedsAPI(iter); + int itemsize = descrs[0]->elsize; + needs_api |= NpyIter_IterationNeedsAPI(iter); /* Get the variables needed for the loop */ iternext = NpyIter_GetIterNext(iter, NULL); @@ -3640,10 +3622,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, stride_copy[1] = stride1; stride_copy[2] = stride0; - NPY_BEGIN_THREADS_NDITER(iter); + if (!needs_api) { + NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter)); + } do { - for (i = 0; i < ind_size; ++i) { npy_intp start = reduceat_ind[i], end = (i == ind_size-1) ? count_m1+1 : @@ -3661,7 +3644,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, * to the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of * the reference count being zero temporarily. @@ -3681,33 +3664,24 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, stride_copy, auxdata); } } - } while (!(needs_api && PyErr_Occurred()) && iternext(iter)); + } while (res == 0 && iternext(iter)); NPY_END_THREADS; } else if (iter == NULL) { char *dataptr_copy[3]; - npy_intp stride_copy[3]; - int itemsize = op_dtypes[0]->elsize; + int itemsize = descrs[0]->elsize; npy_intp stride0_ind = PyArray_STRIDE(op[0], axis); - - /* Execute the loop with no iterators */ - npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis); - - int needs_api = PyDataType_REFCHK(op_dtypes[0]); + npy_intp stride1 = PyArray_STRIDE(op[1], axis); NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n"); - stride_copy[0] = stride0; - stride_copy[1] = stride1; - stride_copy[2] = stride0; - if (!needs_api) { NPY_BEGIN_THREADS; } @@ -3729,7 +3703,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, * the same memory, e.g. * np.add.reduceat(a, np.arange(len(a)), out=a). */ - if (otype == NPY_OBJECT) { + if (descrs[2]->type_num == NPY_OBJECT) { /* * Incref before decref to avoid the possibility of the * reference count being zero temporarily. @@ -3749,8 +3723,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, dataptr_copy[1] += stride1; NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count); - innerloop(dataptr_copy, &count, - stride_copy, innerloopdata); + res = strided_loop(&context, + dataptr_copy, &count, fixed_strides, auxdata); + if (res != 0) { + break; + } } } @@ -3758,8 +3735,21 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind, } finish: - Py_XDECREF(op_dtypes[0]); + NPY_AUXDATA_FREE(auxdata); + Py_DECREF(descrs[0]); + Py_DECREF(descrs[1]); + Py_DECREF(descrs[2]); + if (!NpyIter_Deallocate(iter)) { + res = -1; + } + + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + res = _check_ufunc_fperr(errormask, NULL, "reduceat"); + } + + if (res < 0) { Py_DECREF(out); return NULL; } @@ -3768,9 +3758,14 @@ finish: fail: Py_XDECREF(out); - Py_XDECREF(op_dtypes[0]); + + NPY_AUXDATA_FREE(auxdata); + Py_XDECREF(descrs[0]); + Py_XDECREF(descrs[1]); + Py_XDECREF(descrs[2]); NpyIter_Deallocate(iter); + return NULL; } @@ -3868,7 +3863,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL; PyObject *op = NULL; PyArrayObject *indices = NULL; - PyArray_Descr *otype = NULL; + PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL}; PyArrayObject *out = NULL; int keepdims = 0; PyObject *initial = NULL; @@ -4012,13 +4007,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } if (otype_obj && otype_obj != Py_None) { /* Use `_get_dtype` because `dtype` is a DType and not the instance */ - PyArray_DTypeMeta *dtype = _get_dtype(otype_obj); - if (dtype == NULL) { + signature[0] = _get_dtype(otype_obj); + if (signature[0] == NULL) { goto fail; } - otype = dtype->singleton; - Py_INCREF(otype); - Py_DECREF(dtype); } if (out_obj && !PyArray_OutputConverter(out_obj, &out)) { goto fail; @@ -4038,15 +4030,6 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, ndim = PyArray_NDIM(mp); - /* Check to see that type (and otype) is not FLEXIBLE */ - if (PyArray_ISFLEXIBLE(mp) || - (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) { - PyErr_Format(PyExc_TypeError, - "cannot perform %s with flexible type", - _reduce_type[operation]); - goto fail; - } - /* Convert the 'axis' parameter into a list of axes */ if (axes_obj == NULL) { /* apply defaults */ @@ -4109,14 +4092,12 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } /* - * If out is specified it determines otype - * unless otype already specified. + * If no dtype is specified and out is not specified, we override the + * integer and bool dtype used for add and multiply. + * + * TODO: The following should be handled by a promoter! */ - if (otype == NULL && out != NULL) { - otype = PyArray_DESCR(out); - Py_INCREF(otype); - } - if (otype == NULL) { + if (signature[0] == NULL && out == NULL) { /* * For integer types --- make sure at least a long * is used for add and multiply reduction to avoid overflow @@ -4136,16 +4117,17 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, typenum = NPY_LONG; } } + signature[0] = PyArray_DTypeFromTypeNum(typenum); } - otype = PyArray_DescrFromType(typenum); } - + Py_XINCREF(signature[0]); + signature[2] = signature[0]; switch(operation) { case UFUNC_REDUCE: - ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes, - otype, keepdims, initial, wheremask); - Py_XDECREF(wheremask); + ret = PyUFunc_Reduce(ufunc, + mp, out, naxes, axes, signature, keepdims, initial, wheremask); + Py_XSETREF(wheremask, NULL); break; case UFUNC_ACCUMULATE: if (ndim == 0) { @@ -4157,8 +4139,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, "accumulate does not allow multiple axes"); goto fail; } - ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0], - otype->type_num); + ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, + mp, out, axes[0], signature); break; case UFUNC_REDUCEAT: if (ndim == 0) { @@ -4171,19 +4153,22 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, goto fail; } ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc, - mp, indices, out, axes[0], otype->type_num); + mp, indices, out, axes[0], signature); Py_SETREF(indices, NULL); break; } + if (ret == NULL) { + goto fail; + } + + Py_DECREF(signature[0]); + Py_DECREF(signature[1]); + Py_DECREF(signature[2]); + Py_DECREF(mp); - Py_DECREF(otype); Py_XDECREF(full_args.in); Py_XDECREF(full_args.out); - if (ret == NULL) { - return NULL; - } - /* Wrap and return the output */ { /* Find __array_wrap__ - note that these rules are different to the @@ -4211,7 +4196,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, } fail: - Py_XDECREF(otype); + Py_XDECREF(signature[0]); + Py_XDECREF(signature[1]); + Py_XDECREF(signature[2]); + Py_XDECREF(mp); Py_XDECREF(wheremask); Py_XDECREF(indices); @@ -4938,65 +4926,6 @@ fail: /* - * TODO: The implementation below can be replaced with PyVectorcall_Call - * when available (should be Python 3.8+). - */ -static PyObject * -ufunc_generic_call( - PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) -{ - Py_ssize_t len_args = PyTuple_GET_SIZE(args); - /* - * Wrapper for tp_call to tp_fastcall, to support both on older versions - * of Python. (and generally simplifying support of both versions in the - * same codebase. - */ - if (kwds == NULL) { - return ufunc_generic_fastcall(ufunc, - PySequence_Fast_ITEMS(args), len_args, NULL, NPY_FALSE); - } - - PyObject *new_args[NPY_MAXARGS]; - Py_ssize_t len_kwds = PyDict_Size(kwds); - - if (NPY_UNLIKELY(len_args + len_kwds > NPY_MAXARGS)) { - /* - * We do not have enough scratch-space, so we have to abort; - * In practice this error should not be seen by users. - */ - PyErr_Format(PyExc_ValueError, - "%s() takes from %d to %d positional arguments but " - "%zd were given", - ufunc_get_name_cstr(ufunc) , ufunc->nin, ufunc->nargs, len_args); - return NULL; - } - - /* Copy args into the scratch space */ - for (Py_ssize_t i = 0; i < len_args; i++) { - new_args[i] = PyTuple_GET_ITEM(args, i); - } - - PyObject *kwnames = PyTuple_New(len_kwds); - - PyObject *key, *value; - Py_ssize_t pos = 0; - Py_ssize_t i = 0; - while (PyDict_Next(kwds, &pos, &key, &value)) { - Py_INCREF(key); - PyTuple_SET_ITEM(kwnames, i, key); - new_args[i + len_args] = value; - i++; - } - - PyObject *res = ufunc_generic_fastcall(ufunc, - new_args, len_args, kwnames, NPY_FALSE); - Py_DECREF(kwnames); - return res; -} - - -#if PY_VERSION_HEX >= 0x03080000 -/* * Implement vectorcallfunc which should be defined with Python 3.8+. * In principle this could be backported, but the speed gain seems moderate * since ufunc calls often do not have keyword arguments and always have @@ -5013,7 +4942,6 @@ ufunc_generic_vectorcall(PyObject *ufunc, return ufunc_generic_fastcall((PyUFuncObject *)ufunc, args, PyVectorcall_NARGS(len_args), kwnames, NPY_FALSE); } -#endif /* PY_VERSION_HEX >= 0x03080000 */ NPY_NO_EXPORT PyObject * @@ -5190,11 +5118,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi ufunc->core_dim_flags = NULL; ufunc->userloops = NULL; ufunc->ptr = NULL; -#if PY_VERSION_HEX >= 0x03080000 ufunc->vectorcall = &ufunc_generic_vectorcall; -#else - ufunc->reserved2 = NULL; -#endif ufunc->reserved1 = 0; ufunc->iter_flags = 0; @@ -5892,15 +5816,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) PyArrayObject *op2_array = NULL; PyArrayMapIterObject *iter = NULL; PyArrayIterObject *iter2 = NULL; - PyArray_Descr *dtypes[3] = {NULL, NULL, NULL}; PyArrayObject *operands[3] = {NULL, NULL, NULL}; PyArrayObject *array_operands[3] = {NULL, NULL, NULL}; - int needs_api = 0; + PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL}; + PyArray_DTypeMeta *operand_DTypes[3] = {NULL, NULL, NULL}; + PyArray_Descr *operation_descrs[3] = {NULL, NULL, NULL}; - PyUFuncGenericFunction innerloop; - void *innerloopdata; - npy_intp i; int nop; /* override vars */ @@ -5913,6 +5835,10 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) int buffersize; int errormask = 0; char * err_msg = NULL; + + PyArrayMethod_StridedLoop *strided_loop; + NpyAuxData *auxdata = NULL; + NPY_BEGIN_THREADS_DEF; if (ufunc->nin > 2) { @@ -6000,26 +5926,51 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) /* * Create dtypes array for either one or two input operands. - * The output operand is set to the first input operand + * Compare to the logic in `convert_ufunc_arguments`. + * TODO: It may be good to review some of this behaviour, since the + * operand array is special (it is written to) similar to reductions. + * Using unsafe-casting as done here, is likely not desirable. */ operands[0] = op1_array; + operand_DTypes[0] = NPY_DTYPE(PyArray_DESCR(op1_array)); + Py_INCREF(operand_DTypes[0]); + int force_legacy_promotion = 0; + int allow_legacy_promotion = NPY_DT_is_legacy(operand_DTypes[0]); + if (op2_array != NULL) { operands[1] = op2_array; - operands[2] = op1_array; + operand_DTypes[1] = NPY_DTYPE(PyArray_DESCR(op2_array)); + Py_INCREF(operand_DTypes[1]); + allow_legacy_promotion &= NPY_DT_is_legacy(operand_DTypes[1]); + operands[2] = operands[0]; + operand_DTypes[2] = operand_DTypes[0]; + Py_INCREF(operand_DTypes[2]); + nop = 3; + if (allow_legacy_promotion && ((PyArray_NDIM(op1_array) == 0) + != (PyArray_NDIM(op2_array) == 0))) { + /* both are legacy and only one is 0-D: force legacy */ + force_legacy_promotion = should_use_min_scalar(2, operands, 0, NULL); + } } else { - operands[1] = op1_array; + operands[1] = operands[0]; + operand_DTypes[1] = operand_DTypes[0]; + Py_INCREF(operand_DTypes[1]); operands[2] = NULL; nop = 2; } - if (ufunc->type_resolver(ufunc, NPY_UNSAFE_CASTING, - operands, NULL, dtypes) < 0) { + PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc, + operands, signature, operand_DTypes, + force_legacy_promotion, allow_legacy_promotion); + if (ufuncimpl == NULL) { goto fail; } - if (ufunc->legacy_inner_loop_selector(ufunc, dtypes, - &innerloop, &innerloopdata, &needs_api) < 0) { + + /* Find the correct descriptors for the operation */ + if (resolve_descriptors(nop, ufunc, ufuncimpl, + operands, operation_descrs, signature, NPY_UNSAFE_CASTING) < 0) { goto fail; } @@ -6080,21 +6031,44 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) NPY_ITER_GROWINNER| NPY_ITER_DELAY_BUFALLOC, NPY_KEEPORDER, NPY_UNSAFE_CASTING, - op_flags, dtypes, + op_flags, operation_descrs, -1, NULL, NULL, buffersize); if (iter_buffer == NULL) { goto fail; } - needs_api = needs_api | NpyIter_IterationNeedsAPI(iter_buffer); - iternext = NpyIter_GetIterNext(iter_buffer, NULL); if (iternext == NULL) { NpyIter_Deallocate(iter_buffer); goto fail; } + PyArrayMethod_Context context = { + .caller = (PyObject *)ufunc, + .method = ufuncimpl, + .descriptors = operation_descrs, + }; + + NPY_ARRAYMETHOD_FLAGS flags; + /* Use contiguous strides; if there is such a loop it may be faster */ + npy_intp strides[3] = { + operation_descrs[0]->elsize, operation_descrs[1]->elsize, 0}; + if (nop == 3) { + strides[2] = operation_descrs[2]->elsize; + } + + if (ufuncimpl->get_strided_loop(&context, 1, 0, strides, + &strided_loop, &auxdata, &flags) < 0) { + goto fail; + } + int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0; + needs_api |= NpyIter_IterationNeedsAPI(iter_buffer); + if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* Start with the floating-point exception flags cleared */ + npy_clear_floatstatus_barrier((char*)&iter); + } + if (!needs_api) { NPY_BEGIN_THREADS; } @@ -6103,14 +6077,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) * Iterate over first and second operands and call ufunc * for each pair of inputs */ - i = iter->size; - while (i > 0) + int res = 0; + for (npy_intp i = iter->size; i > 0; i--) { char *dataptr[3]; char **buffer_dataptr; /* one element at a time, no stride required but read by innerloop */ - npy_intp count[3] = {1, 0xDEADBEEF, 0xDEADBEEF}; - npy_intp stride[3] = {0xDEADBEEF, 0xDEADBEEF, 0xDEADBEEF}; + npy_intp count = 1; /* * Set up data pointers for either one or two input operands. @@ -6129,14 +6102,14 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) /* Reset NpyIter data pointers which will trigger a buffer copy */ NpyIter_ResetBasePointers(iter_buffer, dataptr, &err_msg); if (err_msg) { + res = -1; break; } buffer_dataptr = NpyIter_GetDataPtrArray(iter_buffer); - innerloop(buffer_dataptr, count, stride, innerloopdata); - - if (needs_api && PyErr_Occurred()) { + res = strided_loop(&context, buffer_dataptr, &count, strides, auxdata); + if (res != 0) { break; } @@ -6150,32 +6123,35 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args) if (iter2 != NULL) { PyArray_ITER_NEXT(iter2); } - - i--; } NPY_END_THREADS; - if (err_msg) { + if (res != 0 && err_msg) { PyErr_SetString(PyExc_ValueError, err_msg); } + if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) { + /* NOTE: We could check float errors even when `res < 0` */ + res = _check_ufunc_fperr(errormask, NULL, "at"); + } + NPY_AUXDATA_FREE(auxdata); NpyIter_Deallocate(iter_buffer); Py_XDECREF(op2_array); Py_XDECREF(iter); Py_XDECREF(iter2); - for (i = 0; i < 3; i++) { - Py_XDECREF(dtypes[i]); + for (int i = 0; i < 3; i++) { + Py_XDECREF(operation_descrs[i]); Py_XDECREF(array_operands[i]); } /* - * An error should only be possible if needs_api is true, but this is not - * strictly correct for old-style ufuncs (e.g. `power` released the GIL - * but manually set an Exception). + * An error should only be possible if needs_api is true or `res != 0`, + * but this is not strictly correct for old-style ufuncs + * (e.g. `power` released the GIL but manually set an Exception). */ - if (PyErr_Occurred()) { + if (res != 0 || PyErr_Occurred()) { return NULL; } else { @@ -6190,10 +6166,11 @@ fail: Py_XDECREF(op2_array); Py_XDECREF(iter); Py_XDECREF(iter2); - for (i = 0; i < 3; i++) { - Py_XDECREF(dtypes[i]); + for (int i = 0; i < 3; i++) { + Py_XDECREF(operation_descrs[i]); Py_XDECREF(array_operands[i]); } + NPY_AUXDATA_FREE(auxdata); return NULL; } @@ -6396,19 +6373,15 @@ NPY_NO_EXPORT PyTypeObject PyUFunc_Type = { .tp_basicsize = sizeof(PyUFuncObject), .tp_dealloc = (destructor)ufunc_dealloc, .tp_repr = (reprfunc)ufunc_repr, - .tp_call = (ternaryfunc)ufunc_generic_call, + .tp_call = &PyVectorcall_Call, .tp_str = (reprfunc)ufunc_repr, .tp_flags = Py_TPFLAGS_DEFAULT | -#if PY_VERSION_HEX >= 0x03080000 _Py_TPFLAGS_HAVE_VECTORCALL | -#endif Py_TPFLAGS_HAVE_GC, .tp_traverse = (traverseproc)ufunc_traverse, .tp_methods = ufunc_methods, .tp_getset = ufunc_getset, -#if PY_VERSION_HEX >= 0x03080000 .tp_vectorcall_offset = offsetof(PyUFuncObject, vectorcall), -#endif }; /* End of code for ufunc objects */ diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 7e24bc493..9ed923cf5 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -247,6 +247,28 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc, } +/* + * Same as `PyUFunc_ValidateCasting` but only checks output casting. + */ +NPY_NO_EXPORT int +PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc, + NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes) +{ + int i, nin = ufunc->nin, nop = nin + ufunc->nout; + + for (i = nin; i < nop; ++i) { + if (operands[i] == NULL) { + continue; + } + if (!PyArray_CanCastTypeTo(dtypes[i], + PyArray_DESCR(operands[i]), casting)) { + return raise_output_casting_error( + ufunc, casting, dtypes[i], PyArray_DESCR(operands[i]), i); + } + } + return 0; +} + /*UFUNC_API * * This function applies the default type resolution rules @@ -2142,6 +2164,10 @@ type_tuple_type_resolver(PyUFuncObject *self, * `signature=(None,)*nin + (dtype,)*nout`. If the signature matches that * exactly (could be relaxed but that is not necessary for backcompat), * we also try `signature=(dtype,)*(nin+nout)`. + * Since reduction pass in `(dtype, None, dtype)` we broaden this to + * replacing all unspecified dtypes with the homogeneous output one. + * Note that this can (and often will) lead to unsafe casting. This is + * normally rejected (but not currently for reductions!). * This used to be the main meaning for `dtype=dtype`, but some calls broke * the expectation, and changing it allows for `dtype=dtype` to be useful * for ufuncs like `np.ldexp` in the future while also normalizing it to @@ -2160,13 +2186,12 @@ type_tuple_type_resolver(PyUFuncObject *self, if (homogeneous_type != NPY_NOTYPE) { for (int i = 0; i < nin; i++) { if (specified_types[i] != NPY_NOTYPE) { - homogeneous_type = NPY_NOTYPE; - break; + /* Never replace a specified type! */ + continue; } specified_types[i] = homogeneous_type; } - } - if (homogeneous_type != NPY_NOTYPE) { + /* Try again with the homogeneous specified types. */ res = type_tuple_type_resolver_core(self, op, input_casting, casting, specified_types, any_object, diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index dd88a081a..84a2593f4 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -99,6 +99,10 @@ PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc, PyObject *type_tup, PyArray_Descr **out_dtypes); +NPY_NO_EXPORT int +PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc, + NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes); + /* * Does a linear search for the best inner loop of the ufunc. * diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index a9954dfc1..272555704 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -22,6 +22,7 @@ #include "numpy/npy_math.h" #include "number.h" +#include "dispatching.h" static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om}; @@ -305,5 +306,33 @@ int initumath(PyObject *m) return -1; } + /* + * Set up promoters for logical functions + * TODO: This should probably be done at a better place, or even in the + * code generator directly. + */ + s = _PyDict_GetItemStringWithError(d, "logical_and"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } + + s = _PyDict_GetItemStringWithError(d, "logical_or"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } + + s = _PyDict_GetItemStringWithError(d, "logical_xor"); + if (s == NULL) { + return -1; + } + if (install_logical_ufunc_promoter(s) < 0) { + return -1; + } return 0; } diff --git a/numpy/core/tests/data/generate_umath_validation_data.cpp b/numpy/core/tests/data/generate_umath_validation_data.cpp index 9d97ff4ab..418eae670 100644 --- a/numpy/core/tests/data/generate_umath_validation_data.cpp +++ b/numpy/core/tests/data/generate_umath_validation_data.cpp @@ -1,41 +1,46 @@ -#include<math.h> -#include<stdio.h> -#include<iostream> -#include<algorithm> -#include<vector> -#include<random> -#include<fstream> -#include<time.h> +#include <algorithm> +#include <fstream> +#include <iostream> +#include <math.h> +#include <random> +#include <stdio.h> +#include <time.h> +#include <vector> struct ufunc { std::string name; - double (*f32func) (double); - long double (*f64func) (long double); + double (*f32func)(double); + long double (*f64func)(long double); float f32ulp; float f64ulp; }; -template<typename T> -T RandomFloat(T a, T b) { - T random = ((T) rand()) / (T) RAND_MAX; +template <typename T> +T +RandomFloat(T a, T b) +{ + T random = ((T)rand()) / (T)RAND_MAX; T diff = b - a; T r = random * diff; return a + r; } -template<typename T> -void append_random_array(std::vector<T>& arr, T min, T max, size_t N) +template <typename T> +void +append_random_array(std::vector<T> &arr, T min, T max, size_t N) { for (size_t ii = 0; ii < N; ++ii) arr.emplace_back(RandomFloat<T>(min, max)); } -template<typename T1, typename T2> -std::vector<T1> computeTrueVal(const std::vector<T1>& in, T2(*mathfunc)(T2)) { +template <typename T1, typename T2> +std::vector<T1> +computeTrueVal(const std::vector<T1> &in, T2 (*mathfunc)(T2)) +{ std::vector<T1> out; for (T1 elem : in) { - T2 elem_d = (T2) elem; - T1 out_elem = (T1) mathfunc(elem_d); + T2 elem_d = (T2)elem; + T1 out_elem = (T1)mathfunc(elem_d); out.emplace_back(out_elem); } return out; @@ -49,17 +54,20 @@ std::vector<T1> computeTrueVal(const std::vector<T1>& in, T2(*mathfunc)(T2)) { #define MINDEN std::numeric_limits<T>::denorm_min() #define MINFLT std::numeric_limits<T>::min() #define MAXFLT std::numeric_limits<T>::max() -#define INF std::numeric_limits<T>::infinity() -#define qNAN std::numeric_limits<T>::quiet_NaN() -#define sNAN std::numeric_limits<T>::signaling_NaN() +#define INF std::numeric_limits<T>::infinity() +#define qNAN std::numeric_limits<T>::quiet_NaN() +#define sNAN std::numeric_limits<T>::signaling_NaN() -template<typename T> -std::vector<T> generate_input_vector(std::string func) { - std::vector<T> input = {MINDEN, -MINDEN, MINFLT, -MINFLT, MAXFLT, -MAXFLT, - INF, -INF, qNAN, sNAN, -1.0, 1.0, 0.0, -0.0}; +template <typename T> +std::vector<T> +generate_input_vector(std::string func) +{ + std::vector<T> input = {MINDEN, -MINDEN, MINFLT, -MINFLT, MAXFLT, + -MAXFLT, INF, -INF, qNAN, sNAN, + -1.0, 1.0, 0.0, -0.0}; // [-1.0, 1.0] - if ((func == "arcsin") || (func == "arccos") || (func == "arctanh")){ + if ((func == "arcsin") || (func == "arccos") || (func == "arctanh")) { append_random_array<T>(input, -1.0, 1.0, 700); } // (0.0, INF] @@ -98,57 +106,62 @@ std::vector<T> generate_input_vector(std::string func) { return input; } -int main() { - srand (42); +int +main() +{ + srand(42); std::vector<struct ufunc> umathfunc = { - {"sin",sin,sin,2.37,3.3}, - {"cos",cos,cos,2.36,3.38}, - {"tan",tan,tan,3.91,3.93}, - {"arcsin",asin,asin,3.12,2.55}, - {"arccos",acos,acos,2.1,1.67}, - {"arctan",atan,atan,2.3,2.52}, - {"sinh",sinh,sinh,1.55,1.89}, - {"cosh",cosh,cosh,2.48,1.97}, - {"tanh",tanh,tanh,1.38,1.19}, - {"arcsinh",asinh,asinh,1.01,1.48}, - {"arccosh",acosh,acosh,1.16,1.05}, - {"arctanh",atanh,atanh,1.45,1.46}, - {"cbrt",cbrt,cbrt,1.94,1.82}, - //{"exp",exp,exp,3.76,1.53}, - {"exp2",exp2,exp2,1.01,1.04}, - {"expm1",expm1,expm1,2.62,2.1}, - //{"log",log,log,1.84,1.67}, - {"log10",log10,log10,3.5,1.92}, - {"log1p",log1p,log1p,1.96,1.93}, - {"log2",log2,log2,2.12,1.84}, + {"sin", sin, sin, 2.37, 3.3}, + {"cos", cos, cos, 2.36, 3.38}, + {"tan", tan, tan, 3.91, 3.93}, + {"arcsin", asin, asin, 3.12, 2.55}, + {"arccos", acos, acos, 2.1, 1.67}, + {"arctan", atan, atan, 2.3, 2.52}, + {"sinh", sinh, sinh, 1.55, 1.89}, + {"cosh", cosh, cosh, 2.48, 1.97}, + {"tanh", tanh, tanh, 1.38, 1.19}, + {"arcsinh", asinh, asinh, 1.01, 1.48}, + {"arccosh", acosh, acosh, 1.16, 1.05}, + {"arctanh", atanh, atanh, 1.45, 1.46}, + {"cbrt", cbrt, cbrt, 1.94, 1.82}, + //{"exp",exp,exp,3.76,1.53}, + {"exp2", exp2, exp2, 1.01, 1.04}, + {"expm1", expm1, expm1, 2.62, 2.1}, + //{"log",log,log,1.84,1.67}, + {"log10", log10, log10, 3.5, 1.92}, + {"log1p", log1p, log1p, 1.96, 1.93}, + {"log2", log2, log2, 2.12, 1.84}, }; for (int ii = 0; ii < umathfunc.size(); ++ii) { - // ignore sin/cos + // ignore sin/cos if ((umathfunc[ii].name != "sin") && (umathfunc[ii].name != "cos")) { - std::string fileName = "umath-validation-set-" + umathfunc[ii].name + ".csv"; + std::string fileName = + "umath-validation-set-" + umathfunc[ii].name + ".csv"; std::ofstream txtOut; - txtOut.open (fileName, std::ofstream::trunc); + txtOut.open(fileName, std::ofstream::trunc); txtOut << "dtype,input,output,ulperrortol" << std::endl; // Single Precision auto f32in = generate_input_vector<float>(umathfunc[ii].name); - auto f32out = computeTrueVal<float, double>(f32in, umathfunc[ii].f32func); + auto f32out = computeTrueVal<float, double>(f32in, + umathfunc[ii].f32func); for (int jj = 0; jj < f32in.size(); ++jj) { - txtOut << "np.float32" << std::hex << - ",0x" << *reinterpret_cast<uint32_t*>(&f32in[jj]) << - ",0x" << *reinterpret_cast<uint32_t*>(&f32out[jj]) << - "," << ceil(umathfunc[ii].f32ulp) << std::endl; + txtOut << "np.float32" << std::hex << ",0x" + << *reinterpret_cast<uint32_t *>(&f32in[jj]) << ",0x" + << *reinterpret_cast<uint32_t *>(&f32out[jj]) << "," + << ceil(umathfunc[ii].f32ulp) << std::endl; } // Double Precision auto f64in = generate_input_vector<double>(umathfunc[ii].name); - auto f64out = computeTrueVal<double, long double>(f64in, umathfunc[ii].f64func); + auto f64out = computeTrueVal<double, long double>( + f64in, umathfunc[ii].f64func); for (int jj = 0; jj < f64in.size(); ++jj) { - txtOut << "np.float64" << std::hex << - ",0x" << *reinterpret_cast<uint64_t*>(&f64in[jj]) << - ",0x" << *reinterpret_cast<uint64_t*>(&f64out[jj]) << - "," << ceil(umathfunc[ii].f64ulp) << std::endl; + txtOut << "np.float64" << std::hex << ",0x" + << *reinterpret_cast<uint64_t *>(&f64in[jj]) << ",0x" + << *reinterpret_cast<uint64_t *>(&f64out[jj]) << "," + << ceil(umathfunc[ii].f64ulp) << std::endl; } txtOut.close(); } diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 291cdae89..d3c7211cd 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -598,3 +598,31 @@ def test_broadcast_arrays(): def test_full_from_list(shape, fill_value, expected_output): output = np.full(shape, fill_value) assert_equal(output, expected_output) + +def test_astype_copyflag(): + # test the various copyflag options + arr = np.arange(10, dtype=np.intp) + + res_true = arr.astype(np.intp, copy=True) + assert not np.may_share_memory(arr, res_true) + res_always = arr.astype(np.intp, copy=np._CopyMode.ALWAYS) + assert not np.may_share_memory(arr, res_always) + + res_false = arr.astype(np.intp, copy=False) + # `res_false is arr` currently, but check `may_share_memory`. + assert np.may_share_memory(arr, res_false) + res_if_needed = arr.astype(np.intp, copy=np._CopyMode.IF_NEEDED) + # `res_if_needed is arr` currently, but check `may_share_memory`. + assert np.may_share_memory(arr, res_if_needed) + + res_never = arr.astype(np.intp, copy=np._CopyMode.NEVER) + assert np.may_share_memory(arr, res_never) + + # Simple tests for when a copy is necessary: + res_false = arr.astype(np.float64, copy=False) + assert_array_equal(res_false, arr) + res_if_needed = arr.astype(np.float64, + copy=np._CopyMode.IF_NEEDED) + assert_array_equal(res_if_needed, arr) + assert_raises(ValueError, arr.astype, np.float64, + copy=np._CopyMode.NEVER) diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py index b0d8ff503..cb4792090 100644 --- a/numpy/core/tests/test_casting_unittests.py +++ b/numpy/core/tests/test_casting_unittests.py @@ -9,7 +9,6 @@ than integration tests. import pytest import textwrap import enum -import itertools import random import numpy as np diff --git a/numpy/core/tests/test_custom_dtypes.py b/numpy/core/tests/test_custom_dtypes.py index 5eb82bc93..6bcc45d6b 100644 --- a/numpy/core/tests/test_custom_dtypes.py +++ b/numpy/core/tests/test_custom_dtypes.py @@ -101,18 +101,52 @@ class TestSFloat: expected_view = a.view(np.float64) * b.view(np.float64) assert_array_equal(res.view(np.float64), expected_view) + def test_possible_and_impossible_reduce(self): + # For reductions to work, the first and last operand must have the + # same dtype. For this parametric DType that is not necessarily true. + a = self._get_array(2.) + # Addition reductin works (as of writing requires to pass initial + # because setting a scaled-float from the default `0` fails). + res = np.add.reduce(a, initial=0.) + assert res == a.astype(np.float64).sum() + + # But each multiplication changes the factor, so a reduction is not + # possible (the relaxed version of the old refusal to handle any + # flexible dtype). + with pytest.raises(TypeError, + match="the resolved dtypes are not compatible"): + np.multiply.reduce(a) + + def test_basic_ufunc_at(self): + float_a = np.array([1., 2., 3.]) + b = self._get_array(2.) + + float_b = b.view(np.float64).copy() + np.multiply.at(float_b, [1, 1, 1], float_a) + np.multiply.at(b, [1, 1, 1], float_a) + + assert_array_equal(b.view(np.float64), float_b) + def test_basic_multiply_promotion(self): float_a = np.array([1., 2., 3.]) b = self._get_array(2.) res1 = float_a * b res2 = b * float_a + # one factor is one, so we get the factor of b: assert res1.dtype == res2.dtype == b.dtype expected_view = float_a * b.view(np.float64) assert_array_equal(res1.view(np.float64), expected_view) assert_array_equal(res2.view(np.float64), expected_view) + # Check that promotion works when `out` is used: + np.multiply(b, float_a, out=res2) + with pytest.raises(TypeError): + # The promoter accepts this (maybe it should not), but the SFloat + # result cannot be cast to integer: + np.multiply(b, float_a, out=np.arange(3)) + def test_basic_addition(self): a = self._get_array(2.) b = self._get_array(4.) @@ -145,3 +179,23 @@ class TestSFloat: # Check that casting the output fails also (done by the ufunc here) with pytest.raises(TypeError): np.add(a, a, out=c, casting="safe") + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_casts_to_bool(self, ufunc): + a = self._get_array(2.) + a[0] = 0. # make sure first element is considered False. + + float_equiv = a.astype(float) + expected = ufunc(float_equiv, float_equiv) + res = ufunc(a, a) + assert_array_equal(res, expected) + + # also check that the same works for reductions: + expected = ufunc.reduce(float_equiv) + res = ufunc.reduce(a) + assert_array_equal(res, expected) + + # The output casting does not match the bool, bool -> bool loop: + with pytest.raises(TypeError): + ufunc(a, a, out=np.empty(a.shape, dtype=int), casting="equiv") diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 69eba7ba0..b95d669a8 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -2029,6 +2029,21 @@ class TestDateTime: assert_equal(np.maximum.reduce(a), np.timedelta64(7, 's')) + def test_datetime_no_subtract_reducelike(self): + # subtracting two datetime64 works, but we cannot reduce it, since + # the result of that subtraction will have a different dtype. + arr = np.array(["2021-12-02", "2019-05-12"], dtype="M8[ms]") + msg = r"the resolved dtypes are not compatible with subtract\." + + with pytest.raises(TypeError, match=msg + "reduce"): + np.subtract.reduce(arr) + + with pytest.raises(TypeError, match=msg + "accumulate"): + np.subtract.accumulate(arr) + + with pytest.raises(TypeError, match=msg + "reduceat"): + np.subtract.reduceat(arr, [0]) + def test_datetime_busday_offset(self): # First Monday in June assert_equal( diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 898ff8075..e0b66defc 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -13,7 +13,8 @@ import sys import numpy as np from numpy.testing import ( - assert_raises, assert_warns, assert_, assert_array_equal, SkipTest, KnownFailureException + assert_raises, assert_warns, assert_, assert_array_equal, SkipTest, + KnownFailureException, break_cycles, ) from numpy.core._multiarray_tests import fromstring_null_term_c_api @@ -1215,3 +1216,57 @@ class TestPartitionBoolIndex(_DeprecationTestCase): def test_not_deprecated(self, func): self.assert_not_deprecated(lambda: func(1)) self.assert_not_deprecated(lambda: func([0, 1])) + + +class TestMachAr(_DeprecationTestCase): + # Deprecated 2021-10-19, NumPy 1.22 + warning_cls = DeprecationWarning + + def test_deprecated(self): + self.assert_deprecated(lambda: np.MachAr) + + def test_deprecated_module(self): + self.assert_deprecated(lambda: getattr(np.core, "machar")) + + def test_deprecated_attr(self): + finfo = np.finfo(float) + self.assert_deprecated(lambda: getattr(finfo, "machar")) + + +class TestQuantileInterpolationDeprecation(_DeprecationTestCase): + # Deprecated 2021-11-08, NumPy 1.22 + @pytest.mark.parametrize("func", + [np.percentile, np.quantile, np.nanpercentile, np.nanquantile]) + def test_deprecated(self, func): + self.assert_deprecated( + lambda: func([0., 1.], 0., interpolation="linear")) + self.assert_deprecated( + lambda: func([0., 1.], 0., interpolation="nearest")) + + @pytest.mark.parametrize("func", + [np.percentile, np.quantile, np.nanpercentile, np.nanquantile]) + def test_both_passed(self, func): + with warnings.catch_warnings(): + # catch the DeprecationWarning so that it does not raise: + warnings.simplefilter("always", DeprecationWarning) + with pytest.raises(TypeError): + func([0., 1.], 0., interpolation="nearest", method="nearest") + + +class TestMemEventHook(_DeprecationTestCase): + # Deprecated 2021-11-18, NumPy 1.23 + def test_mem_seteventhook(self): + # The actual tests are within the C code in + # multiarray/_multiarray_tests.c.src + import numpy.core._multiarray_tests as ma_tests + with pytest.warns(DeprecationWarning, + match='PyDataMem_SetEventHook is deprecated'): + ma_tests.test_pydatamem_seteventhook_start() + # force an allocation and free of a numpy array + # needs to be larger then limit of small memory cacher in ctors.c + a = np.zeros(1000) + del a + break_cycles() + with pytest.warns(DeprecationWarning, + match='PyDataMem_SetEventHook is deprecated'): + ma_tests.test_pydatamem_seteventhook_end() diff --git a/numpy/core/tests/test_dlpack.py b/numpy/core/tests/test_dlpack.py new file mode 100644 index 000000000..f848b2008 --- /dev/null +++ b/numpy/core/tests/test_dlpack.py @@ -0,0 +1,109 @@ +import sys +import pytest + +import numpy as np +from numpy.testing import assert_array_equal, IS_PYPY + + +class TestDLPack: + @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.") + def test_dunder_dlpack_refcount(self): + x = np.arange(5) + y = x.__dlpack__() + assert sys.getrefcount(x) == 3 + del y + assert sys.getrefcount(x) == 2 + + def test_dunder_dlpack_stream(self): + x = np.arange(5) + x.__dlpack__(stream=None) + + with pytest.raises(RuntimeError): + x.__dlpack__(stream=1) + + def test_strides_not_multiple_of_itemsize(self): + dt = np.dtype([('int', np.int32), ('char', np.int8)]) + y = np.zeros((5,), dtype=dt) + z = y['int'] + + with pytest.raises(RuntimeError): + np._from_dlpack(z) + + @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.") + def test_from_dlpack_refcount(self): + x = np.arange(5) + y = np._from_dlpack(x) + assert sys.getrefcount(x) == 3 + del y + assert sys.getrefcount(x) == 2 + + @pytest.mark.parametrize("dtype", [ + np.int8, np.int16, np.int32, np.int64, + np.uint8, np.uint16, np.uint32, np.uint64, + np.float16, np.float32, np.float64, + np.complex64, np.complex128 + ]) + def test_dtype_passthrough(self, dtype): + x = np.arange(5, dtype=dtype) + y = np._from_dlpack(x) + + assert y.dtype == x.dtype + assert_array_equal(x, y) + + def test_invalid_dtype(self): + x = np.asarray(np.datetime64('2021-05-27')) + + with pytest.raises(TypeError): + np._from_dlpack(x) + + def test_invalid_byte_swapping(self): + dt = np.dtype('=i8').newbyteorder() + x = np.arange(5, dtype=dt) + + with pytest.raises(TypeError): + np._from_dlpack(x) + + def test_non_contiguous(self): + x = np.arange(25).reshape((5, 5)) + + y1 = x[0] + assert_array_equal(y1, np._from_dlpack(y1)) + + y2 = x[:, 0] + assert_array_equal(y2, np._from_dlpack(y2)) + + y3 = x[1, :] + assert_array_equal(y3, np._from_dlpack(y3)) + + y4 = x[1] + assert_array_equal(y4, np._from_dlpack(y4)) + + y5 = np.diagonal(x).copy() + assert_array_equal(y5, np._from_dlpack(y5)) + + @pytest.mark.parametrize("ndim", range(33)) + def test_higher_dims(self, ndim): + shape = (1,) * ndim + x = np.zeros(shape, dtype=np.float64) + + assert shape == np._from_dlpack(x).shape + + def test_dlpack_device(self): + x = np.arange(5) + assert x.__dlpack_device__() == (1, 0) + assert np._from_dlpack(x).__dlpack_device__() == (1, 0) + + def dlpack_deleter_exception(self): + x = np.arange(5) + _ = x.__dlpack__() + raise RuntimeError + + def test_dlpack_destructor_exception(self): + with pytest.raises(RuntimeError): + self.dlpack_deleter_exception() + + def test_readonly(self): + x = np.arange(5) + x.flags.writeable = False + with pytest.raises(TypeError): + x.__dlpack__() diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 61dce2494..e49604e4d 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -3,7 +3,6 @@ import operator import pytest import ctypes import gc -import warnings import types from typing import Any @@ -628,6 +627,12 @@ class TestSubarray: t2 = np.dtype('2i4', align=True) assert_equal(t1.alignment, t2.alignment) + def test_aligned_empty(self): + # Mainly regression test for gh-19696: construction failed completely + dt = np.dtype([], align=True) + assert dt == np.dtype([]) + dt = np.dtype({"names": [], "formats": [], "itemsize": 0}, align=True) + assert dt == np.dtype([]) def iter_struct_object_dtypes(): """ @@ -724,26 +729,30 @@ class TestStructuredObjectRefcounting: def test_structured_object_indexing(self, shape, index, items_changed, dt, pat, count, singleton): """Structured object reference counting for advanced indexing.""" - zero = 0 - one = 1 + # Use two small negative values (should be singletons, but less likely + # to run into race-conditions). This failed in some threaded envs + # When using 0 and 1. If it fails again, should remove all explicit + # checks, and rely on `pytest-leaks` reference count checker only. + val0 = -4 + val1 = -5 - arr = np.zeros(shape, dt) + arr = np.full(shape, val0, dt) gc.collect() - before_zero = sys.getrefcount(zero) - before_one = sys.getrefcount(one) + before_val0 = sys.getrefcount(val0) + before_val1 = sys.getrefcount(val1) # Test item getting: part = arr[index] - after_zero = sys.getrefcount(zero) - assert after_zero - before_zero == count * items_changed + after_val0 = sys.getrefcount(val0) + assert after_val0 - before_val0 == count * items_changed del part # Test item setting: - arr[index] = one + arr[index] = val1 gc.collect() - after_zero = sys.getrefcount(zero) - after_one = sys.getrefcount(one) - assert before_zero - after_zero == count * items_changed - assert after_one - before_one == count * items_changed + after_val0 = sys.getrefcount(val0) + after_val1 = sys.getrefcount(val1) + assert before_val0 - after_val0 == count * items_changed + assert after_val1 - before_val1 == count * items_changed @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'], iter_struct_object_dtypes()) diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index 78c5e527b..172311624 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -1,5 +1,7 @@ import itertools +import pytest + import numpy as np from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_almost_equal, @@ -744,6 +746,52 @@ class TestEinsum: np.einsum('ij,jk->ik', x, x, out=out) assert_array_equal(out.base, correct_base) + @pytest.mark.parametrize("dtype", + np.typecodes["AllFloat"] + np.typecodes["AllInteger"]) + def test_different_paths(self, dtype): + # Test originally added to cover broken float16 path: gh-20305 + # Likely most are covered elsewhere, at least partially. + dtype = np.dtype(dtype) + # Simple test, designed to excersize most specialized code paths, + # note the +0.5 for floats. This makes sure we use a float value + # where the results must be exact. + arr = (np.arange(7) + 0.5).astype(dtype) + scalar = np.array(2, dtype=dtype) + + # contig -> scalar: + res = np.einsum('i->', arr) + assert res == arr.sum() + # contig, contig -> contig: + res = np.einsum('i,i->i', arr, arr) + assert_array_equal(res, arr * arr) + # noncontig, noncontig -> contig: + res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2]) + assert_array_equal(res, arr * arr) + # contig + contig -> scalar + assert np.einsum('i,i->', arr, arr) == (arr * arr).sum() + # contig + scalar -> contig (with out) + out = np.ones(7, dtype=dtype) + res = np.einsum('i,->i', arr, dtype.type(2), out=out) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> contig (with out) + res = np.einsum(',i->i', scalar, arr) + assert_array_equal(res, arr * dtype.type(2)) + # scalar + contig -> scalar + res = np.einsum(',i->', scalar, arr) + # Use einsum to compare to not have difference due to sum round-offs: + assert res == np.einsum('i->', scalar * arr) + # contig + scalar -> scalar + res = np.einsum('i,->', arr, scalar) + # Use einsum to compare to not have difference due to sum round-offs: + assert res == np.einsum('i->', scalar * arr) + # contig + contig + contig -> scalar + arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype) + res = np.einsum('i,i,i->', arr, arr, arr) + assert_array_equal(res, (arr * arr * arr).sum()) + # four arrays: + res = np.einsum('i,i,i,i->', arr, arr, arr, arr) + assert_array_equal(res, (arr * arr * arr * arr).sum()) + def test_small_boolean_arrays(self): # See gh-5946. # Use array of True embedded in False. diff --git a/numpy/core/tests/test_getlimits.py b/numpy/core/tests/test_getlimits.py index de7b3e769..c5148db2c 100644 --- a/numpy/core/tests/test_getlimits.py +++ b/numpy/core/tests/test_getlimits.py @@ -46,7 +46,7 @@ class TestFinfo: [np.float16, np.float32, np.float64, np.complex64, np.complex128])) for dt1, dt2 in dts: - for attr in ('bits', 'eps', 'epsneg', 'iexp', 'machar', 'machep', + for attr in ('bits', 'eps', 'epsneg', 'iexp', 'machep', 'max', 'maxexp', 'min', 'minexp', 'negep', 'nexp', 'nmant', 'precision', 'resolution', 'tiny', 'smallest_normal', 'smallest_subnormal'): diff --git a/numpy/core/tests/test_machar.py b/numpy/core/tests/test_machar.py index 673f309f1..3a66ec51f 100644 --- a/numpy/core/tests/test_machar.py +++ b/numpy/core/tests/test_machar.py @@ -3,7 +3,7 @@ Test machar. Given recent changes to hardcode type data, we might want to get rid of both MachAr and this test at some point. """ -from numpy.core.machar import MachAr +from numpy.core._machar import MachAr import numpy.core.numerictypes as ntypes from numpy import errstate, array diff --git a/numpy/core/tests/test_mem_policy.py b/numpy/core/tests/test_mem_policy.py new file mode 100644 index 000000000..3dae36d5a --- /dev/null +++ b/numpy/core/tests/test_mem_policy.py @@ -0,0 +1,423 @@ +import asyncio +import gc +import os +import pytest +import numpy as np +import threading +import warnings +from numpy.testing import extbuild, assert_warns +import sys + + +@pytest.fixture +def get_module(tmp_path): + """ Add a memory policy that returns a false pointer 64 bytes into the + actual allocation, and fill the prefix with some text. Then check at each + memory manipulation that the prefix exists, to make sure all alloc/realloc/ + free/calloc go via the functions here. + """ + if sys.platform.startswith('cygwin'): + pytest.skip('link fails on cygwin') + functions = [ + ("get_default_policy", "METH_NOARGS", """ + Py_INCREF(PyDataMem_DefaultHandler); + return PyDataMem_DefaultHandler; + """), + ("set_secret_data_policy", "METH_NOARGS", """ + PyObject *secret_data = + PyCapsule_New(&secret_data_handler, "mem_handler", NULL); + if (secret_data == NULL) { + return NULL; + } + PyObject *old = PyDataMem_SetHandler(secret_data); + Py_DECREF(secret_data); + return old; + """), + ("set_old_policy", "METH_O", """ + PyObject *old; + if (args != NULL && PyCapsule_CheckExact(args)) { + old = PyDataMem_SetHandler(args); + } + else { + old = PyDataMem_SetHandler(NULL); + } + return old; + """), + ("get_array", "METH_NOARGS", """ + char *buf = (char *)malloc(20); + npy_intp dims[1]; + dims[0] = 20; + PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8); + return PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, NULL, + buf, NPY_ARRAY_WRITEABLE, NULL); + """), + ("set_own", "METH_O", """ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_ValueError, + "need an ndarray"); + return NULL; + } + PyArray_ENABLEFLAGS((PyArrayObject*)args, NPY_ARRAY_OWNDATA); + // Maybe try this too? + // PyArray_BASE(PyArrayObject *)args) = NULL; + Py_RETURN_NONE; + """), + ("get_array_with_base", "METH_NOARGS", """ + char *buf = (char *)malloc(20); + npy_intp dims[1]; + dims[0] = 20; + PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8); + PyObject *arr = PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, + NULL, buf, + NPY_ARRAY_WRITEABLE, NULL); + if (arr == NULL) return NULL; + PyObject *obj = PyCapsule_New(buf, "buf capsule", + (PyCapsule_Destructor)&warn_on_free); + if (obj == NULL) { + Py_DECREF(arr); + return NULL; + } + if (PyArray_SetBaseObject((PyArrayObject *)arr, obj) < 0) { + Py_DECREF(arr); + Py_DECREF(obj); + return NULL; + } + return arr; + + """), + ] + prologue = ''' + #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION + #include <numpy/arrayobject.h> + /* + * This struct allows the dynamic configuration of the allocator funcs + * of the `secret_data_allocator`. It is provided here for + * demonstration purposes, as a valid `ctx` use-case scenario. + */ + typedef struct { + void *(*malloc)(size_t); + void *(*calloc)(size_t, size_t); + void *(*realloc)(void *, size_t); + void (*free)(void *); + } SecretDataAllocatorFuncs; + + NPY_NO_EXPORT void * + shift_alloc(void *ctx, size_t sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + char *real = (char *)funcs->malloc(sz + 64); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated %ld", (unsigned long)sz); + return (void *)(real + 64); + } + NPY_NO_EXPORT void * + shift_zero(void *ctx, size_t sz, size_t cnt) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + char *real = (char *)funcs->calloc(sz + 64, cnt); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated %ld via zero", + (unsigned long)sz); + return (void *)(real + 64); + } + NPY_NO_EXPORT void + shift_free(void *ctx, void * p, npy_uintp sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + if (p == NULL) { + return ; + } + char *real = (char *)p - 64; + if (strncmp(real, "originally allocated", 20) != 0) { + fprintf(stdout, "uh-oh, unmatched shift_free, " + "no appropriate prefix\\n"); + /* Make C runtime crash by calling free on the wrong address */ + funcs->free((char *)p + 10); + /* funcs->free(real); */ + } + else { + npy_uintp i = (npy_uintp)atoi(real +20); + if (i != sz) { + fprintf(stderr, "uh-oh, unmatched shift_free" + "(ptr, %ld) but allocated %ld\\n", sz, i); + /* This happens in some places, only print */ + funcs->free(real); + } + else { + funcs->free(real); + } + } + } + NPY_NO_EXPORT void * + shift_realloc(void *ctx, void * p, npy_uintp sz) { + SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx; + if (p != NULL) { + char *real = (char *)p - 64; + if (strncmp(real, "originally allocated", 20) != 0) { + fprintf(stdout, "uh-oh, unmatched shift_realloc\\n"); + return realloc(p, sz); + } + return (void *)((char *)funcs->realloc(real, sz + 64) + 64); + } + else { + char *real = (char *)funcs->realloc(p, sz + 64); + if (real == NULL) { + return NULL; + } + snprintf(real, 64, "originally allocated " + "%ld via realloc", (unsigned long)sz); + return (void *)(real + 64); + } + } + /* As an example, we use the standard {m|c|re}alloc/free funcs. */ + static SecretDataAllocatorFuncs secret_data_handler_ctx = { + malloc, + calloc, + realloc, + free + }; + static PyDataMem_Handler secret_data_handler = { + "secret_data_allocator", + 1, + { + &secret_data_handler_ctx, /* ctx */ + shift_alloc, /* malloc */ + shift_zero, /* calloc */ + shift_realloc, /* realloc */ + shift_free /* free */ + } + }; + void warn_on_free(void *capsule) { + PyErr_WarnEx(PyExc_UserWarning, "in warn_on_free", 1); + void * obj = PyCapsule_GetPointer(capsule, + PyCapsule_GetName(capsule)); + free(obj); + }; + ''' + more_init = "import_array();" + try: + import mem_policy + return mem_policy + except ImportError: + pass + # if it does not exist, build and load it + return extbuild.build_and_import_extension('mem_policy', + functions, + prologue=prologue, + include_dirs=[np.get_include()], + build_dir=tmp_path, + more_init=more_init) + + +def test_set_policy(get_module): + + get_handler_name = np.core.multiarray.get_handler_name + get_handler_version = np.core.multiarray.get_handler_version + orig_policy_name = get_handler_name() + + a = np.arange(10).reshape((2, 5)) # a doesn't own its own data + assert get_handler_name(a) is None + assert get_handler_version(a) is None + assert get_handler_name(a.base) == orig_policy_name + assert get_handler_version(a.base) == 1 + + orig_policy = get_module.set_secret_data_policy() + + b = np.arange(10).reshape((2, 5)) # b doesn't own its own data + assert get_handler_name(b) is None + assert get_handler_version(b) is None + assert get_handler_name(b.base) == 'secret_data_allocator' + assert get_handler_version(b.base) == 1 + + if orig_policy_name == 'default_allocator': + get_module.set_old_policy(None) # tests PyDataMem_SetHandler(NULL) + assert get_handler_name() == 'default_allocator' + else: + get_module.set_old_policy(orig_policy) + assert get_handler_name() == orig_policy_name + + +def test_default_policy_singleton(get_module): + get_handler_name = np.core.multiarray.get_handler_name + + # set the policy to default + orig_policy = get_module.set_old_policy(None) + + assert get_handler_name() == 'default_allocator' + + # re-set the policy to default + def_policy_1 = get_module.set_old_policy(None) + + assert get_handler_name() == 'default_allocator' + + # set the policy to original + def_policy_2 = get_module.set_old_policy(orig_policy) + + # since default policy is a singleton, + # these should be the same object + assert def_policy_1 is def_policy_2 is get_module.get_default_policy() + + +def test_policy_propagation(get_module): + # The memory policy goes hand-in-hand with flags.owndata + + class MyArr(np.ndarray): + pass + + get_handler_name = np.core.multiarray.get_handler_name + orig_policy_name = get_handler_name() + a = np.arange(10).view(MyArr).reshape((2, 5)) + assert get_handler_name(a) is None + assert a.flags.owndata is False + + assert get_handler_name(a.base) is None + assert a.base.flags.owndata is False + + assert get_handler_name(a.base.base) == orig_policy_name + assert a.base.base.flags.owndata is True + + +async def concurrent_context1(get_module, orig_policy_name, event): + if orig_policy_name == 'default_allocator': + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + else: + get_module.set_old_policy(None) + assert np.core.multiarray.get_handler_name() == 'default_allocator' + event.set() + + +async def concurrent_context2(get_module, orig_policy_name, event): + await event.wait() + # the policy is not affected by changes in parallel contexts + assert np.core.multiarray.get_handler_name() == orig_policy_name + # change policy in the child context + if orig_policy_name == 'default_allocator': + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + else: + get_module.set_old_policy(None) + assert np.core.multiarray.get_handler_name() == 'default_allocator' + + +async def async_test_context_locality(get_module): + orig_policy_name = np.core.multiarray.get_handler_name() + + event = asyncio.Event() + # the child contexts inherit the parent policy + concurrent_task1 = asyncio.create_task( + concurrent_context1(get_module, orig_policy_name, event)) + concurrent_task2 = asyncio.create_task( + concurrent_context2(get_module, orig_policy_name, event)) + await concurrent_task1 + await concurrent_task2 + + # the parent context is not affected by child policy changes + assert np.core.multiarray.get_handler_name() == orig_policy_name + + +def test_context_locality(get_module): + if (sys.implementation.name == 'pypy' + and sys.pypy_version_info[:3] < (7, 3, 6)): + pytest.skip('no context-locality support in PyPy < 7.3.6') + asyncio.run(async_test_context_locality(get_module)) + + +def concurrent_thread1(get_module, event): + get_module.set_secret_data_policy() + assert np.core.multiarray.get_handler_name() == 'secret_data_allocator' + event.set() + + +def concurrent_thread2(get_module, event): + event.wait() + # the policy is not affected by changes in parallel threads + assert np.core.multiarray.get_handler_name() == 'default_allocator' + # change policy in the child thread + get_module.set_secret_data_policy() + + +def test_thread_locality(get_module): + orig_policy_name = np.core.multiarray.get_handler_name() + + event = threading.Event() + # the child threads do not inherit the parent policy + concurrent_task1 = threading.Thread(target=concurrent_thread1, + args=(get_module, event)) + concurrent_task2 = threading.Thread(target=concurrent_thread2, + args=(get_module, event)) + concurrent_task1.start() + concurrent_task2.start() + concurrent_task1.join() + concurrent_task2.join() + + # the parent thread is not affected by child policy changes + assert np.core.multiarray.get_handler_name() == orig_policy_name + + +@pytest.mark.slow +def test_new_policy(get_module): + a = np.arange(10) + orig_policy_name = np.core.multiarray.get_handler_name(a) + + orig_policy = get_module.set_secret_data_policy() + + b = np.arange(10) + assert np.core.multiarray.get_handler_name(b) == 'secret_data_allocator' + + # test array manipulation. This is slow + if orig_policy_name == 'default_allocator': + # when the np.core.test tests recurse into this test, the + # policy will be set so this "if" will be false, preventing + # infinite recursion + # + # if needed, debug this by + # - running tests with -- -s (to not capture stdout/stderr + # - setting extra_argv=['-vv'] here + assert np.core.test('full', verbose=2, extra_argv=['-vv']) + # also try the ma tests, the pickling test is quite tricky + assert np.ma.test('full', verbose=2, extra_argv=['-vv']) + + get_module.set_old_policy(orig_policy) + + c = np.arange(10) + assert np.core.multiarray.get_handler_name(c) == orig_policy_name + +@pytest.mark.xfail(sys.implementation.name == "pypy", + reason=("bad interaction between getenv and " + "os.environ inside pytest")) +@pytest.mark.parametrize("policy", ["0", "1", None]) +def test_switch_owner(get_module, policy): + a = get_module.get_array() + assert np.core.multiarray.get_handler_name(a) is None + get_module.set_own(a) + oldval = os.environ.get('NUMPY_WARN_IF_NO_MEM_POLICY', None) + if policy is None: + if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ: + os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY') + else: + os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = policy + try: + # The policy should be NULL, so we have to assume we can call + # "free". A warning is given if the policy == "1" + if policy == "1": + with assert_warns(RuntimeWarning) as w: + del a + gc.collect() + else: + del a + gc.collect() + + finally: + if oldval is None: + if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ: + os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY') + else: + os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = oldval + +def test_owner_is_base(get_module): + a = get_module.get_array_with_base() + with pytest.warns(UserWarning, match='warn_on_free'): + del a + gc.collect() diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 0da36bbea..23182470b 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -7814,6 +7814,216 @@ class TestNewBufferProtocol: _multiarray_tests.corrupt_or_fix_bufferinfo(obj) +class TestArrayCreationCopyArgument(object): + + class RaiseOnBool: + + def __bool__(self): + raise ValueError + + true_vals = [True, np._CopyMode.ALWAYS, np.True_] + false_vals = [False, np._CopyMode.IF_NEEDED, np.False_] + + def test_scalars(self): + # Test both numpy and python scalars + for dtype in np.typecodes["All"]: + arr = np.zeros((), dtype=dtype) + scalar = arr[()] + pyscalar = arr.item(0) + + # Test never-copy raises error: + assert_raises(ValueError, np.array, scalar, + copy=np._CopyMode.NEVER) + assert_raises(ValueError, np.array, pyscalar, + copy=np._CopyMode.NEVER) + assert_raises(ValueError, np.array, pyscalar, + copy=self.RaiseOnBool()) + assert_raises(ValueError, _multiarray_tests.npy_ensurenocopy, + [1]) + # Casting with a dtype (to unsigned integers) can be special: + with pytest.raises(ValueError): + np.array(pyscalar, dtype=np.int64, copy=np._CopyMode.NEVER) + + def test_compatible_cast(self): + + # Some types are compatible even though they are different, no + # copy is necessary for them. This is mostly true for some integers + def int_types(byteswap=False): + int_types = (np.typecodes["Integer"] + + np.typecodes["UnsignedInteger"]) + for int_type in int_types: + yield np.dtype(int_type) + if byteswap: + yield np.dtype(int_type).newbyteorder() + + for int1 in int_types(): + for int2 in int_types(True): + arr = np.arange(10, dtype=int1) + + for copy in self.true_vals: + res = np.array(arr, copy=copy, dtype=int2) + assert res is not arr and res.flags.owndata + assert_array_equal(res, arr) + + if int1 == int2: + # Casting is not necessary, base check is sufficient here + for copy in self.false_vals: + res = np.array(arr, copy=copy, dtype=int2) + assert res is arr or res.base is arr + + res = np.array(arr, + copy=np._CopyMode.NEVER, + dtype=int2) + assert res is arr or res.base is arr + + else: + # Casting is necessary, assert copy works: + for copy in self.false_vals: + res = np.array(arr, copy=copy, dtype=int2) + assert res is not arr and res.flags.owndata + assert_array_equal(res, arr) + + assert_raises(ValueError, np.array, + arr, copy=np._CopyMode.NEVER, + dtype=int2) + assert_raises(ValueError, np.array, + arr, copy=None, + dtype=int2) + + def test_buffer_interface(self): + + # Buffer interface gives direct memory access (no copy) + arr = np.arange(10) + view = memoryview(arr) + + # Checking bases is a bit tricky since numpy creates another + # memoryview, so use may_share_memory. + for copy in self.true_vals: + res = np.array(view, copy=copy) + assert not np.may_share_memory(arr, res) + for copy in self.false_vals: + res = np.array(view, copy=copy) + assert np.may_share_memory(arr, res) + res = np.array(view, copy=np._CopyMode.NEVER) + assert np.may_share_memory(arr, res) + + def test_array_interfaces(self): + # Array interface gives direct memory access (much like a memoryview) + base_arr = np.arange(10) + + class ArrayLike: + __array_interface__ = base_arr.__array_interface__ + + arr = ArrayLike() + + for copy, val in [(True, None), (np._CopyMode.ALWAYS, None), + (False, arr), (np._CopyMode.IF_NEEDED, arr), + (np._CopyMode.NEVER, arr)]: + res = np.array(arr, copy=copy) + assert res.base is val + + def test___array__(self): + base_arr = np.arange(10) + + class ArrayLike: + def __array__(self): + # __array__ should return a copy, numpy cannot know this + # however. + return base_arr + + arr = ArrayLike() + + for copy in self.true_vals: + res = np.array(arr, copy=copy) + assert_array_equal(res, base_arr) + # An additional copy is currently forced by numpy in this case, + # you could argue, numpy does not trust the ArrayLike. This + # may be open for change: + assert res is not base_arr + + for copy in self.false_vals: + res = np.array(arr, copy=False) + assert_array_equal(res, base_arr) + assert res is base_arr # numpy trusts the ArrayLike + + with pytest.raises(ValueError): + np.array(arr, copy=np._CopyMode.NEVER) + + @pytest.mark.parametrize( + "arr", [np.ones(()), np.arange(81).reshape((9, 9))]) + @pytest.mark.parametrize("order1", ["C", "F", None]) + @pytest.mark.parametrize("order2", ["C", "F", "A", "K"]) + def test_order_mismatch(self, arr, order1, order2): + # The order is the main (python side) reason that can cause + # a never-copy to fail. + # Prepare C-order, F-order and non-contiguous arrays: + arr = arr.copy(order1) + if order1 == "C": + assert arr.flags.c_contiguous + elif order1 == "F": + assert arr.flags.f_contiguous + elif arr.ndim != 0: + # Make array non-contiguous + arr = arr[::2, ::2] + assert not arr.flags.forc + + # Whether a copy is necessary depends on the order of arr: + if order2 == "C": + no_copy_necessary = arr.flags.c_contiguous + elif order2 == "F": + no_copy_necessary = arr.flags.f_contiguous + else: + # Keeporder and Anyorder are OK with non-contiguous output. + # This is not consistent with the `astype` behaviour which + # enforces contiguity for "A". It is probably historic from when + # "K" did not exist. + no_copy_necessary = True + + # Test it for both the array and a memoryview + for view in [arr, memoryview(arr)]: + for copy in self.true_vals: + res = np.array(view, copy=copy, order=order2) + assert res is not arr and res.flags.owndata + assert_array_equal(arr, res) + + if no_copy_necessary: + for copy in self.false_vals: + res = np.array(view, copy=copy, order=order2) + # res.base.obj refers to the memoryview + if not IS_PYPY: + assert res is arr or res.base.obj is arr + + res = np.array(view, copy=np._CopyMode.NEVER, + order=order2) + if not IS_PYPY: + assert res is arr or res.base.obj is arr + else: + for copy in self.false_vals: + res = np.array(arr, copy=copy, order=order2) + assert_array_equal(arr, res) + assert_raises(ValueError, np.array, + view, copy=np._CopyMode.NEVER, + order=order2) + assert_raises(ValueError, np.array, + view, copy=None, + order=order2) + + def test_striding_not_ok(self): + arr = np.array([[1, 2, 4], [3, 4, 5]]) + assert_raises(ValueError, np.array, + arr.T, copy=np._CopyMode.NEVER, + order='C') + assert_raises(ValueError, np.array, + arr.T, copy=np._CopyMode.NEVER, + order='C', dtype=np.int64) + assert_raises(ValueError, np.array, + arr, copy=np._CopyMode.NEVER, + order='F') + assert_raises(ValueError, np.array, + arr, copy=np._CopyMode.NEVER, + order='F', dtype=np.int64) + + class TestArrayAttributeDeletion: def test_multiarray_writable_attributes_deletion(self): @@ -7977,18 +8187,6 @@ def test_scalar_element_deletion(): assert_raises(ValueError, a[0].__delitem__, 'x') -class TestMemEventHook: - def test_mem_seteventhook(self): - # The actual tests are within the C code in - # multiarray/_multiarray_tests.c.src - _multiarray_tests.test_pydatamem_seteventhook_start() - # force an allocation and free of a numpy array - # needs to be larger then limit of small memory cacher in ctors.c - a = np.zeros(1000) - del a - break_cycles() - _multiarray_tests.test_pydatamem_seteventhook_end() - class TestMapIter: def test_mapiter(self): # The actual tests are within the C code in diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index fbf6da0e1..ed775cac6 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -9,7 +9,7 @@ import numpy.core._multiarray_tests as _multiarray_tests from numpy import array, arange, nditer, all from numpy.testing import ( assert_, assert_equal, assert_array_equal, assert_raises, - HAS_REFCOUNT, suppress_warnings + HAS_REFCOUNT, suppress_warnings, break_cycles ) @@ -3128,6 +3128,8 @@ def test_warn_noclose(): assert len(sup.log) == 1 +@pytest.mark.skipif(sys.version_info[:2] == (3, 9) and sys.platform == "win32", + reason="Errors with Python 3.9 on Windows") @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") @pytest.mark.parametrize(["in_dtype", "buf_dtype"], [("i", "O"), ("O", "i"), # most simple cases @@ -3148,6 +3150,8 @@ def test_partial_iteration_cleanup(in_dtype, buf_dtype, steps): # Note that resetting does not free references del it + break_cycles() + break_cycles() assert count == sys.getrefcount(value) # Repeat the test with `iternext` @@ -3157,6 +3161,8 @@ def test_partial_iteration_cleanup(in_dtype, buf_dtype, steps): it.iternext() del it # should ensure cleanup + break_cycles() + break_cycles() assert count == sys.getrefcount(value) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index e36f76c53..ad9437911 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -16,7 +16,7 @@ from numpy.testing import ( ) from numpy.core._rational_tests import rational -from hypothesis import assume, given, strategies as st +from hypothesis import given, strategies as st from hypothesis.extra import numpy as hynp @@ -646,7 +646,7 @@ class TestFloatExceptions: if np.dtype(ftype).kind == 'f': # Get some extreme values for the type fi = np.finfo(ftype) - ft_tiny = fi.machar.tiny + ft_tiny = fi._machar.tiny ft_max = fi.max ft_eps = fi.eps underflow = 'underflow' @@ -655,7 +655,7 @@ class TestFloatExceptions: # 'c', complex, corresponding real dtype rtype = type(ftype(0).real) fi = np.finfo(rtype) - ft_tiny = ftype(fi.machar.tiny) + ft_tiny = ftype(fi._machar.tiny) ft_max = ftype(fi.max) ft_eps = ftype(fi.eps) # The complex types raise different exceptions @@ -932,25 +932,6 @@ class TestTypes: # Promote with object: assert_equal(promote_types('O', S+'30'), np.dtype('O')) - @pytest.mark.parametrize(["dtype1", "dtype2"], - [[np.dtype("V6"), np.dtype("V10")], - [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])], - [np.dtype("i8,i8"), np.dtype("i4,i4")], - ]) - def test_invalid_void_promotion(self, dtype1, dtype2): - # Mainly test structured void promotion, which currently allows - # byte-swapping, but nothing else: - with pytest.raises(TypeError): - np.promote_types(dtype1, dtype2) - - @pytest.mark.parametrize(["dtype1", "dtype2"], - [[np.dtype("V10"), np.dtype("V10")], - [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])], - [np.dtype("i8,i8"), np.dtype("i8,>i8")], - ]) - def test_valid_void_promotion(self, dtype1, dtype2): - assert np.promote_types(dtype1, dtype2) is dtype1 - @pytest.mark.parametrize("dtype", list(np.typecodes["All"]) + ["i,i", "S3", "S100", "U3", "U100", rational]) @@ -1503,6 +1484,18 @@ class TestNonzero: a = np.array([[False], [TrueThenFalse()]]) assert_raises(RuntimeError, np.nonzero, a) + def test_nonzero_sideffects_structured_void(self): + # Checks that structured void does not mutate alignment flag of + # original array. + arr = np.zeros(5, dtype="i1,i8,i8") # `ones` may short-circuit + assert arr.flags.aligned # structs are considered "aligned" + assert not arr["f2"].flags.aligned + # make sure that nonzero/count_nonzero do not flip the flag: + np.nonzero(arr) + assert arr.flags.aligned + np.count_nonzero(arr) + assert arr.flags.aligned + def test_nonzero_exception_safe(self): # gh-13930 diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py index 6077c8f75..eef4c1433 100644 --- a/numpy/core/tests/test_scalar_methods.py +++ b/numpy/core/tests/test_scalar_methods.py @@ -183,3 +183,21 @@ def test_class_getitem_38(cls: Type[np.number]) -> None: match = "Type subscription requires python >= 3.9" with pytest.raises(TypeError, match=match): cls[Any] + + +class TestBitCount: + # derived in part from the cpython test "test_bit_count" + + @pytest.mark.parametrize("itype", np.sctypes['int']+np.sctypes['uint']) + def test_small(self, itype): + for a in range(max(np.iinfo(itype).min, 0), 128): + msg = f"Smoke test for {itype}({a}).bit_count()" + assert itype(a).bit_count() == bin(a).count("1"), msg + + def test_bit_count(self): + for exp in [10, 17, 63]: + a = 2**exp + assert np.uint64(a).bit_count() == 1 + assert np.uint64(a - 1).bit_count() == exp + assert np.uint64(a ^ 63).bit_count() == 7 + assert np.uint64((a - 1) ^ 510).bit_count() == exp - 8 diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index becd65b11..90078a2ea 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -5,14 +5,14 @@ import itertools import operator import platform import pytest -from hypothesis import given, settings, Verbosity, assume +from hypothesis import given, settings, Verbosity from hypothesis.strategies import sampled_from import numpy as np from numpy.testing import ( assert_, assert_equal, assert_raises, assert_almost_equal, assert_array_equal, IS_PYPY, suppress_warnings, _gen_alignment_data, - assert_warns, assert_raises_regex, + assert_warns, ) types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc, diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index f0c60953b..12a67c44d 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -329,7 +329,38 @@ class _SIMD_FP(_Test_Utility): data_square = [x*x for x in data] square = self.square(vdata) assert square == data_square - + + @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil), + ("self.trunc", math.trunc)]) + def test_rounding(self, intrin, func): + """ + Test intrinsics: + npyv_ceil_##SFX + npyv_trunc_##SFX + """ + intrin_name = intrin + intrin = eval(intrin) + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + # special cases + round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf)) + for case, desired in round_cases: + data_round = [desired]*self.nlanes + _round = intrin(self.setall(case)) + assert _round == pytest.approx(data_round, nan_ok=True) + for x in range(0, 2**20, 256**2): + for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15): + data = [x*w+a for a in range(self.nlanes)] + vdata = self.load(data) + data_round = [func(x) for x in data] + _round = intrin(vdata) + assert _round == data_round + # signed zero + if "ceil" in intrin_name or "trunc" in intrin_name: + for w in (-0.25, -0.30, -0.45): + _round = self._to_unsigned(intrin(self.setall(w))) + data_round = self._to_unsigned(self.setall(-0.0)) + assert _round == data_round + def test_max(self): """ Test intrinsics: @@ -818,6 +849,7 @@ class _SIMD_ALL(_Test_Utility): if self._is_fp(): return + int_min = self._int_min() def trunc_div(a, d): """ Divide towards zero works with large integers > 2^53, @@ -830,57 +862,31 @@ class _SIMD_ALL(_Test_Utility): return a // d return (a + sign_d - sign_a) // d + 1 - int_min = self._int_min() if self._is_signed() else 1 - int_max = self._int_max() - rdata = ( - 0, 1, self.nlanes, int_max-self.nlanes, - int_min, int_min//2 + 1 - ) - divisors = (1, 2, 9, 13, self.nlanes, int_min, int_max, int_max//2) - - for x, d in itertools.product(rdata, divisors): - data = self._data(x) - vdata = self.load(data) - data_divc = [trunc_div(a, d) for a in data] - divisor = self.divisor(d) - divc = self.divc(vdata, divisor) - assert divc == data_divc - - if not self._is_signed(): - return - - safe_neg = lambda x: -x-1 if -x > int_max else -x - # test round division for signed integers - for x, d in itertools.product(rdata, divisors): - d_neg = safe_neg(d) - data = self._data(x) - data_neg = [safe_neg(a) for a in data] - vdata = self.load(data) - vdata_neg = self.load(data_neg) - divisor = self.divisor(d) - divisor_neg = self.divisor(d_neg) - - # round towards zero - data_divc = [trunc_div(a, d_neg) for a in data] - divc = self.divc(vdata, divisor_neg) - assert divc == data_divc - data_divc = [trunc_div(a, d) for a in data_neg] - divc = self.divc(vdata_neg, divisor) + data = [1, -int_min] # to test overflow + data += range(0, 2**8, 2**5) + data += range(0, 2**8, 2**5-1) + bsize = self._scalar_size() + if bsize > 8: + data += range(2**8, 2**16, 2**13) + data += range(2**8, 2**16, 2**13-1) + if bsize > 16: + data += range(2**16, 2**32, 2**29) + data += range(2**16, 2**32, 2**29-1) + if bsize > 32: + data += range(2**32, 2**64, 2**61) + data += range(2**32, 2**64, 2**61-1) + # negate + data += [-x for x in data] + for dividend, divisor in itertools.product(data, data): + divisor = self.setall(divisor)[0] # cast + if divisor == 0: + continue + dividend = self.load(self._data(dividend)) + data_divc = [trunc_div(a, divisor) for a in dividend] + divisor_parms = self.divisor(divisor) + divc = self.divc(dividend, divisor_parms) assert divc == data_divc - # test truncate sign if the dividend is zero - vzero = self.zero() - for d in (-1, -10, -100, int_min//2, int_min): - divisor = self.divisor(d) - divc = self.divc(vzero, divisor) - assert divc == vzero - - # test overflow - vmin = self.setall(int_min) - divisor = self.divisor(-1) - divc = self.divc(vmin, divisor) - assert divc == vmin - def test_arithmetic_reduce_sum(self): """ Test reduce sum intrinsics: diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 30929ce91..ef0bac957 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -1362,6 +1362,14 @@ class TestUfunc: np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object), ) + def test_object_array_accumulate_failure(self): + # Typical accumulation on object works as expected: + res = np.add.accumulate(np.array([1, 0, 2], dtype=object)) + assert_array_equal(res, np.array([1, 1, 3], dtype=object)) + # But errors are propagated from the inner-loop if they occur: + with pytest.raises(TypeError): + np.add.accumulate([1, None, 2]) + def test_object_array_reduceat_inplace(self): # Checks that in-place reduceats work, see also gh-7465 arr = np.empty(4, dtype=object) @@ -1381,6 +1389,15 @@ class TestUfunc: np.add.reduceat(arr, np.arange(4), out=arr, axis=-1) assert_array_equal(arr, out) + def test_object_array_reduceat_failure(self): + # Reduceat works as expected when no invalid operation occurs (None is + # not involved in an operation here) + res = np.add.reduceat(np.array([1, None, 2], dtype=object), [1, 2]) + assert_array_equal(res, np.array([None, 2], dtype=object)) + # But errors when None would be involved in an operation: + with pytest.raises(TypeError): + np.add.reduceat([1, None, 2], [0, 2]) + def test_zerosize_reduction(self): # Test with default dtype and object dtype for a in [[], np.array([], dtype=object)]: @@ -2098,6 +2115,25 @@ class TestUfunc: with pytest.raises(TypeError): ufunc(a, a, signature=signature) + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_support_anything(self, ufunc): + # The logical ufuncs support even input that can't be promoted: + a = np.array('1') + c = np.array([1., 2.]) + assert_array_equal(ufunc(a, c), ufunc([True, True], True)) + assert ufunc.reduce(a) == True + + @pytest.mark.parametrize("ufunc", + [np.logical_and, np.logical_or, np.logical_xor]) + def test_logical_ufuncs_out_cast_check(self, ufunc): + a = np.array('1') + c = np.array([1., 2.]) + out = a.copy() + with pytest.raises(TypeError): + # It would be safe, but not equiv casting: + ufunc(a, c, out=out, casting="equiv") + def test_reduce_noncontig_output(self): # Check that reduction deals with non-contiguous output arrays # appropriately. @@ -2119,6 +2155,22 @@ class TestUfunc: assert_equal(y_base[1,:], y_base_copy[1,:]) assert_equal(y_base[3,:], y_base_copy[3,:]) + @pytest.mark.parametrize("with_cast", [True, False]) + def test_reduceat_and_accumulate_out_shape_mismatch(self, with_cast): + # Should raise an error mentioning "shape" or "size" + arr = np.arange(5) + out = np.arange(3) # definitely wrong shape + if with_cast: + # If a cast is necessary on the output, we can be sure to use + # the generic NpyIter (non-fast) path. + out = out.astype(np.float64) + + with pytest.raises(ValueError, match="(shape|size)"): + np.add.reduceat(arr, [0, 3], out=out) + + with pytest.raises(ValueError, match="(shape|size)"): + np.add.accumulate(arr, out=out) + @pytest.mark.parametrize('out_shape', [(), (1,), (3,), (1, 1), (1, 3), (4, 3)]) @pytest.mark.parametrize('keepdims', [True, False]) @@ -2331,8 +2383,9 @@ def test_reduce_casterrors(offset): out = np.array(-1, dtype=np.intp) count = sys.getrefcount(value) - with pytest.raises(ValueError): - # This is an unsafe cast, but we currently always allow that: + with pytest.raises(ValueError, match="invalid literal"): + # This is an unsafe cast, but we currently always allow that. + # Note that the double loop is picked, but the cast fails. np.add.reduce(arr, dtype=np.intp, out=out) assert count == sys.getrefcount(value) # If an error occurred during casting, the operation is done at most until @@ -2340,3 +2393,20 @@ def test_reduce_casterrors(offset): # if the error happened immediately. # This does not define behaviour, the output is invalid and thus undefined assert out[()] < value * offset + + +@pytest.mark.parametrize("method", + [np.add.accumulate, np.add.reduce, + pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat"), + pytest.param(lambda x: np.log.at(x, [2]), id="at")]) +def test_ufunc_methods_floaterrors(method): + # adding inf and -inf (or log(-inf) creates an invalid float and warns + arr = np.array([np.inf, 0, -np.inf]) + with np.errstate(all="warn"): + with pytest.warns(RuntimeWarning, match="invalid value"): + method(arr) + + arr = np.array([np.inf, 0, -np.inf]) + with np.errstate(all="raise"): + with pytest.raises(FloatingPointError): + method(arr) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 8ff81ea51..fc7c592f0 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -15,7 +15,7 @@ from numpy.testing import ( assert_, assert_equal, assert_raises, assert_raises_regex, assert_array_equal, assert_almost_equal, assert_array_almost_equal, assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings, - _gen_alignment_data, assert_array_almost_equal_nulp, assert_warns + _gen_alignment_data, assert_array_almost_equal_nulp ) def get_glibc_version(): @@ -28,9 +28,7 @@ def get_glibc_version(): glibcver = get_glibc_version() -glibc_newerthan_2_17 = pytest.mark.xfail( - glibcver != '0.0' and glibcver < '2.17', - reason="Older glibc versions may not raise appropriate FP exceptions") +glibc_older_than = lambda x: (glibcver != '0.0' and glibcver < x) def on_powerpc(): """ True if we are running on a Power PC platform.""" @@ -50,14 +48,6 @@ def bad_arcsinh(): # The eps for float128 is 1-e33, so this is way bigger return abs((v1 / v2) - 1.0) > 1e-23 -if platform.machine() == 'aarch64' and bad_arcsinh(): - skip_longcomplex_msg = ('Trig functions of np.longcomplex values known to be ' - 'inaccurate on aarch64 for some compilation ' - 'configurations, should be fixed by building on a ' - 'platform using glibc>2.17') -else: - skip_longcomplex_msg = '' - class _FilterInvalids: def setup(self): @@ -1022,9 +1012,11 @@ class TestSpecialFloats: yf = np.array(y, dtype=dt) assert_equal(np.exp(yf), xf) - # Older version of glibc may not raise the correct FP exceptions # See: https://github.com/numpy/numpy/issues/19192 - @glibc_newerthan_2_17 + @pytest.mark.xfail( + glibc_older_than("2.17"), + reason="Older glibc versions may not raise appropriate FP exceptions" + ) def test_exp_exceptions(self): with np.errstate(over='raise'): assert_raises(FloatingPointError, np.exp, np.float32(100.)) @@ -1405,8 +1397,10 @@ class TestAVXFloat32Transcendental: M = np.int_(N/20) index = np.random.randint(low=0, high=N, size=M) x_f32 = np.float32(np.random.uniform(low=-100.,high=100.,size=N)) - # test coverage for elements > 117435.992f for which glibc is used - x_f32[index] = np.float32(10E+10*np.random.rand(M)) + if not glibc_older_than("2.17"): + # test coverage for elements > 117435.992f for which glibc is used + # this is known to be problematic on old glibc, so skip it there + x_f32[index] = np.float32(10E+10*np.random.rand(M)) x_f64 = np.float64(x_f32) assert_array_max_ulp(np.sin(x_f32), np.float32(np.sin(x_f64)), maxulp=2) assert_array_max_ulp(np.cos(x_f32), np.float32(np.cos(x_f64)), maxulp=2) @@ -3439,13 +3433,14 @@ class TestComplexFunctions: x_series = np.logspace(-20, -3.001, 200) x_basic = np.logspace(-2.999, 0, 10, endpoint=False) - if dtype is np.longcomplex: + if glibc_older_than("2.19") and dtype is np.longcomplex: + if (platform.machine() == 'aarch64' and bad_arcsinh()): + pytest.skip("Trig functions of np.longcomplex values known " + "to be inaccurate on aarch64 for some compilation " + "configurations.") # It's not guaranteed that the system-provided arc functions # are accurate down to a few epsilons. (Eg. on Linux 64-bit) # So, give more leeway for long complex tests here: - # Can use 2.1 for > Ubuntu LTS Trusty (2014), glibc = 2.19. - if skip_longcomplex_msg: - pytest.skip(skip_longcomplex_msg) check(x_series, 50.0*eps) else: check(x_series, 2.1*eps) @@ -3886,3 +3881,11 @@ def test_bad_legacy_ufunc_silent_errors(): with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): ncu_tests.always_error.at(arr, [0, 1, 2], arr) + + +@pytest.mark.parametrize('x1', [np.arange(3.0), [0.0, 1.0, 2.0]]) +def test_bad_legacy_gufunc_silent_errors(x1): + # Verify that an exception raised in a gufunc loop propagates correctly. + # The signature of always_error_gufunc is '(i),()->()'. + with pytest.raises(RuntimeError, match=r"How unexpected :\)!"): + ncu_tests.always_error_gufunc(x1, 0.0) diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py index a703c697a..32e2dca66 100644 --- a/numpy/core/tests/test_umath_accuracy.py +++ b/numpy/core/tests/test_umath_accuracy.py @@ -1,5 +1,4 @@ import numpy as np -import platform import os from os import path import sys |