summaryrefslogtreecommitdiff
path: root/numpy/core
diff options
context:
space:
mode:
authorDeveloper-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com>2021-11-18 14:31:58 -0800
committerGitHub <noreply@github.com>2021-11-18 14:31:58 -0800
commit5e9ce0c0529e3085498ac892941a020a65c7369a (patch)
treea70d9e941549b4a51b493f1b170ef33ce0d5a217 /numpy/core
parent2ff7ab64d4e7d5928e96ca95b85350aa9caa2b63 (diff)
parent056abda14dab7fa8daf7a1ab44144aeb2250c216 (diff)
downloadnumpy-5e9ce0c0529e3085498ac892941a020a65c7369a.tar.gz
Merge branch 'numpy:main' into as_min_max
Diffstat (limited to 'numpy/core')
-rw-r--r--numpy/core/__init__.py18
-rw-r--r--numpy/core/_add_newdocs.py46
-rw-r--r--numpy/core/_add_newdocs_scalars.py19
-rw-r--r--numpy/core/_machar.py (renamed from numpy/core/machar.py)1
-rw-r--r--numpy/core/_ufunc_config.pyi13
-rw-r--r--numpy/core/code_generators/cversions.txt9
-rw-r--r--numpy/core/code_generators/genapi.py1
-rw-r--r--numpy/core/code_generators/generate_umath.py2
-rw-r--r--numpy/core/code_generators/numpy_api.py10
-rw-r--r--numpy/core/code_generators/ufunc_docstrings.py51
-rw-r--r--numpy/core/getlimits.py25
-rw-r--r--numpy/core/getlimits.pyi52
-rw-r--r--numpy/core/include/numpy/experimental_dtype_api.h122
-rw-r--r--numpy/core/include/numpy/ndarraytypes.h52
-rw-r--r--numpy/core/include/numpy/npy_common.h29
-rw-r--r--numpy/core/include/numpy/npy_math.h11
-rw-r--r--numpy/core/include/numpy/numpyconfig.h16
-rw-r--r--numpy/core/include/numpy/ufuncobject.h5
-rw-r--r--numpy/core/multiarray.py32
-rw-r--r--numpy/core/multiarray.pyi11
-rw-r--r--numpy/core/numeric.py22
-rw-r--r--numpy/core/numeric.pyi532
-rw-r--r--numpy/core/numerictypes.py6
-rw-r--r--numpy/core/setup.py72
-rw-r--r--numpy/core/setup_common.py5
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src4
-rw-r--r--numpy/core/src/common/dlpack/dlpack.h201
-rw-r--r--numpy/core/src/common/npy_dlpack.h28
-rw-r--r--numpy/core/src/common/npy_sort.h.src7
-rw-r--r--numpy/core/src/common/numpy_tag.h78
-rw-r--r--numpy/core/src/common/simd/avx2/math.h8
-rw-r--r--numpy/core/src/common/simd/avx2/memory.h24
-rw-r--r--numpy/core/src/common/simd/avx2/misc.h27
-rw-r--r--numpy/core/src/common/simd/avx512/math.h10
-rw-r--r--numpy/core/src/common/simd/avx512/memory.h12
-rw-r--r--numpy/core/src/common/simd/avx512/misc.h32
-rw-r--r--numpy/core/src/common/simd/avx512/utils.h22
-rw-r--r--numpy/core/src/common/simd/intdiv.h7
-rw-r--r--numpy/core/src/common/simd/neon/math.h82
-rw-r--r--numpy/core/src/common/simd/simd.h19
-rw-r--r--numpy/core/src/common/simd/sse/math.h59
-rw-r--r--numpy/core/src/common/simd/sse/misc.h25
-rw-r--r--numpy/core/src/common/simd/vsx/math.h8
-rw-r--r--numpy/core/src/multiarray/_multiarray_tests.c.src16
-rw-r--r--numpy/core/src/multiarray/alloc.c363
-rw-r--r--numpy/core/src/multiarray/alloc.h19
-rw-r--r--numpy/core/src/multiarray/array_coercion.c16
-rw-r--r--numpy/core/src/multiarray/array_coercion.h2
-rw-r--r--numpy/core/src/multiarray/array_method.c7
-rw-r--r--numpy/core/src/multiarray/array_method.h11
-rw-r--r--numpy/core/src/multiarray/arrayobject.c43
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src35
-rw-r--r--numpy/core/src/multiarray/common.c2
-rw-r--r--numpy/core/src/multiarray/compiled_base.c2
-rw-r--r--numpy/core/src/multiarray/conversion_utils.c35
-rw-r--r--numpy/core/src/multiarray/conversion_utils.h9
-rw-r--r--numpy/core/src/multiarray/convert_datatype.c2
-rw-r--r--numpy/core/src/multiarray/ctors.c137
-rw-r--r--numpy/core/src/multiarray/ctors.h7
-rw-r--r--numpy/core/src/multiarray/descriptor.c11
-rw-r--r--numpy/core/src/multiarray/dlpack.c408
-rw-r--r--numpy/core/src/multiarray/dtypemeta.h6
-rw-r--r--numpy/core/src/multiarray/einsum_sumprod.c.src30
-rw-r--r--numpy/core/src/multiarray/experimental_public_dtype_api.c39
-rw-r--r--numpy/core/src/multiarray/getset.c18
-rw-r--r--numpy/core/src/multiarray/item_selection.c52
-rw-r--r--numpy/core/src/multiarray/methods.c109
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c66
-rw-r--r--numpy/core/src/multiarray/nditer_pywrap.c20
-rw-r--r--numpy/core/src/multiarray/scalarapi.c5
-rw-r--r--numpy/core/src/multiarray/scalartypes.c.src77
-rw-r--r--numpy/core/src/multiarray/shape.c12
-rw-r--r--numpy/core/src/npymath/npy_math_internal.h.src126
-rw-r--r--numpy/core/src/npymath/npy_math_private.h18
-rw-r--r--numpy/core/src/npysort/radixsort.c.src231
-rw-r--r--numpy/core/src/npysort/radixsort.cpp354
-rw-r--r--numpy/core/src/umath/_scaled_float_dtype.c52
-rw-r--r--numpy/core/src/umath/_umath_tests.c.src10
-rw-r--r--numpy/core/src/umath/clip.c.src120
-rw-r--r--numpy/core/src/umath/clip.cpp282
-rw-r--r--numpy/core/src/umath/clip.h73
-rw-r--r--numpy/core/src/umath/clip.h.src18
-rw-r--r--numpy/core/src/umath/dispatching.c181
-rw-r--r--numpy/core/src/umath/dispatching.h4
-rw-r--r--numpy/core/src/umath/legacy_array_method.c19
-rw-r--r--numpy/core/src/umath/loops.c.src8
-rw-r--r--numpy/core/src/umath/loops.h.src6
-rw-r--r--numpy/core/src/umath/loops_trigonometric.dispatch.c.src2
-rw-r--r--numpy/core/src/umath/loops_umath_fp.dispatch.c.src4
-rw-r--r--numpy/core/src/umath/loops_unary_fp.dispatch.c.src23
-rw-r--r--numpy/core/src/umath/reduction.c57
-rw-r--r--numpy/core/src/umath/reduction.h110
-rw-r--r--numpy/core/src/umath/simd.inc.src22
m---------numpy/core/src/umath/svml0
-rw-r--r--numpy/core/src/umath/ufunc_object.c985
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.c33
-rw-r--r--numpy/core/src/umath/ufunc_type_resolution.h4
-rw-r--r--numpy/core/src/umath/umathmodule.c29
-rw-r--r--numpy/core/tests/data/generate_umath_validation_data.cpp137
-rw-r--r--numpy/core/tests/test_api.py28
-rw-r--r--numpy/core/tests/test_casting_unittests.py1
-rw-r--r--numpy/core/tests/test_custom_dtypes.py54
-rw-r--r--numpy/core/tests/test_datetime.py15
-rw-r--r--numpy/core/tests/test_deprecations.py57
-rw-r--r--numpy/core/tests/test_dlpack.py109
-rw-r--r--numpy/core/tests/test_dtype.py35
-rw-r--r--numpy/core/tests/test_einsum.py48
-rw-r--r--numpy/core/tests/test_getlimits.py2
-rw-r--r--numpy/core/tests/test_machar.py2
-rw-r--r--numpy/core/tests/test_mem_policy.py423
-rw-r--r--numpy/core/tests/test_multiarray.py222
-rw-r--r--numpy/core/tests/test_nditer.py8
-rw-r--r--numpy/core/tests/test_numeric.py37
-rw-r--r--numpy/core/tests/test_scalar_methods.py18
-rw-r--r--numpy/core/tests/test_scalarmath.py4
-rw-r--r--numpy/core/tests/test_simd.py106
-rw-r--r--numpy/core/tests/test_ufunc.py74
-rw-r--r--numpy/core/tests/test_umath.py43
-rw-r--r--numpy/core/tests/test_umath_accuracy.py1
119 files changed, 5879 insertions, 1690 deletions
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py
index dad9293e1..b89e27f0f 100644
--- a/numpy/core/__init__.py
+++ b/numpy/core/__init__.py
@@ -9,6 +9,7 @@ are available in the main ``numpy`` namespace - use that instead.
from numpy.version import version as __version__
import os
+import warnings
# disables OpenBLAS affinity setting of the main thread that limits
# python threads or processes to one core
@@ -80,8 +81,8 @@ from .memmap import *
from .defchararray import chararray
from . import function_base
from .function_base import *
-from . import machar
-from .machar import *
+from . import _machar
+from ._machar import *
from . import getlimits
from .getlimits import *
from . import shape_base
@@ -105,11 +106,9 @@ from . import _methods
__all__ = ['char', 'rec', 'memmap']
__all__ += numeric.__all__
-__all__ += fromnumeric.__all__
__all__ += ['record', 'recarray', 'format_parser']
__all__ += ['chararray']
__all__ += function_base.__all__
-__all__ += machar.__all__
__all__ += getlimits.__all__
__all__ += shape_base.__all__
__all__ += einsumfunc.__all__
@@ -151,6 +150,17 @@ def _DType_reduce(DType):
return _DType_reconstruct, (scalar_type,)
+def __getattr__(name):
+ # Deprecated 2021-10-20, NumPy 1.22
+ if name == "machar":
+ warnings.warn(
+ "The `np.core.machar` module is deprecated (NumPy 1.22)",
+ DeprecationWarning, stacklevel=2,
+ )
+ return _machar
+ raise AttributeError(f"Module {__name__!r} has no attribute {name!r}")
+
+
import copyreg
copyreg.pickle(ufunc, _ufunc_reduce)
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py
index 7467be80f..078c58976 100644
--- a/numpy/core/_add_newdocs.py
+++ b/numpy/core/_add_newdocs.py
@@ -328,7 +328,7 @@ add_newdoc('numpy.core', 'nditer',
... with it:
... for (a, b, c) in it:
... addop(a, b, out=c)
- ... return it.operands[2]
+ ... return it.operands[2]
Here is the same function, but following the C-style pattern:
@@ -1573,6 +1573,19 @@ add_newdoc('numpy.core.multiarray', 'frombuffer',
array_function_like_doc,
))
+add_newdoc('numpy.core.multiarray', '_from_dlpack',
+ """
+ _from_dlpack(x, /)
+
+ Create a NumPy array from an object implementing the ``__dlpack__``
+ protocol.
+
+ See Also
+ --------
+ `Array API documentation
+ <https://data-apis.org/array-api/latest/design_topics/data_interchange.html#syntax-for-data-interchange-with-dlpack>`_
+ """)
+
add_newdoc('numpy.core', 'fastCopyAndTranspose',
"""_fastCopyAndTranspose(a)""")
@@ -2263,6 +2276,15 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_priority__',
add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_struct__',
"""Array protocol: C-struct side."""))
+add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack__',
+ """a.__dlpack__(*, stream=None)
+
+ DLPack Protocol: Part of the Array API."""))
+
+add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack_device__',
+ """a.__dlpack_device__()
+
+ DLPack Protocol: Part of the Array API."""))
add_newdoc('numpy.core.multiarray', 'ndarray', ('base',
"""
@@ -2819,7 +2841,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__class_getitem__',
>>> import numpy as np
>>> np.ndarray[Any, np.dtype[Any]]
- numpy.ndarray[typing.Any, numpy.dtype[Any]]
+ numpy.ndarray[typing.Any, numpy.dtype[typing.Any]]
Notes
-----
@@ -4727,6 +4749,26 @@ add_newdoc('numpy.core.umath', '_add_newdoc_ufunc',
and then throwing away the ufunc.
""")
+add_newdoc('numpy.core.multiarray', 'get_handler_name',
+ """
+ get_handler_name(a: ndarray) -> str,None
+
+ Return the name of the memory handler used by `a`. If not provided, return
+ the name of the memory handler that will be used to allocate data for the
+ next `ndarray` in this context. May return None if `a` does not own its
+ memory, in which case you can traverse ``a.base`` for a memory handler.
+ """)
+
+add_newdoc('numpy.core.multiarray', 'get_handler_version',
+ """
+ get_handler_version(a: ndarray) -> int,None
+
+ Return the version of the memory handler used by `a`. If not provided,
+ return the version of the memory handler that will be used to allocate data
+ for the next `ndarray` in this context. May return None if `a` does not own
+ its memory, in which case you can traverse ``a.base`` for a memory handler.
+ """)
+
add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage',
"""
_set_madvise_hugepage(enabled: bool) -> bool
diff --git a/numpy/core/_add_newdocs_scalars.py b/numpy/core/_add_newdocs_scalars.py
index 8773d6c96..94859a9d5 100644
--- a/numpy/core/_add_newdocs_scalars.py
+++ b/numpy/core/_add_newdocs_scalars.py
@@ -290,3 +290,22 @@ for float_name in ('half', 'single', 'double', 'longdouble'):
>>> np.{float_name}(3.2).is_integer()
False
"""))
+
+for int_name in ('int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32',
+ 'int64', 'uint64', 'int64', 'uint64', 'int64', 'uint64'):
+ # Add negative examples for signed cases by checking typecode
+ add_newdoc('numpy.core.numerictypes', int_name, ('bit_count',
+ f"""
+ {int_name}.bit_count() -> int
+
+ Computes the number of 1-bits in the absolute value of the input.
+ Analogous to the builtin `int.bit_count` or ``popcount`` in C++.
+
+ Examples
+ --------
+ >>> np.{int_name}(127).bit_count()
+ 7""" +
+ (f"""
+ >>> np.{int_name}(-127).bit_count()
+ 7
+ """ if dtype(int_name).char.islower() else "")))
diff --git a/numpy/core/machar.py b/numpy/core/_machar.py
index c77be793f..ace19a429 100644
--- a/numpy/core/machar.py
+++ b/numpy/core/_machar.py
@@ -13,6 +13,7 @@ from numpy.core.overrides import set_module
# Need to speed this up...especially for longfloat
+# Deprecated 2021-10-20, NumPy 1.22
@set_module('numpy')
class MachAr:
"""
diff --git a/numpy/core/_ufunc_config.pyi b/numpy/core/_ufunc_config.pyi
index 9c8cc8ab6..cd7129bcb 100644
--- a/numpy/core/_ufunc_config.pyi
+++ b/numpy/core/_ufunc_config.pyi
@@ -1,11 +1,10 @@
-from typing import Optional, Union, Callable, Any, Literal, Protocol, TypedDict
+from typing import Optional, Union, Callable, Any, Literal, TypedDict
+
+from numpy import _SupportsWrite
_ErrKind = Literal["ignore", "warn", "raise", "call", "print", "log"]
_ErrFunc = Callable[[str, int], Any]
-class _SupportsWrite(Protocol):
- def write(self, msg: str, /) -> Any: ...
-
class _ErrDict(TypedDict):
divide: _ErrKind
over: _ErrKind
@@ -30,8 +29,8 @@ def geterr() -> _ErrDict: ...
def setbufsize(size: int) -> int: ...
def getbufsize() -> int: ...
def seterrcall(
- func: Union[None, _ErrFunc, _SupportsWrite]
-) -> Union[None, _ErrFunc, _SupportsWrite]: ...
-def geterrcall() -> Union[None, _ErrFunc, _SupportsWrite]: ...
+ func: Union[None, _ErrFunc, _SupportsWrite[str]]
+) -> Union[None, _ErrFunc, _SupportsWrite[str]]: ...
+def geterrcall() -> Union[None, _ErrFunc, _SupportsWrite[str]]: ...
# See `numpy/__init__.pyi` for the `errstate` class
diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt
index a02c7153a..e1ee8a860 100644
--- a/numpy/core/code_generators/cversions.txt
+++ b/numpy/core/code_generators/cversions.txt
@@ -1,6 +1,8 @@
# Hash below were defined from numpy_api_order.txt and ufunc_api_order.txt
# When adding a new version here for a new minor release, also add the same
-# version as NPY_x_y_API_VERSION in numpyconfig.h
+# version as NPY_x_y_API_VERSION in numpyconfig.h and C_API_VERSION in
+# setup_common.py.
+
0x00000001 = 603580d224763e58c5e7147f804dc0f5
0x00000002 = 8ecb29306758515ae69749c803a75da1
0x00000003 = bf22c0d05b31625d2a7015988d61ce5a
@@ -56,5 +58,8 @@
# DType related API additions.
# A new field was added to the end of PyArrayObject_fields.
# Version 14 (NumPy 1.21) No change.
-# Version 14 (NumPy 1.22) No change.
0x0000000e = 17a0f366e55ec05e5c5c149123478452
+
+# Version 15 (NumPy 1.22) Configurable memory allocations
+# Version 14 (NumPy 1.23) No change.
+0x0000000f = b8783365b873681cd204be50cdfb448d
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index c2458c2b5..b401ee6a5 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -41,6 +41,7 @@ API_FILES = [join('multiarray', 'alloc.c'),
join('multiarray', 'datetime_busdaycal.c'),
join('multiarray', 'datetime_strings.c'),
join('multiarray', 'descriptor.c'),
+ join('multiarray', 'dlpack.c'),
join('multiarray', 'dtypemeta.c'),
join('multiarray', 'einsum.c.src'),
join('multiarray', 'flagsobject.c'),
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 9fa87a11e..dc71fc5c9 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -829,7 +829,7 @@ defdict = {
docstrings.get('numpy.core.umath.ceil'),
None,
TD('e', f='ceil', astype={'e': 'f'}),
- TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+ TD(inexactvec, dispatch=[('loops_unary_fp', 'fd')]),
TD('fdg', f='ceil'),
TD(O, f='npy_ObjectCeil'),
),
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index fbd323368..d12d62d8f 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -19,6 +19,7 @@ from code_generators.genapi import StealRef, NonNull
multiarray_global_vars = {
'NPY_NUMUSERTYPES': (7, 'int'),
'NPY_DEFAULT_ASSIGN_CASTING': (292, 'NPY_CASTING'),
+ 'PyDataMem_DefaultHandler': (306, 'PyObject*'),
}
multiarray_scalar_bool_values = {
@@ -76,9 +77,9 @@ multiarray_types_api = {
# End 1.6 API
}
-#define NPY_NUMUSERTYPES (*(int *)PyArray_API[6])
-#define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[7])
-#define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[8])
+# define NPY_NUMUSERTYPES (*(int *)PyArray_API[6])
+# define PyBoolArrType_Type (*(PyTypeObject *)PyArray_API[7])
+# define _PyArrayScalar_BoolValues ((PyBoolScalarObject *)PyArray_API[8])
multiarray_funcs_api = {
'PyArray_GetNDArrayCVersion': (0,),
@@ -350,6 +351,9 @@ multiarray_funcs_api = {
'PyArray_ResolveWritebackIfCopy': (302,),
'PyArray_SetWritebackIfCopyBase': (303,),
# End 1.14 API
+ 'PyDataMem_SetHandler': (304,),
+ 'PyDataMem_GetHandler': (305,),
+ # End 1.21 API
}
ufunc_types_api = {
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 4e1182de6..cd584eea7 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -1420,7 +1420,7 @@ add_newdoc('numpy.core.umath', 'floor_divide',
add_newdoc('numpy.core.umath', 'fmod',
"""
- Return the element-wise remainder of division.
+ Returns the element-wise remainder of division.
This is the NumPy implementation of the C library function fmod, the
remainder has the same sign as the dividend `x1`. It is equivalent to
@@ -1678,7 +1678,7 @@ add_newdoc('numpy.core.umath', 'invert',
add_newdoc('numpy.core.umath', 'isfinite',
"""
- Test element-wise for finiteness (not infinity or not Not a Number).
+ Test element-wise for finiteness (not infinity and not Not a Number).
The result is returned as a boolean array.
@@ -3065,8 +3065,14 @@ add_newdoc('numpy.core.umath', 'power',
First array elements raised to powers from second array, element-wise.
Raise each base in `x1` to the positionally-corresponding power in
- `x2`. `x1` and `x2` must be broadcastable to the same shape. Note that an
- integer type raised to a negative integer power will raise a ValueError.
+ `x2`. `x1` and `x2` must be broadcastable to the same shape.
+
+ An integer type raised to a negative integer power will raise a
+ ``ValueError``.
+
+ Negative values raised to a non-integral value will return ``nan``.
+ To get complex results, cast the input to complex, or specify the
+ ``dtype`` to be ``complex`` (see the example below).
Parameters
----------
@@ -3121,6 +3127,21 @@ add_newdoc('numpy.core.umath', 'power',
>>> x1 ** x2
array([ 0, 1, 8, 27, 16, 5])
+ Negative values raised to a non-integral value will result in ``nan``
+ (and a warning will be generated).
+
+ >>> x3 = np.array([-1.0, -4.0])
+ >>> with np.errstate(invalid='ignore'):
+ ... p = np.power(x3, 1.5)
+ ...
+ >>> p
+ array([nan, nan])
+
+ To get complex results, give the argument ``dtype=complex``.
+
+ >>> np.power(x3, 1.5, dtype=complex)
+ array([-1.83697020e-16-1.j, -1.46957616e-15-8.j])
+
""")
add_newdoc('numpy.core.umath', 'float_power',
@@ -3134,6 +3155,10 @@ add_newdoc('numpy.core.umath', 'float_power',
inexact. The intent is that the function will return a usable result for
negative powers and seldom overflow for positive powers.
+ Negative values raised to a non-integral value will return ``nan``.
+ To get complex results, cast the input to complex, or specify the
+ ``dtype`` to be ``complex`` (see the example below).
+
.. versionadded:: 1.12.0
Parameters
@@ -3181,6 +3206,21 @@ add_newdoc('numpy.core.umath', 'float_power',
array([[ 0., 1., 8., 27., 16., 5.],
[ 0., 1., 8., 27., 16., 5.]])
+ Negative values raised to a non-integral value will result in ``nan``
+ (and a warning will be generated).
+
+ >>> x3 = np.array([-1, -4])
+ >>> with np.errstate(invalid='ignore'):
+ ... p = np.float_power(x3, 1.5)
+ ...
+ >>> p
+ array([nan, nan])
+
+ To get complex results, give the argument ``dtype=complex``.
+
+ >>> np.float_power(x3, 1.5, dtype=complex)
+ array([-1.83697020e-16-1.j, -1.46957616e-15-8.j])
+
""")
add_newdoc('numpy.core.umath', 'radians',
@@ -3292,7 +3332,7 @@ add_newdoc('numpy.core.umath', 'reciprocal',
add_newdoc('numpy.core.umath', 'remainder',
"""
- Return element-wise remainder of division.
+ Returns the element-wise remainder of division.
Computes the remainder complementary to the `floor_divide` function. It is
equivalent to the Python modulus operator``x1 % x2`` and has the same sign
@@ -3787,6 +3827,7 @@ add_newdoc('numpy.core.umath', 'sqrt',
--------
lib.scimath.sqrt
A version which returns complex numbers when given negative reals.
+ Note: 0.0 and -0.0 are handled differently for complex inputs.
Notes
-----
diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py
index 0f7031bac..ab4a4d2be 100644
--- a/numpy/core/getlimits.py
+++ b/numpy/core/getlimits.py
@@ -5,13 +5,12 @@ __all__ = ['finfo', 'iinfo']
import warnings
-from .machar import MachAr
+from ._machar import MachAr
from .overrides import set_module
from . import numeric
from . import numerictypes as ntypes
from .numeric import array, inf, NaN
from .umath import log10, exp2, nextafter, isnan
-from . import umath
def _fr0(a):
@@ -386,6 +385,8 @@ class finfo:
machar : MachAr
The object which calculated these parameters and holds more
detailed information.
+
+ .. deprecated:: 1.22
machep : int
The exponent that yields `eps`.
max : floating point number of the appropriate type
@@ -502,7 +503,7 @@ class finfo:
self.eps = machar.eps.flat[0]
self.nexp = machar.iexp
self.nmant = machar.it
- self.machar = machar
+ self._machar = machar
self._str_tiny = machar._str_xmin.strip()
self._str_max = machar._str_xmax.strip()
self._str_epsneg = machar._str_epsneg.strip()
@@ -552,11 +553,11 @@ class finfo:
"""
# This check is necessary because the value for smallest_normal is
# platform dependent for longdouble types.
- if isnan(self.machar.smallest_normal.flat[0]):
+ if isnan(self._machar.smallest_normal.flat[0]):
warnings.warn(
'The value of smallest normal is undefined for double double',
UserWarning, stacklevel=2)
- return self.machar.smallest_normal.flat[0]
+ return self._machar.smallest_normal.flat[0]
@property
def tiny(self):
@@ -575,6 +576,20 @@ class finfo:
"""
return self.smallest_normal
+ @property
+ def machar(self):
+ """The object which calculated these parameters and holds more
+ detailed information.
+
+ .. deprecated:: 1.22
+ """
+ # Deprecated 2021-10-27, NumPy 1.22
+ warnings.warn(
+ "`finfo.machar` is deprecated (NumPy 1.22)",
+ DeprecationWarning, stacklevel=2,
+ )
+ return self._machar
+
@set_module('numpy')
class iinfo:
diff --git a/numpy/core/getlimits.pyi b/numpy/core/getlimits.pyi
index ca22e18f7..66d062995 100644
--- a/numpy/core/getlimits.pyi
+++ b/numpy/core/getlimits.pyi
@@ -1,58 +1,8 @@
-from typing import Any, Generic, List, Type, TypeVar
+from typing import List
from numpy import (
finfo as finfo,
iinfo as iinfo,
- floating,
- signedinteger,
)
-from numpy.typing import NBitBase, NDArray
-
-_NBit = TypeVar("_NBit", bound=NBitBase)
-
__all__: List[str]
-
-class MachArLike(Generic[_NBit]):
- def __init__(
- self,
- ftype: Type[floating[_NBit]],
- *,
- eps: floating[Any],
- epsneg: floating[Any],
- huge: floating[Any],
- tiny: floating[Any],
- ibeta: int,
- smallest_subnormal: None | floating[Any] = ...,
- # Expand `**kwargs` into keyword-only arguments
- machep: int,
- negep: int,
- minexp: int,
- maxexp: int,
- it: int,
- iexp: int,
- irnd: int,
- ngrd: int,
- ) -> None: ...
- @property
- def smallest_subnormal(self) -> NDArray[floating[_NBit]]: ...
- eps: NDArray[floating[_NBit]]
- epsilon: NDArray[floating[_NBit]]
- epsneg: NDArray[floating[_NBit]]
- huge: NDArray[floating[_NBit]]
- ibeta: signedinteger[_NBit]
- iexp: int
- irnd: int
- it: int
- machep: int
- maxexp: int
- minexp: int
- negep: int
- ngrd: int
- precision: int
- resolution: NDArray[floating[_NBit]]
- smallest_normal: NDArray[floating[_NBit]]
- tiny: NDArray[floating[_NBit]]
- title: str
- xmax: NDArray[floating[_NBit]]
- xmin: NDArray[floating[_NBit]]
diff --git a/numpy/core/include/numpy/experimental_dtype_api.h b/numpy/core/include/numpy/experimental_dtype_api.h
index 22854a725..554c7fb6c 100644
--- a/numpy/core/include/numpy/experimental_dtype_api.h
+++ b/numpy/core/include/numpy/experimental_dtype_api.h
@@ -16,13 +16,47 @@
* in your module init. (A version mismatch will be reported, just update
* to the correct one, this will alert you of possible changes.)
*
- * The two main symbols exported are:
+ * The following lists the main symbols currently exported. Please do not
+ * hesitate to ask for help or clarification:
*
- * - PyUFunc_AddLoopFromSpec (Register a new loop for a ufunc)
- * - PyArrayInitDTypeMeta_FromSpec (Create a new DType)
+ * - PyUFunc_AddLoopFromSpec:
*
- * Please check the in-line documentation for details and do not hesitate to
- * ask for help.
+ * Register a new loop for a ufunc. This uses the `PyArrayMethod_Spec`
+ * which must be filled in (see in-line comments).
+ *
+ * - PyUFunc_AddPromoter:
+ *
+ * Register a new promoter for a ufunc. A promoter is a function stored
+ * in a PyCapsule (see in-line comments). It is passed the operation and
+ * requested DType signatures and can mutate it to attempt a new search
+ * for a matching loop/promoter.
+ * I.e. for Numba a promoter could even add the desired loop.
+ *
+ * - PyArrayInitDTypeMeta_FromSpec:
+ *
+ * Initialize a new DType. It must currently be a static Python C type
+ * that is declared as `PyArray_DTypeMeta` and not `PyTypeObject`.
+ * Further, it must subclass `np.dtype` and set its type to
+ * `PyArrayDTypeMeta_Type` (before calling `PyType_Read()`).
+ *
+ * - PyArray_CommonDType:
+ *
+ * Find the common-dtype ("promotion") for two DType classes. Similar
+ * to `np.result_type`, but works on the classes and not instances.
+ *
+ * - PyArray_PromoteDTypeSequence:
+ *
+ * Same as CommonDType, but works with an arbitrary number of DTypes.
+ * This function is smarter and can often return successful and unambiguous
+ * results when `common_dtype(common_dtype(dt1, dt2), dt3)` would
+ * depend on the operation order or fail. Nevertheless, DTypes should
+ * aim to ensure that their common-dtype implementation is associative
+ * and commutative! (Mainly, unsigned and signed integers are not.)
+ *
+ * For guaranteed consistent results DTypes must implement common-Dtype
+ * "transitively". If A promotes B and B promotes C, than A must generally
+ * also promote C; where "promotes" means implements the promotion.
+ * (There are some exceptions for abstract DTypes)
*
* WARNING
* =======
@@ -67,11 +101,28 @@ __not_imported(void)
printf("*****\nCritical error, dtype API not imported\n*****\n");
}
static void *__uninitialized_table[] = {
+ &__not_imported, &__not_imported, &__not_imported, &__not_imported,
&__not_imported, &__not_imported, &__not_imported, &__not_imported};
static void **__experimental_dtype_api_table = __uninitialized_table;
+
+/*
+ * DTypeMeta struct, the content may be made fully opaque (except the size).
+ * We may also move everything into a single `void *dt_slots`.
+ */
+typedef struct {
+ PyHeapTypeObject super;
+ PyArray_Descr *singleton;
+ int type_num;
+ PyTypeObject *scalar_type;
+ npy_uint64 flags;
+ void *dt_slots;
+ void *reserved[3];
+} PyArray_DTypeMeta;
+
+
/*
* ******************************************************
* ArrayMethod API (Casting and UFuncs)
@@ -128,6 +179,28 @@ typedef PyObject *_ufunc_addloop_fromspec_func(
/*
+ * Type of the C promoter function, which must be wrapped into a
+ * PyCapsule with name "numpy._ufunc_promoter".
+ */
+typedef int promoter_function(PyObject *ufunc,
+ PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *new_op_dtypes[]);
+
+/*
+ * Function to register a promoter.
+ *
+ * @param ufunc The ufunc object to register the promoter with.
+ * @param DType_tuple A Python tuple containing DTypes or None matching the
+ * number of inputs and outputs of the ufunc.
+ * @param promoter A PyCapsule with name "numpy._ufunc_promoter" containing
+ * a pointer to a `promoter_function`.
+ */
+typedef int _ufunc_addpromoter_func(
+ PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter);
+#define PyUFunc_AddPromoter \
+ (*(_ufunc_addpromoter_func *)(__experimental_dtype_api_table[1]))
+
+/*
* In addition to the normal casting levels, NPY_CAST_IS_VIEW indicates
* that no cast operation is necessary at all (although a copy usually will be)
*
@@ -221,24 +294,8 @@ typedef struct{
} PyArrayDTypeMeta_Spec;
-/*
- * DTypeMeta struct, the content may be made fully opaque (except the size).
- * We may also move everything into a single `void *dt_slots`.
- */
-typedef struct {
- PyHeapTypeObject super;
- PyArray_Descr *singleton;
- int type_num;
- PyTypeObject *scalar_type;
- npy_uint64 flags;
- void *dt_slots;
- void *reserved[3];
-} PyArray_DTypeMeta;
-
-
#define PyArrayDTypeMeta_Type \
- (&(PyTypeObject *)__experimental_dtype_api_table[1])
-
+ (*(PyTypeObject *)__experimental_dtype_api_table[2])
typedef int __dtypemeta_fromspec(
PyArray_DTypeMeta *DType, PyArrayDTypeMeta_Spec *dtype_spec);
/*
@@ -250,8 +307,25 @@ typedef int __dtypemeta_fromspec(
* uses `PyArray_DTypeMeta` defined above as the C-structure.
*/
#define PyArrayInitDTypeMeta_FromSpec \
- ((__dtypemeta_fromspec *)(__experimental_dtype_api_table[2]))
+ ((__dtypemeta_fromspec *)(__experimental_dtype_api_table[3]))
+
+
+/*
+ * *************************************
+ * WORKING WITH DTYPES
+ * *************************************
+ */
+
+typedef PyArray_DTypeMeta *__common_dtype(
+ PyArray_DTypeMeta *DType1, PyArray_DTypeMeta *DType2);
+#define PyArray_CommonDType \
+ ((__common_dtype *)(__experimental_dtype_api_table[4]))
+
+typedef PyArray_DTypeMeta *__promote_dtype_sequence(
+ npy_intp num, PyArray_DTypeMeta *DTypes[]);
+#define PyArray_PromoteDTypeSequence \
+ ((__promote_dtype_sequence *)(__experimental_dtype_api_table[5]))
/*
@@ -264,7 +338,7 @@ typedef int __dtypemeta_fromspec(
* runtime-check this.
* You must call this function to use the symbols defined in this file.
*/
-#define __EXPERIMENTAL_DTYPE_VERSION 1
+#define __EXPERIMENTAL_DTYPE_VERSION 2
static int
import_experimental_dtype_api(int version)
diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h
index 8d810fa64..6240adc0c 100644
--- a/numpy/core/include/numpy/ndarraytypes.h
+++ b/numpy/core/include/numpy/ndarraytypes.h
@@ -355,12 +355,10 @@ struct NpyAuxData_tag {
#define NPY_ERR(str) fprintf(stderr, #str); fflush(stderr);
#define NPY_ERR2(str) fprintf(stderr, str); fflush(stderr);
- /*
- * Macros to define how array, and dimension/strides data is
- * allocated.
- */
-
- /* Data buffer - PyDataMem_NEW/FREE/RENEW are in multiarraymodule.c */
+/*
+* Macros to define how array, and dimension/strides data is
+* allocated. These should be made private
+*/
#define NPY_USE_PYMEM 1
@@ -667,6 +665,29 @@ typedef struct _arr_descr {
} PyArray_ArrayDescr;
/*
+ * Memory handler structure for array data.
+ */
+/* The declaration of free differs from PyMemAllocatorEx */
+typedef struct {
+ void *ctx;
+ void* (*malloc) (void *ctx, size_t size);
+ void* (*calloc) (void *ctx, size_t nelem, size_t elsize);
+ void* (*realloc) (void *ctx, void *ptr, size_t new_size);
+ void (*free) (void *ctx, void *ptr, size_t size);
+ /*
+ * This is the end of the version=1 struct. Only add new fields after
+ * this line
+ */
+} PyDataMemAllocator;
+
+typedef struct {
+ char name[127]; /* multiple of 64 to keep the struct aligned */
+ uint8_t version; /* currently 1 */
+ PyDataMemAllocator allocator;
+} PyDataMem_Handler;
+
+
+/*
* The main array object structure.
*
* It has been recommended to use the inline functions defined below
@@ -716,6 +737,10 @@ typedef struct tagPyArrayObject_fields {
/* For weak references */
PyObject *weakreflist;
void *_buffer_info; /* private buffer info, tagged to allow warning */
+ /*
+ * For malloc/calloc/realloc/free per object
+ */
+ PyObject *mem_handler;
} PyArrayObject_fields;
/*
@@ -843,7 +868,7 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *);
/*
* Always copy the array. Returned arrays are always CONTIGUOUS,
- * ALIGNED, and WRITEABLE.
+ * ALIGNED, and WRITEABLE. See also: NPY_ARRAY_ENSURENOCOPY = 0x4000.
*
* This flag may be requested in constructor functions.
*/
@@ -913,6 +938,13 @@ typedef int (PyArray_FinalizeFunc)(PyArrayObject *, PyObject *);
#define NPY_ARRAY_WRITEBACKIFCOPY 0x2000
/*
+ * No copy may be made while converting from an object/array (result is a view)
+ *
+ * This flag may be requested in constructor functions.
+ */
+#define NPY_ARRAY_ENSURENOCOPY 0x4000
+
+/*
* NOTE: there are also internal flags defined in multiarray/arrayobject.h,
* which start at bit 31 and work down.
*/
@@ -1659,6 +1691,12 @@ PyArray_CLEARFLAGS(PyArrayObject *arr, int flags)
((PyArrayObject_fields *)arr)->flags &= ~flags;
}
+static NPY_INLINE NPY_RETURNS_BORROWED_REF PyObject *
+PyArray_HANDLER(PyArrayObject *arr)
+{
+ return ((PyArrayObject_fields *)arr)->mem_handler;
+}
+
#define PyTypeNum_ISBOOL(type) ((type) == NPY_BOOL)
#define PyTypeNum_ISUNSIGNED(type) (((type) == NPY_UBYTE) || \
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index 12a3e725a..88794ca07 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -356,14 +356,31 @@ typedef unsigned long npy_ulonglong;
typedef unsigned char npy_bool;
#define NPY_FALSE 0
#define NPY_TRUE 1
-
-
+/*
+ * `NPY_SIZEOF_LONGDOUBLE` isn't usually equal to sizeof(long double).
+ * In certain cases, it may be forced to be equal to sizeof(double)
+ * even against the compiler implementation and the same goes for
+ * `complex long double`.
+ *
+ * Therefore, avoid `long double`, use `npy_longdouble` instead,
+ * and when it comes to standard math functions make sure of using
+ * the double version when `NPY_SIZEOF_LONGDOUBLE` == `NPY_SIZEOF_DOUBLE`.
+ * For example:
+ * npy_longdouble *ptr, x;
+ * #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
+ * npy_longdouble r = modf(x, ptr);
+ * #else
+ * npy_longdouble r = modfl(x, ptr);
+ * #endif
+ *
+ * See https://github.com/numpy/numpy/issues/20348
+ */
#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
- typedef double npy_longdouble;
- #define NPY_LONGDOUBLE_FMT "g"
+ #define NPY_LONGDOUBLE_FMT "g"
+ typedef double npy_longdouble;
#else
- typedef long double npy_longdouble;
- #define NPY_LONGDOUBLE_FMT "Lg"
+ #define NPY_LONGDOUBLE_FMT "Lg"
+ typedef long double npy_longdouble;
#endif
#ifndef Py_USING_UNICODE
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index b1e6363e3..bead0dc14 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -150,6 +150,17 @@ NPY_INPLACE npy_long npy_lshiftl(npy_long a, npy_long b);
NPY_INPLACE npy_longlong npy_rshiftll(npy_longlong a, npy_longlong b);
NPY_INPLACE npy_longlong npy_lshiftll(npy_longlong a, npy_longlong b);
+NPY_INPLACE uint8_t npy_popcountuhh(npy_ubyte a);
+NPY_INPLACE uint8_t npy_popcountuh(npy_ushort a);
+NPY_INPLACE uint8_t npy_popcountu(npy_uint a);
+NPY_INPLACE uint8_t npy_popcountul(npy_ulong a);
+NPY_INPLACE uint8_t npy_popcountull(npy_ulonglong a);
+NPY_INPLACE uint8_t npy_popcounthh(npy_byte a);
+NPY_INPLACE uint8_t npy_popcounth(npy_short a);
+NPY_INPLACE uint8_t npy_popcount(npy_int a);
+NPY_INPLACE uint8_t npy_popcountl(npy_long a);
+NPY_INPLACE uint8_t npy_popcountll(npy_longlong a);
+
/*
* C99 double math funcs
*/
diff --git a/numpy/core/include/numpy/numpyconfig.h b/numpy/core/include/numpy/numpyconfig.h
index b2ce66244..4eac083e7 100644
--- a/numpy/core/include/numpy/numpyconfig.h
+++ b/numpy/core/include/numpy/numpyconfig.h
@@ -19,6 +19,19 @@
#define NPY_SIZEOF_LONG 4
#define NPY_SIZEOF_PY_INTPTR_T 4
#endif
+
+ #undef NPY_SIZEOF_LONGDOUBLE
+ #undef NPY_SIZEOF_COMPLEX_LONGDOUBLE
+
+ #ifdef __x86_64
+ #define NPY_SIZEOF_LONGDOUBLE 16
+ #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 32
+ #elif defined(__arm64__)
+ #define NPY_SIZEOF_LONGDOUBLE 8
+ #define NPY_SIZEOF_COMPLEX_LONGDOUBLE 16
+ #else
+ #error "unknown architecture"
+ #endif
#endif
/**
@@ -43,6 +56,7 @@
#define NPY_1_19_API_VERSION 0x00000008
#define NPY_1_20_API_VERSION 0x0000000e
#define NPY_1_21_API_VERSION 0x0000000e
-#define NPY_1_22_API_VERSION 0x0000000e
+#define NPY_1_22_API_VERSION 0x0000000f
+#define NPY_1_23_API_VERSION 0x0000000f
#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_NUMPYCONFIG_H_ */
diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h
index 3f184bd45..1d7050bbe 100644
--- a/numpy/core/include/numpy/ufuncobject.h
+++ b/numpy/core/include/numpy/ufuncobject.h
@@ -173,11 +173,8 @@ typedef struct _tagPyUFuncObject {
* but this was never implemented. (This is also why the above
* selector is called the "legacy" selector.)
*/
- #if PY_VERSION_HEX >= 0x03080000
vectorcallfunc vectorcall;
- #else
- void *reserved2;
- #endif
+
/* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */
void *_always_null_previously_masked_innerloop_selector;
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index 154df6f4d..f88d75978 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -14,8 +14,9 @@ from ._multiarray_umath import * # noqa: F403
# do not change them. issue gh-15518
# _get_ndarray_c_version is semi-public, on purpose not added to __all__
from ._multiarray_umath import (
- _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string,
- _ARRAY_API, _monotonicity, _get_ndarray_c_version, _set_madvise_hugepage,
+ _fastCopyAndTranspose, _flagdict, _from_dlpack, _insert, _reconstruct,
+ _vec_string, _ARRAY_API, _monotonicity, _get_ndarray_c_version,
+ _set_madvise_hugepage,
)
__all__ = [
@@ -23,29 +24,30 @@ __all__ = [
'ITEM_HASOBJECT', 'ITEM_IS_POINTER', 'LIST_PICKLE', 'MAXDIMS',
'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NEEDS_INIT', 'NEEDS_PYAPI',
'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP', '_fastCopyAndTranspose',
- '_flagdict', '_insert', '_reconstruct', '_vec_string', '_monotonicity',
- 'add_docstring', 'arange', 'array', 'asarray', 'asanyarray',
- 'ascontiguousarray', 'asfortranarray', 'bincount', 'broadcast',
- 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast',
+ '_flagdict', '_from_dlpack', '_insert', '_reconstruct', '_vec_string',
+ '_monotonicity', 'add_docstring', 'arange', 'array', 'asarray',
+ 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'bincount',
+ 'broadcast', 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast',
'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2',
'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data',
'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype',
'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat',
- 'frombuffer', 'fromfile', 'fromiter', 'fromstring', 'inner',
- 'interp', 'interp_complex', 'is_busday', 'lexsort',
- 'matmul', 'may_share_memory', 'min_scalar_type', 'ndarray', 'nditer',
- 'nested_iters', 'normalize_axis_index', 'packbits',
- 'promote_types', 'putmask', 'ravel_multi_index', 'result_type', 'scalar',
- 'set_datetimeparse_function', 'set_legacy_print_mode', 'set_numeric_ops',
- 'set_string_function', 'set_typeDict', 'shares_memory',
- 'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
- 'where', 'zeros']
+ 'frombuffer', 'fromfile', 'fromiter', 'fromstring',
+ 'get_handler_name', 'get_handler_version', 'inner', 'interp',
+ 'interp_complex', 'is_busday', 'lexsort', 'matmul', 'may_share_memory',
+ 'min_scalar_type', 'ndarray', 'nditer', 'nested_iters',
+ 'normalize_axis_index', 'packbits', 'promote_types', 'putmask',
+ 'ravel_multi_index', 'result_type', 'scalar', 'set_datetimeparse_function',
+ 'set_legacy_print_mode', 'set_numeric_ops', 'set_string_function',
+ 'set_typeDict', 'shares_memory', 'tracemalloc_domain', 'typeinfo',
+ 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros']
# For backward compatibility, make sure pickle imports these functions from here
_reconstruct.__module__ = 'numpy.core.multiarray'
scalar.__module__ = 'numpy.core.multiarray'
+_from_dlpack.__module__ = 'numpy'
arange.__module__ = 'numpy'
array.__module__ = 'numpy'
asarray.__module__ = 'numpy'
diff --git a/numpy/core/multiarray.pyi b/numpy/core/multiarray.pyi
index 1f3792ecb..a9f68e181 100644
--- a/numpy/core/multiarray.pyi
+++ b/numpy/core/multiarray.pyi
@@ -50,6 +50,7 @@ from numpy import (
_ModeKind,
_SupportsBuffer,
_IOProtocol,
+ _CopyMode,
_NDIterFlagsKind,
_NDIterOpFlagsKind,
)
@@ -177,7 +178,7 @@ def array(
object: _ArrayType,
dtype: None = ...,
*,
- copy: bool = ...,
+ copy: bool | _CopyMode = ...,
order: _OrderKACF = ...,
subok: L[True],
ndmin: int = ...,
@@ -188,7 +189,7 @@ def array(
object: _ArrayLike[_SCT],
dtype: None = ...,
*,
- copy: bool = ...,
+ copy: bool | _CopyMode = ...,
order: _OrderKACF = ...,
subok: bool = ...,
ndmin: int = ...,
@@ -199,7 +200,7 @@ def array(
object: object,
dtype: None = ...,
*,
- copy: bool = ...,
+ copy: bool | _CopyMode = ...,
order: _OrderKACF = ...,
subok: bool = ...,
ndmin: int = ...,
@@ -210,7 +211,7 @@ def array(
object: Any,
dtype: _DTypeLike[_SCT],
*,
- copy: bool = ...,
+ copy: bool | _CopyMode = ...,
order: _OrderKACF = ...,
subok: bool = ...,
ndmin: int = ...,
@@ -221,7 +222,7 @@ def array(
object: Any,
dtype: DTypeLike,
*,
- copy: bool = ...,
+ copy: bool | _CopyMode = ...,
order: _OrderKACF = ...,
subok: bool = ...,
ndmin: int = ...,
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index d8a0cf9a6..014fa0a39 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -13,8 +13,8 @@ from .multiarray import (
WRAP, arange, array, asarray, asanyarray, ascontiguousarray,
asfortranarray, broadcast, can_cast, compare_chararrays,
concatenate, copyto, dot, dtype, empty,
- empty_like, flatiter, frombuffer, fromfile, fromiter, fromstring,
- inner, lexsort, matmul, may_share_memory,
+ empty_like, flatiter, frombuffer, _from_dlpack, fromfile, fromiter,
+ fromstring, inner, lexsort, matmul, may_share_memory,
min_scalar_type, ndarray, nditer, nested_iters, promote_types,
putmask, result_type, set_numeric_ops, shares_memory, vdot, where,
zeros, normalize_axis_index)
@@ -41,7 +41,7 @@ __all__ = [
'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc',
'arange', 'array', 'asarray', 'asanyarray', 'ascontiguousarray',
'asfortranarray', 'zeros', 'count_nonzero', 'empty', 'broadcast', 'dtype',
- 'fromstring', 'fromfile', 'frombuffer', 'where',
+ 'fromstring', 'fromfile', 'frombuffer', '_from_dlpack', 'where',
'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', 'lexsort',
'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type',
'result_type', 'isfortran', 'empty_like', 'zeros_like', 'ones_like',
@@ -1184,7 +1184,7 @@ def roll(a, shift, axis=None):
>>> np.roll(x, -2)
array([2, 3, 4, 5, 6, 7, 8, 9, 0, 1])
- >>> x2 = np.reshape(x, (2,5))
+ >>> x2 = np.reshape(x, (2, 5))
>>> x2
array([[0, 1, 2, 3, 4],
[5, 6, 7, 8, 9]])
@@ -1206,6 +1206,12 @@ def roll(a, shift, axis=None):
>>> np.roll(x2, -1, axis=1)
array([[1, 2, 3, 4, 0],
[6, 7, 8, 9, 5]])
+ >>> np.roll(x2, (1, 1), axis=(1, 0))
+ array([[9, 5, 6, 7, 8],
+ [4, 0, 1, 2, 3]])
+ >>> np.roll(x2, (2, 1), axis=(1, 0))
+ array([[8, 9, 5, 6, 7],
+ [3, 4, 0, 1, 2]])
"""
a = asanyarray(a)
@@ -1823,6 +1829,14 @@ def fromfunction(function, shape, *, dtype=float, like=None, **kwargs):
Examples
--------
+ >>> np.fromfunction(lambda i, j: i, (2, 2), dtype=float)
+ array([[0., 0.],
+ [1., 1.]])
+
+ >>> np.fromfunction(lambda i, j: j, (2, 2), dtype=float)
+ array([[0., 1.],
+ [0., 1.]])
+
>>> np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
array([[ True, False, False],
[False, True, False],
diff --git a/numpy/core/numeric.pyi b/numpy/core/numeric.pyi
index 54ab4b7c8..d7ec30351 100644
--- a/numpy/core/numeric.pyi
+++ b/numpy/core/numeric.pyi
@@ -1,6 +1,5 @@
from typing import (
Any,
- Optional,
Union,
Sequence,
Tuple,
@@ -8,18 +7,64 @@ from typing import (
List,
overload,
TypeVar,
- Iterable,
Literal,
+ Type,
+ SupportsAbs,
+ SupportsIndex,
+ NoReturn,
)
+from typing_extensions import TypeGuard
-from numpy import ndarray, generic, dtype, bool_, signedinteger, _OrderKACF, _OrderCF
-from numpy.typing import ArrayLike, DTypeLike, _ShapeLike
+from numpy import (
+ ComplexWarning as ComplexWarning,
+ dtype,
+ generic,
+ unsignedinteger,
+ signedinteger,
+ floating,
+ complexfloating,
+ bool_,
+ int_,
+ intp,
+ float64,
+ timedelta64,
+ object_,
+ _OrderKACF,
+ _OrderCF,
+)
+
+from numpy.typing import (
+ ArrayLike,
+ NDArray,
+ DTypeLike,
+ _ShapeLike,
+ _SupportsDType,
+ _FiniteNestedSequence,
+ _SupportsArray,
+ _ScalarLike_co,
+ _ArrayLikeBool_co,
+ _ArrayLikeUInt_co,
+ _ArrayLikeInt_co,
+ _ArrayLikeFloat_co,
+ _ArrayLikeComplex_co,
+ _ArrayLikeTD64_co,
+ _ArrayLikeObject_co,
+)
_T = TypeVar("_T")
-_ArrayType = TypeVar("_ArrayType", bound=ndarray)
+_SCT = TypeVar("_SCT", bound=generic)
+_ArrayType = TypeVar("_ArrayType", bound=NDArray[Any])
+_DTypeLike = Union[
+ dtype[_SCT],
+ Type[_SCT],
+ _SupportsDType[dtype[_SCT]],
+]
+_ArrayLike = _FiniteNestedSequence[_SupportsArray[dtype[_SCT]]]
_CorrelateMode = Literal["valid", "same", "full"]
+__all__: List[str]
+
@overload
def zeros_like(
a: _ArrayType,
@@ -30,20 +75,61 @@ def zeros_like(
) -> _ArrayType: ...
@overload
def zeros_like(
- a: ArrayLike,
- dtype: DTypeLike = ...,
+ a: _ArrayLike[_SCT],
+ dtype: None = ...,
order: _OrderKACF = ...,
subok: bool = ...,
- shape: Optional[_ShapeLike] = ...,
-) -> ndarray: ...
+ shape: None | _ShapeLike = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def zeros_like(
+ a: object,
+ dtype: None = ...,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
+@overload
+def zeros_like(
+ a: Any,
+ dtype: _DTypeLike[_SCT],
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[_SCT]: ...
+@overload
+def zeros_like(
+ a: Any,
+ dtype: DTypeLike,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
+@overload
def ones(
shape: _ShapeLike,
- dtype: DTypeLike = ...,
+ dtype: None = ...,
+ order: _OrderCF = ...,
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[float64]: ...
+@overload
+def ones(
+ shape: _ShapeLike,
+ dtype: _DTypeLike[_SCT],
order: _OrderCF = ...,
*,
like: ArrayLike = ...,
-) -> ndarray: ...
+) -> NDArray[_SCT]: ...
+@overload
+def ones(
+ shape: _ShapeLike,
+ dtype: DTypeLike,
+ order: _OrderCF = ...,
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[Any]: ...
@overload
def ones_like(
@@ -55,21 +141,64 @@ def ones_like(
) -> _ArrayType: ...
@overload
def ones_like(
- a: ArrayLike,
- dtype: DTypeLike = ...,
+ a: _ArrayLike[_SCT],
+ dtype: None = ...,
order: _OrderKACF = ...,
subok: bool = ...,
- shape: Optional[_ShapeLike] = ...,
-) -> ndarray: ...
+ shape: None | _ShapeLike = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def ones_like(
+ a: object,
+ dtype: None = ...,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
+@overload
+def ones_like(
+ a: Any,
+ dtype: _DTypeLike[_SCT],
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[_SCT]: ...
+@overload
+def ones_like(
+ a: Any,
+ dtype: DTypeLike,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
+@overload
def full(
shape: _ShapeLike,
fill_value: Any,
- dtype: DTypeLike = ...,
+ dtype: None = ...,
+ order: _OrderCF = ...,
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[Any]: ...
+@overload
+def full(
+ shape: _ShapeLike,
+ fill_value: Any,
+ dtype: _DTypeLike[_SCT],
order: _OrderCF = ...,
*,
like: ArrayLike = ...,
-) -> ndarray: ...
+) -> NDArray[_SCT]: ...
+@overload
+def full(
+ shape: _ShapeLike,
+ fill_value: Any,
+ dtype: DTypeLike,
+ order: _OrderCF = ...,
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[Any]: ...
@overload
def full_like(
@@ -82,13 +211,40 @@ def full_like(
) -> _ArrayType: ...
@overload
def full_like(
- a: ArrayLike,
+ a: _ArrayLike[_SCT],
fill_value: Any,
- dtype: DTypeLike = ...,
+ dtype: None = ...,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def full_like(
+ a: object,
+ fill_value: Any,
+ dtype: None = ...,
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
+@overload
+def full_like(
+ a: Any,
+ fill_value: Any,
+ dtype: _DTypeLike[_SCT],
+ order: _OrderKACF = ...,
+ subok: bool = ...,
+ shape: None | _ShapeLike= ...,
+) -> NDArray[_SCT]: ...
+@overload
+def full_like(
+ a: Any,
+ fill_value: Any,
+ dtype: DTypeLike,
order: _OrderKACF = ...,
subok: bool = ...,
- shape: Optional[_ShapeLike] = ...,
-) -> ndarray: ...
+ shape: None | _ShapeLike= ...,
+) -> NDArray[Any]: ...
@overload
def count_nonzero(
@@ -105,78 +261,306 @@ def count_nonzero(
keepdims: bool = ...,
) -> Any: ... # TODO: np.intp or ndarray[np.intp]
-def isfortran(a: Union[ndarray, generic]) -> bool: ...
+def isfortran(a: NDArray[Any] | generic) -> bool: ...
-def argwhere(a: ArrayLike) -> ndarray: ...
+def argwhere(a: ArrayLike) -> NDArray[intp]: ...
-def flatnonzero(a: ArrayLike) -> ndarray: ...
+def flatnonzero(a: ArrayLike) -> NDArray[intp]: ...
+@overload
def correlate(
- a: ArrayLike,
- v: ArrayLike,
+ a: _ArrayLikeBool_co,
+ v: _ArrayLikeBool_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[bool_]: ...
+@overload
+def correlate(
+ a: _ArrayLikeUInt_co,
+ v: _ArrayLikeUInt_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[unsignedinteger[Any]]: ...
+@overload
+def correlate(
+ a: _ArrayLikeInt_co,
+ v: _ArrayLikeInt_co,
mode: _CorrelateMode = ...,
-) -> ndarray: ...
+) -> NDArray[signedinteger[Any]]: ...
+@overload
+def correlate(
+ a: _ArrayLikeFloat_co,
+ v: _ArrayLikeFloat_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[floating[Any]]: ...
+@overload
+def correlate(
+ a: _ArrayLikeComplex_co,
+ v: _ArrayLikeComplex_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[complexfloating[Any, Any]]: ...
+@overload
+def correlate(
+ a: _ArrayLikeTD64_co,
+ v: _ArrayLikeTD64_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[timedelta64]: ...
+@overload
+def correlate(
+ a: _ArrayLikeObject_co,
+ v: _ArrayLikeObject_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[object_]: ...
+@overload
def convolve(
- a: ArrayLike,
- v: ArrayLike,
+ a: _ArrayLikeBool_co,
+ v: _ArrayLikeBool_co,
mode: _CorrelateMode = ...,
-) -> ndarray: ...
+) -> NDArray[bool_]: ...
+@overload
+def convolve(
+ a: _ArrayLikeUInt_co,
+ v: _ArrayLikeUInt_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[unsignedinteger[Any]]: ...
+@overload
+def convolve(
+ a: _ArrayLikeInt_co,
+ v: _ArrayLikeInt_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[signedinteger[Any]]: ...
+@overload
+def convolve(
+ a: _ArrayLikeFloat_co,
+ v: _ArrayLikeFloat_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[floating[Any]]: ...
+@overload
+def convolve(
+ a: _ArrayLikeComplex_co,
+ v: _ArrayLikeComplex_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[complexfloating[Any, Any]]: ...
+@overload
+def convolve(
+ a: _ArrayLikeTD64_co,
+ v: _ArrayLikeTD64_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[timedelta64]: ...
+@overload
+def convolve(
+ a: _ArrayLikeObject_co,
+ v: _ArrayLikeObject_co,
+ mode: _CorrelateMode = ...,
+) -> NDArray[object_]: ...
@overload
def outer(
- a: ArrayLike,
- b: ArrayLike,
+ a: _ArrayLikeBool_co,
+ b: _ArrayLikeBool_co,
out: None = ...,
-) -> ndarray: ...
+) -> NDArray[bool_]: ...
@overload
def outer(
- a: ArrayLike,
- b: ArrayLike,
- out: _ArrayType = ...,
+ a: _ArrayLikeUInt_co,
+ b: _ArrayLikeUInt_co,
+ out: None = ...,
+) -> NDArray[unsignedinteger[Any]]: ...
+@overload
+def outer(
+ a: _ArrayLikeInt_co,
+ b: _ArrayLikeInt_co,
+ out: None = ...,
+) -> NDArray[signedinteger[Any]]: ...
+@overload
+def outer(
+ a: _ArrayLikeFloat_co,
+ b: _ArrayLikeFloat_co,
+ out: None = ...,
+) -> NDArray[floating[Any]]: ...
+@overload
+def outer(
+ a: _ArrayLikeComplex_co,
+ b: _ArrayLikeComplex_co,
+ out: None = ...,
+) -> NDArray[complexfloating[Any, Any]]: ...
+@overload
+def outer(
+ a: _ArrayLikeTD64_co,
+ b: _ArrayLikeTD64_co,
+ out: None = ...,
+) -> NDArray[timedelta64]: ...
+@overload
+def outer(
+ a: _ArrayLikeObject_co,
+ b: _ArrayLikeObject_co,
+ out: None = ...,
+) -> NDArray[object_]: ...
+@overload
+def outer(
+ a: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co,
+ b: _ArrayLikeComplex_co | _ArrayLikeTD64_co | _ArrayLikeObject_co,
+ out: _ArrayType,
) -> _ArrayType: ...
+@overload
def tensordot(
- a: ArrayLike,
- b: ArrayLike,
- axes: Union[int, Tuple[_ShapeLike, _ShapeLike]] = ...,
-) -> ndarray: ...
+ a: _ArrayLikeBool_co,
+ b: _ArrayLikeBool_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[bool_]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeUInt_co,
+ b: _ArrayLikeUInt_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[unsignedinteger[Any]]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeInt_co,
+ b: _ArrayLikeInt_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[signedinteger[Any]]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeFloat_co,
+ b: _ArrayLikeFloat_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[floating[Any]]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeComplex_co,
+ b: _ArrayLikeComplex_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[complexfloating[Any, Any]]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeTD64_co,
+ b: _ArrayLikeTD64_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[timedelta64]: ...
+@overload
+def tensordot(
+ a: _ArrayLikeObject_co,
+ b: _ArrayLikeObject_co,
+ axes: int | Tuple[_ShapeLike, _ShapeLike] = ...,
+) -> NDArray[object_]: ...
+@overload
+def roll(
+ a: _ArrayLike[_SCT],
+ shift: _ShapeLike,
+ axis: None | _ShapeLike = ...,
+) -> NDArray[_SCT]: ...
+@overload
def roll(
a: ArrayLike,
shift: _ShapeLike,
- axis: Optional[_ShapeLike] = ...,
-) -> ndarray: ...
+ axis: None | _ShapeLike = ...,
+) -> NDArray[Any]: ...
-def rollaxis(a: ndarray, axis: int, start: int = ...) -> ndarray: ...
+def rollaxis(
+ a: NDArray[_SCT],
+ axis: int,
+ start: int = ...,
+) -> NDArray[_SCT]: ...
def moveaxis(
- a: ndarray,
+ a: NDArray[_SCT],
source: _ShapeLike,
destination: _ShapeLike,
-) -> ndarray: ...
+) -> NDArray[_SCT]: ...
+@overload
def cross(
- a: ArrayLike,
- b: ArrayLike,
+ a: _ArrayLikeBool_co,
+ b: _ArrayLikeBool_co,
axisa: int = ...,
axisb: int = ...,
axisc: int = ...,
- axis: Optional[int] = ...,
-) -> ndarray: ...
+ axis: None | int = ...,
+) -> NoReturn: ...
+@overload
+def cross(
+ a: _ArrayLikeUInt_co,
+ b: _ArrayLikeUInt_co,
+ axisa: int = ...,
+ axisb: int = ...,
+ axisc: int = ...,
+ axis: None | int = ...,
+) -> NDArray[unsignedinteger[Any]]: ...
+@overload
+def cross(
+ a: _ArrayLikeInt_co,
+ b: _ArrayLikeInt_co,
+ axisa: int = ...,
+ axisb: int = ...,
+ axisc: int = ...,
+ axis: None | int = ...,
+) -> NDArray[signedinteger[Any]]: ...
+@overload
+def cross(
+ a: _ArrayLikeFloat_co,
+ b: _ArrayLikeFloat_co,
+ axisa: int = ...,
+ axisb: int = ...,
+ axisc: int = ...,
+ axis: None | int = ...,
+) -> NDArray[floating[Any]]: ...
+@overload
+def cross(
+ a: _ArrayLikeComplex_co,
+ b: _ArrayLikeComplex_co,
+ axisa: int = ...,
+ axisb: int = ...,
+ axisc: int = ...,
+ axis: None | int = ...,
+) -> NDArray[complexfloating[Any, Any]]: ...
+@overload
+def cross(
+ a: _ArrayLikeObject_co,
+ b: _ArrayLikeObject_co,
+ axisa: int = ...,
+ axisb: int = ...,
+ axisc: int = ...,
+ axis: None | int = ...,
+) -> NDArray[object_]: ...
@overload
def indices(
dimensions: Sequence[int],
- dtype: DTypeLike = ...,
+ dtype: Type[int] = ...,
sparse: Literal[False] = ...,
-) -> ndarray: ...
+) -> NDArray[int_]: ...
@overload
def indices(
dimensions: Sequence[int],
- dtype: DTypeLike = ...,
+ dtype: Type[int] = ...,
sparse: Literal[True] = ...,
-) -> Tuple[ndarray, ...]: ...
+) -> Tuple[NDArray[int_], ...]: ...
+@overload
+def indices(
+ dimensions: Sequence[int],
+ dtype: _DTypeLike[_SCT],
+ sparse: Literal[False] = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def indices(
+ dimensions: Sequence[int],
+ dtype: _DTypeLike[_SCT],
+ sparse: Literal[True],
+) -> Tuple[NDArray[_SCT], ...]: ...
+@overload
+def indices(
+ dimensions: Sequence[int],
+ dtype: DTypeLike,
+ sparse: Literal[False] = ...,
+) -> NDArray[Any]: ...
+@overload
+def indices(
+ dimensions: Sequence[int],
+ dtype: DTypeLike,
+ sparse: Literal[True],
+) -> Tuple[NDArray[Any], ...]: ...
def fromfunction(
function: Callable[..., _T],
@@ -187,18 +571,39 @@ def fromfunction(
**kwargs: Any,
) -> _T: ...
-def isscalar(element: Any) -> bool: ...
+def isscalar(element: object) -> TypeGuard[
+ generic | bool | int | float | complex | str | bytes | memoryview
+]: ...
-def binary_repr(num: int, width: Optional[int] = ...) -> str: ...
+def binary_repr(num: int, width: None | int = ...) -> str: ...
-def base_repr(number: int, base: int = ..., padding: int = ...) -> str: ...
+def base_repr(
+ number: SupportsAbs[float],
+ base: float = ...,
+ padding: SupportsIndex = ...,
+) -> str: ...
+@overload
def identity(
n: int,
- dtype: DTypeLike = ...,
+ dtype: None = ...,
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[float64]: ...
+@overload
+def identity(
+ n: int,
+ dtype: _DTypeLike[_SCT],
+ *,
+ like: ArrayLike = ...,
+) -> NDArray[_SCT]: ...
+@overload
+def identity(
+ n: int,
+ dtype: DTypeLike,
*,
like: ArrayLike = ...,
-) -> ndarray: ...
+) -> NDArray[Any]: ...
def allclose(
a: ArrayLike,
@@ -208,13 +613,22 @@ def allclose(
equal_nan: bool = ...,
) -> bool: ...
+@overload
+def isclose(
+ a: _ScalarLike_co,
+ b: _ScalarLike_co,
+ rtol: float = ...,
+ atol: float = ...,
+ equal_nan: bool = ...,
+) -> bool_: ...
+@overload
def isclose(
a: ArrayLike,
b: ArrayLike,
rtol: float = ...,
atol: float = ...,
equal_nan: bool = ...,
-) -> Any: ...
+) -> NDArray[bool_]: ...
def array_equal(a1: ArrayLike, a2: ArrayLike, equal_nan: bool = ...) -> bool: ...
diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py
index 12f424fd4..8e5de852b 100644
--- a/numpy/core/numerictypes.py
+++ b/numpy/core/numerictypes.py
@@ -80,12 +80,10 @@ Exported symbols include:
"""
import numbers
-import warnings
from numpy.core.multiarray import (
- typeinfo, ndarray, array, empty, dtype, datetime_data,
- datetime_as_string, busday_offset, busday_count, is_busday,
- busdaycalendar
+ ndarray, array, dtype, datetime_data, datetime_as_string,
+ busday_offset, busday_count, is_busday, busdaycalendar
)
from numpy.core.overrides import set_module
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 2b0e33244..1ec178445 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -672,16 +672,38 @@ def configuration(parent_package='',top_path=None):
# but we cannot use add_installed_pkg_config here either, so we only
# update the substitution dictionary during npymath build
config_cmd = config.get_config_cmd()
-
# Check that the toolchain works, to fail early if it doesn't
# (avoid late errors with MATHLIB which are confusing if the
# compiler does not work).
- st = config_cmd.try_link('int main(void) { return 0;}')
- if not st:
- # rerun the failing command in verbose mode
- config_cmd.compiler.verbose = True
- config_cmd.try_link('int main(void) { return 0;}')
- raise RuntimeError("Broken toolchain: cannot link a simple C program")
+ for lang, test_code, note in (
+ ('c', 'int main(void) { return 0;}', ''),
+ ('c++', (
+ 'int main(void)'
+ '{ auto x = 0.0; return static_cast<int>(x); }'
+ ), (
+ 'note: A compiler with support for C++11 language '
+ 'features is required.'
+ )
+ ),
+ ):
+ is_cpp = lang == 'c++'
+ if is_cpp:
+            # this is a workaround to get rid of invalid c++ flags
+ # without doing big changes to config.
+            # C is tested first, so the compiler should be available here
+ bk_c = config_cmd.compiler
+ config_cmd.compiler = bk_c.cxx_compiler()
+ st = config_cmd.try_link(test_code, lang=lang)
+ if not st:
+ # rerun the failing command in verbose mode
+ config_cmd.compiler.verbose = True
+ config_cmd.try_link(test_code, lang=lang)
+ raise RuntimeError(
+ f"Broken toolchain: cannot link a simple {lang.upper()} "
+ f"program. {note}"
+ )
+ if is_cpp:
+ config_cmd.compiler = bk_c
mlibs = check_mathlib(config_cmd)
posix_mlib = ' '.join(['-l%s' % l for l in mlibs])
@@ -696,16 +718,24 @@ def configuration(parent_package='',top_path=None):
join('src', 'npymath', 'halffloat.c')
]
- # Must be true for CRT compilers but not MinGW/cygwin. See gh-9977.
- # Intel and Clang also don't seem happy with /GL
- is_msvc = (platform.platform().startswith('Windows') and
- platform.python_compiler().startswith('MS'))
+ def gl_if_msvc(build_cmd):
+ """ Add flag if we are using MSVC compiler
+
+ We can't see this in our scope, because we have not initialized the
+ distutils build command, so use this deferred calculation to run when
+ we are building the library.
+ """
+ if build_cmd.compiler.compiler_type == 'msvc':
+ # explicitly disable whole-program optimization
+ return ['/GL-']
+ return []
+
config.add_installed_library('npymath',
sources=npymath_sources + [get_mathlib_info],
install_dir='lib',
build_info={
'include_dirs' : [], # empty list required for creating npy_math_internal.h
- 'extra_compiler_args' : (['/GL-'] if is_msvc else []),
+ 'extra_compiler_args': [gl_if_msvc],
})
config.add_npy_pkg_config("npymath.ini.in", "lib/npy-pkg-config",
subst_dict)
@@ -732,6 +762,7 @@ def configuration(parent_package='',top_path=None):
#######################################################################
common_deps = [
+ join('src', 'common', 'dlpack', 'dlpack.h'),
join('src', 'common', 'array_assign.h'),
join('src', 'common', 'binop_override.h'),
join('src', 'common', 'cblasfuncs.h'),
@@ -741,6 +772,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'common', 'npy_cblas.h'),
join('src', 'common', 'npy_config.h'),
join('src', 'common', 'npy_ctypes.h'),
+ join('src', 'common', 'npy_dlpack.h'),
join('src', 'common', 'npy_extint128.h'),
join('src', 'common', 'npy_import.h'),
join('src', 'common', 'npy_hashtable.h'),
@@ -873,6 +905,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'multiarray', 'datetime_busday.c'),
join('src', 'multiarray', 'datetime_busdaycal.c'),
join('src', 'multiarray', 'descriptor.c'),
+ join('src', 'multiarray', 'dlpack.c'),
join('src', 'multiarray', 'dtypemeta.c'),
join('src', 'multiarray', 'dragon4.c'),
join('src', 'multiarray', 'dtype_transfer.c'),
@@ -909,7 +942,7 @@ def configuration(parent_package='',top_path=None):
join('src', 'npysort', 'mergesort.c.src'),
join('src', 'npysort', 'timsort.c.src'),
join('src', 'npysort', 'heapsort.c.src'),
- join('src', 'npysort', 'radixsort.c.src'),
+ join('src', 'npysort', 'radixsort.cpp'),
join('src', 'common', 'npy_partition.h.src'),
join('src', 'npysort', 'selection.c.src'),
join('src', 'common', 'npy_binsearch.h.src'),
@@ -949,8 +982,8 @@ def configuration(parent_package='',top_path=None):
join('src', 'umath', 'loops_exponent_log.dispatch.c.src'),
join('src', 'umath', 'matmul.h.src'),
join('src', 'umath', 'matmul.c.src'),
- join('src', 'umath', 'clip.h.src'),
- join('src', 'umath', 'clip.c.src'),
+ join('src', 'umath', 'clip.h'),
+ join('src', 'umath', 'clip.cpp'),
join('src', 'umath', 'dispatching.c'),
join('src', 'umath', 'legacy_array_method.c'),
join('src', 'umath', 'ufunc_object.c'),
@@ -980,6 +1013,9 @@ def configuration(parent_package='',top_path=None):
svml_objs = glob.glob(svml_path + '/**/*.s', recursive=True)
config.add_extension('_multiarray_umath',
+ # Forcing C language even though we have C++ sources.
+                         # It forces the C linker and does not link the C++ runtime.
+ language = 'c',
sources=multiarray_src + umath_src +
common_src +
[generate_config_h,
@@ -994,7 +1030,11 @@ def configuration(parent_package='',top_path=None):
common_deps,
libraries=['npymath'],
extra_objects=svml_objs,
- extra_info=extra_info)
+ extra_info=extra_info,
+ extra_cxx_compile_args=['-std=c++11',
+ '-D__STDC_VERSION__=0',
+ '-fno-exceptions',
+ '-fno-rtti'])
#######################################################################
# umath_tests module #
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 85c8f16d1..772c87c96 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -43,8 +43,9 @@ C_ABI_VERSION = 0x01000009
# 0x0000000d - 1.19.x
# 0x0000000e - 1.20.x
# 0x0000000e - 1.21.x
-# 0x0000000e - 1.22.x
-C_API_VERSION = 0x0000000e
+# 0x0000000f - 1.22.x
+# 0x0000000f - 1.23.x
+C_API_VERSION = 0x0000000f
class MismatchCAPIWarning(Warning):
pass
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index 54770959c..84de9a059 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -381,7 +381,7 @@ SIMD_IMPL_INTRIN_1(sumup_@sfx@, @esfx@, v@sfx@)
***************************/
#if @fp_only@
/**begin repeat1
- * #intrin = sqrt, recip, abs, square#
+ * #intrin = sqrt, recip, abs, square, ceil, trunc#
*/
SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@)
/**end repeat1**/
@@ -615,7 +615,7 @@ SIMD_INTRIN_DEF(sumup_@sfx@)
***************************/
#if @fp_only@
/**begin repeat1
- * #intrin = sqrt, recip, abs, square#
+ * #intrin = sqrt, recip, abs, square, ceil, trunc#
*/
SIMD_INTRIN_DEF(@intrin@_@sfx@)
/**end repeat1**/
diff --git a/numpy/core/src/common/dlpack/dlpack.h b/numpy/core/src/common/dlpack/dlpack.h
new file mode 100644
index 000000000..29209aee1
--- /dev/null
+++ b/numpy/core/src/common/dlpack/dlpack.h
@@ -0,0 +1,201 @@
+// Taken from:
+// https://github.com/dmlc/dlpack/blob/9b6176fdecb55e9bf39b16f08b96913ed3f275b4/include/dlpack/dlpack.h
+/*!
+ * Copyright (c) 2017 by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current version of dlpack */
+#define DLPACK_VERSION 050
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!
+ * \brief The device type in DLDevice.
+ */
+typedef enum {
+ /*! \brief CPU device */
+ kDLCPU = 1,
+ /*! \brief CUDA GPU device */
+ kDLCUDA = 2,
+ /*!
+ * \brief Pinned CUDA CPU memory by cudaMallocHost
+ */
+ kDLCUDAHost = 3,
+ /*! \brief OpenCL devices. */
+ kDLOpenCL = 4,
+ /*! \brief Vulkan buffer for next generation graphics. */
+ kDLVulkan = 7,
+ /*! \brief Metal for Apple GPU. */
+ kDLMetal = 8,
+ /*! \brief Verilog simulator buffer */
+ kDLVPI = 9,
+ /*! \brief ROCm GPUs for AMD GPUs */
+ kDLROCM = 10,
+ /*!
+ * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+ */
+ kDLROCMHost = 11,
+ /*!
+ * \brief Reserved extension device type,
+ * used for quickly test extension device
+ * The semantics can differ depending on the implementation.
+ */
+ kDLExtDev = 12,
+ /*!
+ * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+ */
+ kDLCUDAManaged = 13,
+} DLDeviceType;
+
+/*!
+ * \brief A Device for Tensor and operator.
+ */
+typedef struct {
+ /*! \brief The device type used in the device. */
+ DLDeviceType device_type;
+ /*!
+ * \brief The device index.
+ * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
+ */
+ int device_id;
+} DLDevice;
+
+/*!
+ * \brief The type code options DLDataType.
+ */
+typedef enum {
+ /*! \brief signed integer */
+ kDLInt = 0U,
+ /*! \brief unsigned integer */
+ kDLUInt = 1U,
+ /*! \brief IEEE floating point */
+ kDLFloat = 2U,
+ /*!
+ * \brief Opaque handle type, reserved for testing purposes.
+ * Frameworks need to agree on the handle data type for the exchange to be well-defined.
+ */
+ kDLOpaqueHandle = 3U,
+ /*! \brief bfloat16 */
+ kDLBfloat = 4U,
+ /*!
+ * \brief complex number
+ * (C/C++/Python layout: compact struct per complex number)
+ */
+ kDLComplex = 5U,
+} DLDataTypeCode;
+
+/*!
+ * \brief The data type the tensor can hold.
+ *
+ * Examples
+ * - float: type_code = 2, bits = 32, lanes=1
+ * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
+ * - int8: type_code = 0, bits = 8, lanes=1
+ * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ */
+typedef struct {
+ /*!
+ * \brief Type code of base types.
+ * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+ * footprint, but the value should be one of DLDataTypeCode enum values.
+ * */
+ uint8_t code;
+ /*!
+ * \brief Number of bits, common choices are 8, 16, 32.
+ */
+ uint8_t bits;
+ /*! \brief Number of lanes in the type, used for vector types. */
+ uint16_t lanes;
+} DLDataType;
+
+/*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+typedef struct {
+ /*!
+ * \brief The opaque data pointer points to the allocated data. This will be
+ * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always
+ * aligned to 256 bytes as in CUDA.
+ *
+ * For given DLTensor, the size of memory required to store the contents of
+ * data is calculated as follows:
+ *
+ * \code{.c}
+ * static inline size_t GetDataSize(const DLTensor* t) {
+ * size_t size = 1;
+ * for (tvm_index_t i = 0; i < t->ndim; ++i) {
+ * size *= t->shape[i];
+ * }
+ * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+ * return size;
+ * }
+ * \endcode
+ */
+ void* data;
+ /*! \brief The device of the tensor */
+ DLDevice device;
+ /*! \brief Number of dimensions */
+ int ndim;
+ /*! \brief The data type of the pointer*/
+ DLDataType dtype;
+ /*! \brief The shape of the tensor */
+ int64_t* shape;
+ /*!
+ * \brief strides of the tensor (in number of elements, not bytes)
+ * can be NULL, indicating tensor is compact and row-majored.
+ */
+ int64_t* strides;
+ /*! \brief The offset in bytes to the beginning pointer to data */
+ uint64_t byte_offset;
+} DLTensor;
+
+/*!
+ * \brief C Tensor object, manage memory of DLTensor. This data structure is
+ * intended to facilitate the borrowing of DLTensor by another framework. It is
+ * not meant to transfer the tensor. When the borrowing framework doesn't need
+ * the tensor, it should call the deleter to notify the host that the resource
+ * is no longer needed.
+ */
+typedef struct DLManagedTensor {
+ /*! \brief DLTensor which is being memory managed */
+ DLTensor dl_tensor;
+ /*! \brief the context of the original host framework of DLManagedTensor in
+ * which DLManagedTensor is used in the framework. It can also be NULL.
+ */
+ void * manager_ctx;
+ /*! \brief Destructor signature void (*)(void*) - this should be called
+ * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
+ * if there is no way for the caller to provide a reasonable destructor.
+ * The destructors deletes the argument self as well.
+ */
+ void (*deleter)(struct DLManagedTensor * self);
+} DLManagedTensor;
+#ifdef __cplusplus
+} // DLPACK_EXTERN_C
+#endif
+#endif // DLPACK_DLPACK_H_
diff --git a/numpy/core/src/common/npy_dlpack.h b/numpy/core/src/common/npy_dlpack.h
new file mode 100644
index 000000000..14ca352c0
--- /dev/null
+++ b/numpy/core/src/common/npy_dlpack.h
@@ -0,0 +1,28 @@
+#include "Python.h"
+#include "dlpack/dlpack.h"
+
+#ifndef NPY_DLPACK_H
+#define NPY_DLPACK_H
+
+// Part of the Array API specification.
+#define NPY_DLPACK_CAPSULE_NAME "dltensor"
+#define NPY_DLPACK_USED_CAPSULE_NAME "used_dltensor"
+
+// Used internally by NumPy to store a base object
+// as it has to release a reference to the original
+// capsule.
+#define NPY_DLPACK_INTERNAL_CAPSULE_NAME "numpy_dltensor"
+
+PyObject *
+array_dlpack(PyArrayObject *self, PyObject *const *args, Py_ssize_t len_args,
+ PyObject *kwnames);
+
+
+PyObject *
+array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args));
+
+
+NPY_NO_EXPORT PyObject *
+_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj);
+
+#endif
diff --git a/numpy/core/src/common/npy_sort.h.src b/numpy/core/src/common/npy_sort.h.src
index ddbde0c9b..b4a1e9b0c 100644
--- a/numpy/core/src/common/npy_sort.h.src
+++ b/numpy/core/src/common/npy_sort.h.src
@@ -49,9 +49,14 @@ NPY_NO_EXPORT int atimsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
* longlong, ulonglong#
*/
-
+#ifdef __cplusplus
+extern "C" {
+#endif
NPY_NO_EXPORT int radixsort_@suff@(void *vec, npy_intp cnt, void *null);
NPY_NO_EXPORT int aradixsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null);
+#ifdef __cplusplus
+}
+#endif
/**end repeat**/
diff --git a/numpy/core/src/common/numpy_tag.h b/numpy/core/src/common/numpy_tag.h
new file mode 100644
index 000000000..dc8d5286b
--- /dev/null
+++ b/numpy/core/src/common/numpy_tag.h
@@ -0,0 +1,78 @@
+#ifndef _NPY_COMMON_TAG_H_
+#define _NPY_COMMON_TAG_H_
+
+namespace npy {
+
+struct integral_tag {
+};
+struct floating_point_tag {
+};
+struct complex_tag {
+};
+struct date_tag {
+};
+
+struct bool_tag : integral_tag {
+ using type = npy_bool;
+};
+struct byte_tag : integral_tag {
+ using type = npy_byte;
+};
+struct ubyte_tag : integral_tag {
+ using type = npy_ubyte;
+};
+struct short_tag : integral_tag {
+ using type = npy_short;
+};
+struct ushort_tag : integral_tag {
+ using type = npy_ushort;
+};
+struct int_tag : integral_tag {
+ using type = npy_int;
+};
+struct uint_tag : integral_tag {
+ using type = npy_uint;
+};
+struct long_tag : integral_tag {
+ using type = npy_long;
+};
+struct ulong_tag : integral_tag {
+ using type = npy_ulong;
+};
+struct longlong_tag : integral_tag {
+ using type = npy_longlong;
+};
+struct ulonglong_tag : integral_tag {
+ using type = npy_ulonglong;
+};
+struct half_tag {
+ using type = npy_half;
+};
+struct float_tag : floating_point_tag {
+ using type = npy_float;
+};
+struct double_tag : floating_point_tag {
+ using type = npy_double;
+};
+struct longdouble_tag : floating_point_tag {
+ using type = npy_longdouble;
+};
+struct cfloat_tag : complex_tag {
+ using type = npy_cfloat;
+};
+struct cdouble_tag : complex_tag {
+ using type = npy_cdouble;
+};
+struct clongdouble_tag : complex_tag {
+ using type = npy_clongdouble;
+};
+struct datetime_tag : date_tag {
+ using type = npy_datetime;
+};
+struct timedelta_tag : date_tag {
+ using type = npy_timedelta;
+};
+
+} // namespace npy
+
+#endif
diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h
index 9460183df..ec15e50e1 100644
--- a/numpy/core/src/common/simd/avx2/math.h
+++ b/numpy/core/src/common/simd/avx2/math.h
@@ -105,4 +105,12 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return _mm256_blendv_epi8(a, b, _mm256_cmpgt_epi64(a, b));
}
+// ceil
+#define npyv_ceil_f32 _mm256_ceil_ps
+#define npyv_ceil_f64 _mm256_ceil_pd
+
+// trunc
+#define npyv_trunc_f32(A) _mm256_round_ps(A, _MM_FROUND_TO_ZERO)
+#define npyv_trunc_f64(A) _mm256_round_pd(A, _MM_FROUND_TO_ZERO)
+
#endif // _NPY_SIMD_AVX2_MATH_H
diff --git a/numpy/core/src/common/simd/avx2/memory.h b/numpy/core/src/common/simd/avx2/memory.h
index e27bf15fe..5891a270a 100644
--- a/numpy/core/src/common/simd/avx2/memory.h
+++ b/numpy/core/src/common/simd/avx2/memory.h
@@ -87,7 +87,7 @@ NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride)
#if 0 // slower
NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride)
{
- const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride);
+ const __m256i idx = npyv_set_s64(0, 1*stride, 2*stride, 3*stride);
return _mm256_i64gather_epi64((const void*)ptr, idx, 8);
}
NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride)
@@ -170,9 +170,9 @@ NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane)
NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill)
{
assert(nlane > 0);
- const __m256i vfill = _mm256_set1_epi64x(fill);
- const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
- __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane);
+ const __m256i vfill = npyv_setall_s64(fill);
+ const __m256i steps = npyv_set_s64(0, 1, 2, 3);
+ __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
__m256i payload = _mm256_maskload_epi64((const void*)ptr, mask);
return _mm256_blendv_epi8(vfill, payload, mask);
@@ -181,8 +181,8 @@ NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, n
NPY_FINLINE npyv_s64 npyv_load_tillz_s64(const npy_int64 *ptr, npy_uintp nlane)
{
assert(nlane > 0);
- const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
- __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane);
+ const __m256i steps = npyv_set_s64(0, 1, 2, 3);
+ __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
return _mm256_maskload_epi64((const void*)ptr, mask);
}
@@ -211,10 +211,10 @@ NPY_FINLINE npyv_s64
npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill)
{
assert(nlane > 0);
- const __m256i vfill = _mm256_set1_epi64x(fill);
- const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride);
- const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
- __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int)nlane);
+ const __m256i vfill = npyv_setall_s64(fill);
+ const __m256i idx = npyv_set_s64(0, 1*stride, 2*stride, 3*stride);
+ const __m256i steps = npyv_set_s64(0, 1, 2, 3);
+ __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 8);
}
@@ -238,8 +238,8 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a
NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a)
{
assert(nlane > 0);
- const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
- __m256i vnlane = _mm256_set1_epi64x(nlane > 8 ? 8 : (int)nlane);
+ const __m256i steps = npyv_set_s64(0, 1, 2, 3);
+ __m256i vnlane = npyv_setall_s64(nlane > 8 ? 8 : (int)nlane);
__m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
_mm256_maskstore_epi64((void*)ptr, mask, a);
}
diff --git a/numpy/core/src/common/simd/avx2/misc.h b/numpy/core/src/common/simd/avx2/misc.h
index e96696dc9..5e91e91b3 100644
--- a/numpy/core/src/common/simd/avx2/misc.h
+++ b/numpy/core/src/common/simd/avx2/misc.h
@@ -24,11 +24,27 @@
#define npyv_setall_s16(VAL) _mm256_set1_epi16((short)VAL)
#define npyv_setall_u32(VAL) _mm256_set1_epi32((int)VAL)
#define npyv_setall_s32(VAL) _mm256_set1_epi32(VAL)
-#define npyv_setall_u64(VAL) _mm256_set1_epi64x(VAL)
-#define npyv_setall_s64(VAL) _mm256_set1_epi64x(VAL)
#define npyv_setall_f32(VAL) _mm256_set1_ps(VAL)
#define npyv_setall_f64(VAL) _mm256_set1_pd(VAL)
+NPY_FINLINE __m256i npyv__setr_epi64(npy_int64, npy_int64, npy_int64, npy_int64);
+NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a)
+{
+ npy_int64 ai = (npy_int64)a;
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64(ai, ai, ai, ai);
+#else
+ return _mm256_set1_epi64x(ai);
+#endif
+}
+NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a)
+{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64(a, a, a, a);
+#else
+ return _mm256_set1_epi64x(a);
+#endif
+}
/*
* vector with specific values set to each lane and
* set a specific value to all remained lanes
@@ -59,7 +75,14 @@ NPY_FINLINE __m256i npyv__setr_epi32(int i0, int i1, int i2, int i3, int i4, int
}
NPY_FINLINE __m256i npyv__setr_epi64(npy_int64 i0, npy_int64 i1, npy_int64 i2, npy_int64 i3)
{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return _mm256_setr_epi32(
+ (int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32),
+ (int)i2, (int)(i2 >> 32), (int)i3, (int)(i3 >> 32)
+ );
+#else
return _mm256_setr_epi64x(i0, i1, i2, i3);
+#endif
}
NPY_FINLINE __m256 npyv__setr_ps(float i0, float i1, float i2, float i3, float i4, float i5,
diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h
index 0141396d0..f30e50ad0 100644
--- a/numpy/core/src/common/simd/avx512/math.h
+++ b/numpy/core/src/common/simd/avx512/math.h
@@ -35,7 +35,7 @@ NPY_FINLINE npyv_f64 npyv_abs_f64(npyv_f64 a)
return _mm512_range_pd(a, a, 8);
#else
return npyv_and_f64(
- a, _mm512_castsi512_pd(_mm512_set1_epi64(0x7fffffffffffffffLL))
+ a, _mm512_castsi512_pd(npyv_setall_s64(0x7fffffffffffffffLL))
);
#endif
}
@@ -112,4 +112,12 @@ NPY_FINLINE npyv_f64 npyv_minp_f64(npyv_f64 a, npyv_f64 b)
#define npyv_min_u64 _mm512_min_epu64
#define npyv_min_s64 _mm512_min_epi64
+// ceil
+#define npyv_ceil_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_POS_INF)
+#define npyv_ceil_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_POS_INF)
+
+// trunc
+#define npyv_trunc_f32(A) _mm512_roundscale_ps(A, _MM_FROUND_TO_ZERO)
+#define npyv_trunc_f64(A) _mm512_roundscale_pd(A, _MM_FROUND_TO_ZERO)
+
#endif // _NPY_SIMD_AVX512_MATH_H
diff --git a/numpy/core/src/common/simd/avx512/memory.h b/numpy/core/src/common/simd/avx512/memory.h
index bffd6e907..47095bf72 100644
--- a/numpy/core/src/common/simd/avx512/memory.h
+++ b/numpy/core/src/common/simd/avx512/memory.h
@@ -110,7 +110,7 @@ NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride)
//// 64
NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride)
{
- const __m512i idx = _mm512_setr_epi64(
+ const __m512i idx = npyv_set_s64(
0*stride, 1*stride, 2*stride, 3*stride,
4*stride, 5*stride, 6*stride, 7*stride
);
@@ -140,7 +140,7 @@ NPY_FINLINE void npyv_storen_f32(float *ptr, npy_intp stride, npyv_f32 a)
//// 64
NPY_FINLINE void npyv_storen_u64(npy_uint64 *ptr, npy_intp stride, npyv_u64 a)
{
- const __m512i idx = _mm512_setr_epi64(
+ const __m512i idx = npyv_set_s64(
0*stride, 1*stride, 2*stride, 3*stride,
4*stride, 5*stride, 6*stride, 7*stride
);
@@ -173,7 +173,7 @@ NPY_FINLINE npyv_s32 npyv_load_tillz_s32(const npy_int32 *ptr, npy_uintp nlane)
NPY_FINLINE npyv_s64 npyv_load_till_s64(const npy_int64 *ptr, npy_uintp nlane, npy_int64 fill)
{
assert(nlane > 0);
- const __m512i vfill = _mm512_set1_epi64(fill);
+ const __m512i vfill = npyv_setall_s64(fill);
const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1;
return _mm512_mask_loadu_epi64(vfill, mask, (const __m512i*)ptr);
}
@@ -210,11 +210,11 @@ NPY_FINLINE npyv_s64
npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_int64 fill)
{
assert(nlane > 0);
- const __m512i idx = _mm512_setr_epi64(
+ const __m512i idx = npyv_set_s64(
0*stride, 1*stride, 2*stride, 3*stride,
4*stride, 5*stride, 6*stride, 7*stride
);
- const __m512i vfill = _mm512_set1_epi64(fill);
+ const __m512i vfill = npyv_setall_s64(fill);
const __mmask8 mask = nlane > 31 ? -1 : (1 << nlane) - 1;
return _mm512_mask_i64gather_epi64(vfill, mask, idx, (const __m512i*)ptr, 8);
}
@@ -258,7 +258,7 @@ NPY_FINLINE void npyv_storen_till_s32(npy_int32 *ptr, npy_intp stride, npy_uintp
NPY_FINLINE void npyv_storen_till_s64(npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npyv_s64 a)
{
assert(nlane > 0);
- const __m512i idx = _mm512_setr_epi64(
+ const __m512i idx = npyv_set_s64(
0*stride, 1*stride, 2*stride, 3*stride,
4*stride, 5*stride, 6*stride, 7*stride
);
diff --git a/numpy/core/src/common/simd/avx512/misc.h b/numpy/core/src/common/simd/avx512/misc.h
index 4b6729b05..c3039ecfe 100644
--- a/numpy/core/src/common/simd/avx512/misc.h
+++ b/numpy/core/src/common/simd/avx512/misc.h
@@ -24,11 +24,30 @@
#define npyv_setall_s16(VAL) _mm512_set1_epi16((short)VAL)
#define npyv_setall_u32(VAL) _mm512_set1_epi32((int)VAL)
#define npyv_setall_s32(VAL) _mm512_set1_epi32(VAL)
-#define npyv_setall_u64(VAL) _mm512_set1_epi64(VAL)
-#define npyv_setall_s64(VAL) _mm512_set1_epi64(VAL)
#define npyv_setall_f32(VAL) _mm512_set1_ps(VAL)
#define npyv_setall_f64(VAL) _mm512_set1_pd(VAL)
+NPY_FINLINE __m512i npyv__setr_epi64(
+ npy_int64, npy_int64, npy_int64, npy_int64,
+ npy_int64, npy_int64, npy_int64, npy_int64
+);
+NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a)
+{
+ npy_int64 ai = (npy_int64)a;
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64(ai, ai, ai, ai, ai, ai, ai, ai);
+#else
+ return _mm512_set1_epi64(ai);
+#endif
+}
+NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a)
+{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64(a, a, a, a, a, a, a, a);
+#else
+ return _mm512_set1_epi64(a);
+#endif
+}
/**
* vector with specific values set to each lane and
* set a specific value to all remained lanes
@@ -76,7 +95,16 @@ NPY_FINLINE __m512i npyv__setr_epi32(
NPY_FINLINE __m512i npyv__setr_epi64(npy_int64 i0, npy_int64 i1, npy_int64 i2, npy_int64 i3,
npy_int64 i4, npy_int64 i5, npy_int64 i6, npy_int64 i7)
{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return _mm512_setr_epi32(
+ (int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32),
+ (int)i2, (int)(i2 >> 32), (int)i3, (int)(i3 >> 32),
+ (int)i4, (int)(i4 >> 32), (int)i5, (int)(i5 >> 32),
+ (int)i6, (int)(i6 >> 32), (int)i7, (int)(i7 >> 32)
+ );
+#else
return _mm512_setr_epi64(i0, i1, i2, i3, i4, i5, i6, i7);
+#endif
}
NPY_FINLINE __m512 npyv__setr_ps(
diff --git a/numpy/core/src/common/simd/avx512/utils.h b/numpy/core/src/common/simd/avx512/utils.h
index 8066283c6..c3079283f 100644
--- a/numpy/core/src/common/simd/avx512/utils.h
+++ b/numpy/core/src/common/simd/avx512/utils.h
@@ -26,7 +26,7 @@
#define npyv512_combine_ps256(A, B) _mm512_insertf32x8(_mm512_castps256_ps512(A), B, 1)
#else
#define npyv512_combine_ps256(A, B) \
- _mm512_castsi512_ps(npyv512_combine_si256(_mm512_castps_si512(A), _mm512_castps_si512(B)))
+ _mm512_castsi512_ps(npyv512_combine_si256(_mm256_castps_si256(A), _mm256_castps_si256(B)))
#endif
#define NPYV_IMPL_AVX512_FROM_AVX2_1ARG(FN_NAME, INTRIN) \
@@ -39,6 +39,26 @@
return npyv512_combine_si256(l_a, h_a); \
}
+#define NPYV_IMPL_AVX512_FROM_AVX2_PS_1ARG(FN_NAME, INTRIN) \
+ NPY_FINLINE __m512 FN_NAME(__m512 a) \
+ { \
+ __m256 l_a = npyv512_lower_ps256(a); \
+ __m256 h_a = npyv512_higher_ps256(a); \
+ l_a = INTRIN(l_a); \
+ h_a = INTRIN(h_a); \
+ return npyv512_combine_ps256(l_a, h_a); \
+ }
+
+#define NPYV_IMPL_AVX512_FROM_AVX2_PD_1ARG(FN_NAME, INTRIN) \
+ NPY_FINLINE __m512d FN_NAME(__m512d a) \
+ { \
+ __m256d l_a = npyv512_lower_pd256(a); \
+ __m256d h_a = npyv512_higher_pd256(a); \
+ l_a = INTRIN(l_a); \
+ h_a = INTRIN(h_a); \
+ return npyv512_combine_pd256(l_a, h_a); \
+ }
+
#define NPYV_IMPL_AVX512_FROM_AVX2_2ARG(FN_NAME, INTRIN) \
NPY_FINLINE __m512i FN_NAME(__m512i a, __m512i b) \
{ \
diff --git a/numpy/core/src/common/simd/intdiv.h b/numpy/core/src/common/simd/intdiv.h
index 5d2ab2906..a7a461721 100644
--- a/numpy/core/src/common/simd/intdiv.h
+++ b/numpy/core/src/common/simd/intdiv.h
@@ -162,11 +162,12 @@ NPY_FINLINE npy_uint64 npyv__divh128_u64(npy_uint64 high, npy_uint64 divisor)
npy_uint32 divisor_hi = divisor >> 32;
npy_uint32 divisor_lo = divisor & 0xFFFFFFFF;
// compute high quotient digit
- npy_uint32 quotient_hi = (npy_uint32)(high / divisor_hi);
+ npy_uint64 quotient_hi = high / divisor_hi;
npy_uint64 remainder = high - divisor_hi * quotient_hi;
npy_uint64 base32 = 1ULL << 32;
while (quotient_hi >= base32 || quotient_hi*divisor_lo > base32*remainder) {
- remainder += --divisor_hi;
+ --quotient_hi;
+ remainder += divisor_hi;
if (remainder >= base32) {
break;
}
@@ -200,7 +201,7 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
default:
l = npyv__bitscan_revnz_u32(d - 1) + 1; // ceil(log2(d))
l2 = (npy_uint8)(1 << l); // 2^l, overflow to 0 if l = 8
- m = ((l2 - d) << 8) / d + 1; // multiplier
+ m = ((npy_uint16)((l2 - d) << 8)) / d + 1; // multiplier
sh1 = 1; sh2 = l - 1; // shift counts
}
npyv_u8x3 divisor;
diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h
index 19ea6f22f..19e5cd846 100644
--- a/numpy/core/src/common/simd/neon/math.h
+++ b/numpy/core/src/common/simd/neon/math.h
@@ -88,16 +88,16 @@ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a)
#define npyv_max_f64 vmaxq_f64
// Maximum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
#ifdef NPY_HAVE_ASIMD
#define npyv_maxp_f32 vmaxnmq_f32
#else
NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
- {
+ {
npyv_u32 nn_a = vceqq_f32(a, a);
npyv_u32 nn_b = vceqq_f32(b, b);
return vmaxq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a));
- }
+ }
#endif
#if NPY_SIMD_F64
#define npyv_maxp_f64 vmaxnmq_f64
@@ -123,16 +123,16 @@ NPY_FINLINE npyv_s64 npyv_max_s64(npyv_s64 a, npyv_s64 b)
#define npyv_min_f64 vminq_f64
// Minimum, supports IEEE floating-point arithmetic (IEC 60559),
// - If one of the two vectors contains NaN, the equivalent element of the other vector is set
-// - Only if both corresponded elements are NaN, NaN is set.
+// - Only if both corresponded elements are NaN, NaN is set.
#ifdef NPY_HAVE_ASIMD
#define npyv_minp_f32 vminnmq_f32
#else
NPY_FINLINE npyv_f32 npyv_minp_f32(npyv_f32 a, npyv_f32 b)
- {
+ {
npyv_u32 nn_a = vceqq_f32(a, a);
npyv_u32 nn_b = vceqq_f32(b, b);
return vminq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a));
- }
+ }
#endif
#if NPY_SIMD_F64
#define npyv_minp_f64 vminnmq_f64
@@ -153,4 +153,74 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return vbslq_s64(npyv_cmplt_s64(a, b), a, b);
}
+// ceil
+#ifdef NPY_HAVE_ASIMD
+ #define npyv_ceil_f32 vrndpq_f32
+#else
+ NPY_FINLINE npyv_f32 npyv_ceil_f32(npyv_f32 a)
+ {
+ const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f));
+ const npyv_u32 one = vreinterpretq_u32_f32(vdupq_n_f32(1.0f));
+ const npyv_s32 max_int = vdupq_n_s32(0x7fffffff);
+ /**
+ * On armv7, vcvtq.f32 handles special cases as follows:
+ * NaN return 0
+ * +inf or +outrange return 0x80000000(-0.0f)
+ * -inf or -outrange return 0x7fffffff(nan)
+ */
+ npyv_s32 roundi = vcvtq_s32_f32(a);
+ npyv_f32 round = vcvtq_f32_s32(roundi);
+ npyv_f32 ceil = vaddq_f32(round, vreinterpretq_f32_u32(
+ vandq_u32(vcltq_f32(round, a), one))
+ );
+ // respect signed zero, e.g. -0.5 -> -0.0
+ npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32(
+ vreinterpretq_s32_f32(ceil),
+ vandq_s32(vreinterpretq_s32_f32(a), szero)
+ ));
+ // if nan or overflow return a
+ npyv_u32 nnan = npyv_notnan_f32(a);
+ npyv_u32 overflow = vorrq_u32(
+ vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int)
+ );
+ return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a);
+ }
+#endif
+#if NPY_SIMD_F64
+ #define npyv_ceil_f64 vrndpq_f64
+#endif // NPY_SIMD_F64
+
+// trunc
+#ifdef NPY_HAVE_ASIMD
+ #define npyv_trunc_f32 vrndq_f32
+#else
+ NPY_FINLINE npyv_f32 npyv_trunc_f32(npyv_f32 a)
+ {
+ const npyv_s32 szero = vreinterpretq_s32_f32(vdupq_n_f32(-0.0f));
+ const npyv_s32 max_int = vdupq_n_s32(0x7fffffff);
+ /**
+ * On armv7, vcvtq.f32 handles special cases as follows:
+ * NaN return 0
+ * +inf or +outrange return 0x80000000(-0.0f)
+ * -inf or -outrange return 0x7fffffff(nan)
+ */
+ npyv_s32 roundi = vcvtq_s32_f32(a);
+ npyv_f32 round = vcvtq_f32_s32(roundi);
+ // respect signed zero, e.g. -0.5 -> -0.0
+ npyv_f32 rzero = vreinterpretq_f32_s32(vorrq_s32(
+ vreinterpretq_s32_f32(round),
+ vandq_s32(vreinterpretq_s32_f32(a), szero)
+ ));
+ // if nan or overflow return a
+ npyv_u32 nnan = npyv_notnan_f32(a);
+ npyv_u32 overflow = vorrq_u32(
+ vceqq_s32(roundi, szero), vceqq_s32(roundi, max_int)
+ );
+ return vbslq_f32(vbicq_u32(nnan, overflow), rzero, a);
+ }
+#endif
+#if NPY_SIMD_F64
+ #define npyv_trunc_f64 vrndq_f64
+#endif // NPY_SIMD_F64
+
#endif // _NPY_SIMD_NEON_MATH_H
diff --git a/numpy/core/src/common/simd/simd.h b/numpy/core/src/common/simd/simd.h
index a3e2b95de..08b2a7d00 100644
--- a/numpy/core/src/common/simd/simd.h
+++ b/numpy/core/src/common/simd/simd.h
@@ -27,6 +27,25 @@ typedef npy_int64 npyv_lanetype_s64;
typedef float npyv_lanetype_f32;
typedef double npyv_lanetype_f64;
+#if defined(_MSC_VER) && defined(_M_IX86)
+/*
+ * Avoid using any of the following intrinsics with MSVC 32-bit,
+ * even if they apparently work on newer versions.
+ * They had a bad impact on the generated instructions:
+ * sometimes the compiler handled them without respecting
+ * 32-bit mode, which led to crashes due to executing 64-bit
+ * instructions, and other times it generated bad emulated instructions.
+ */
+ #undef _mm512_set1_epi64
+ #undef _mm256_set1_epi64x
+ #undef _mm_set1_epi64x
+ #undef _mm512_setr_epi64x
+ #undef _mm256_setr_epi64x
+ #undef _mm_setr_epi64x
+ #undef _mm512_set_epi64x
+ #undef _mm256_set_epi64x
+ #undef _mm_set_epi64x
+#endif
#if defined(NPY_HAVE_AVX512F) && !defined(NPY_SIMD_FORCE_256) && !defined(NPY_SIMD_FORCE_128)
#include "avx512/avx512.h"
#elif defined(NPY_HAVE_AVX2) && !defined(NPY_SIMD_FORCE_128)
diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h
index 97d35afc5..5daf7711e 100644
--- a/numpy/core/src/common/simd/sse/math.h
+++ b/numpy/core/src/common/simd/sse/math.h
@@ -143,4 +143,63 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
return npyv_select_s64(npyv_cmplt_s64(a, b), a, b);
}
+// ceil
+#ifdef NPY_HAVE_SSE41
+ #define npyv_ceil_f32 _mm_ceil_ps
+ #define npyv_ceil_f64 _mm_ceil_pd
+#else
+ NPY_FINLINE npyv_f32 npyv_ceil_f32(npyv_f32 a)
+ {
+ const npyv_f32 szero = _mm_set1_ps(-0.0f);
+ const npyv_f32 one = _mm_set1_ps(1.0f);
+ npyv_s32 roundi = _mm_cvttps_epi32(a);
+ npyv_f32 round = _mm_cvtepi32_ps(roundi);
+ npyv_f32 ceil = _mm_add_ps(round, _mm_and_ps(_mm_cmplt_ps(round, a), one));
+ // respect signed zero, e.g. -0.5 -> -0.0
+ npyv_f32 rzero = _mm_or_ps(ceil, _mm_and_ps(a, szero));
+ // if overflow return a
+ return npyv_select_f32(_mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)), a, rzero);
+ }
+ NPY_FINLINE npyv_f64 npyv_ceil_f64(npyv_f64 a)
+ {
+ const npyv_f64 szero = _mm_set1_pd(-0.0);
+ const npyv_f64 one = _mm_set1_pd(1.0);
+ const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000);
+ npyv_f64 sign_two52 = _mm_or_pd(two_power_52, _mm_and_pd(a, szero));
+ // round by add magic number 2^52
+ npyv_f64 round = _mm_sub_pd(_mm_add_pd(a, sign_two52), sign_two52);
+ npyv_f64 ceil = _mm_add_pd(round, _mm_and_pd(_mm_cmplt_pd(round, a), one));
+ // respect signed zero, e.g. -0.5 -> -0.0
+ return _mm_or_pd(ceil, _mm_and_pd(a, szero));
+ }
+#endif
+
+// trunc
+#ifdef NPY_HAVE_SSE41
+ #define npyv_trunc_f32(A) _mm_round_ps(A, _MM_FROUND_TO_ZERO)
+ #define npyv_trunc_f64(A) _mm_round_pd(A, _MM_FROUND_TO_ZERO)
+#else
+ NPY_FINLINE npyv_f32 npyv_trunc_f32(npyv_f32 a)
+ {
+ const npyv_f32 szero = _mm_set1_ps(-0.0f);
+ npyv_s32 roundi = _mm_cvttps_epi32(a);
+ npyv_f32 trunc = _mm_cvtepi32_ps(roundi);
+ // respect signed zero, e.g. -0.5 -> -0.0
+ npyv_f32 rzero = _mm_or_ps(trunc, _mm_and_ps(a, szero));
+ // if overflow return a
+ return npyv_select_f32(_mm_cmpeq_epi32(roundi, _mm_castps_si128(szero)), a, rzero);
+ }
+ NPY_FINLINE npyv_f64 npyv_trunc_f64(npyv_f64 a)
+ {
+ const npyv_f64 szero = _mm_set1_pd(-0.0);
+ const npyv_f64 one = _mm_set1_pd(1.0);
+ const npyv_f64 two_power_52 = _mm_set1_pd(0x10000000000000);
+ npyv_f64 abs_a = npyv_abs_f64(a);
+ // round by add magic number 2^52
+ npyv_f64 abs_round = _mm_sub_pd(_mm_add_pd(abs_a, two_power_52), two_power_52);
+ npyv_f64 subtrahend = _mm_and_pd(_mm_cmpgt_pd(abs_round, abs_a), one);
+ return _mm_or_pd(_mm_sub_pd(abs_round, subtrahend), _mm_and_pd(a, szero));
+ }
+#endif
+
#endif // _NPY_SIMD_SSE_MATH_H
diff --git a/numpy/core/src/common/simd/sse/misc.h b/numpy/core/src/common/simd/sse/misc.h
index 1099c491d..7d13fbf55 100644
--- a/numpy/core/src/common/simd/sse/misc.h
+++ b/numpy/core/src/common/simd/sse/misc.h
@@ -24,11 +24,28 @@
#define npyv_setall_s16(VAL) _mm_set1_epi16((short)(VAL))
#define npyv_setall_u32(VAL) _mm_set1_epi32((int)(VAL))
#define npyv_setall_s32(VAL) _mm_set1_epi32((int)(VAL))
-#define npyv_setall_u64(VAL) _mm_set1_epi64x((npy_int64)(VAL))
-#define npyv_setall_s64(VAL) _mm_set1_epi64x((npy_int64)(VAL))
#define npyv_setall_f32 _mm_set1_ps
#define npyv_setall_f64 _mm_set1_pd
+NPY_FINLINE __m128i npyv__setr_epi64(npy_int64 i0, npy_int64 i1);
+
+NPY_FINLINE npyv_u64 npyv_setall_u64(npy_uint64 a)
+{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64((npy_int64)a, (npy_int64)a);
+#else
+ return _mm_set1_epi64x((npy_int64)a);
+#endif
+}
+NPY_FINLINE npyv_s64 npyv_setall_s64(npy_int64 a)
+{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return npyv__setr_epi64(a, a);
+#else
+ return _mm_set1_epi64x((npy_int64)a);
+#endif
+}
+
/**
* vector with specific values set to each lane and
* set a specific value to all remained lanes
@@ -53,7 +70,11 @@ NPY_FINLINE __m128i npyv__setr_epi32(int i0, int i1, int i2, int i3)
}
NPY_FINLINE __m128i npyv__setr_epi64(npy_int64 i0, npy_int64 i1)
{
+#if defined(_MSC_VER) && defined(_M_IX86)
+ return _mm_setr_epi32((int)i0, (int)(i0 >> 32), (int)i1, (int)(i1 >> 32));
+#else
return _mm_set_epi64x(i1, i0);
+#endif
}
NPY_FINLINE __m128 npyv__setr_ps(float i0, float i1, float i2, float i3)
{
diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h
index b2e393c7c..d138cae8a 100644
--- a/numpy/core/src/common/simd/vsx/math.h
+++ b/numpy/core/src/common/simd/vsx/math.h
@@ -69,4 +69,12 @@ NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
#define npyv_min_u64 vec_min
#define npyv_min_s64 vec_min
+// ceil
+#define npyv_ceil_f32 vec_ceil
+#define npyv_ceil_f64 vec_ceil
+
+// trunc
+#define npyv_trunc_f32 vec_trunc
+#define npyv_trunc_f64 vec_trunc
+
#endif // _NPY_SIMD_VSX_MATH_H
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index e945d0771..9486b7cff 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -2193,7 +2193,7 @@ PrintFloat_Printf_g(PyObject *obj, int precision)
}
else if (PyArray_IsScalar(obj, LongDouble)) {
npy_longdouble x = PyArrayScalar_VAL(obj, LongDouble);
- PyOS_snprintf(str, sizeof(str), "%.*Lg", precision, x);
+ PyOS_snprintf(str, sizeof(str), "%.*" NPY_LONGDOUBLE_FMT, precision, x);
}
else{
double val = PyFloat_AsDouble(obj);
@@ -2363,6 +2363,17 @@ run_intp_converter(PyObject* NPY_UNUSED(self), PyObject *args)
return tup;
}
+/* used to test NPY_ARRAY_ENSURENOCOPY raises ValueError */
+static PyObject*
+npy_ensurenocopy(PyObject* NPY_UNUSED(self), PyObject* args)
+{
+ int flags = NPY_ARRAY_ENSURENOCOPY;
+ if (!PyArray_CheckFromAny(args, NULL, 0, 0, flags, NULL)) {
+ return NULL;
+ }
+ Py_RETURN_NONE;
+}
+
static PyMethodDef Multiarray_TestsMethods[] = {
{"argparse_example_function",
(PyCFunction)argparse_example_function,
@@ -2424,6 +2435,9 @@ static PyMethodDef Multiarray_TestsMethods[] = {
{"npy_discard",
npy_discard,
METH_O, NULL},
+ {"npy_ensurenocopy",
+ npy_ensurenocopy,
+ METH_O, NULL},
{"get_buffer_info",
get_buffer_info,
METH_VARARGS, NULL},
diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c
index adb4ae128..94a7daa83 100644
--- a/numpy/core/src/multiarray/alloc.c
+++ b/numpy/core/src/multiarray/alloc.c
@@ -133,9 +133,10 @@ npy_alloc_cache(npy_uintp sz)
/* zero initialized data, sz is number of bytes to allocate */
NPY_NO_EXPORT void *
-npy_alloc_cache_zero(npy_uintp sz)
+npy_alloc_cache_zero(size_t nmemb, size_t size)
{
void * p;
+ size_t sz = nmemb * size;
NPY_BEGIN_THREADS_DEF;
if (sz < NBUCKETS) {
p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &PyDataMem_NEW);
@@ -145,7 +146,7 @@ npy_alloc_cache_zero(npy_uintp sz)
return p;
}
NPY_BEGIN_THREADS;
- p = PyDataMem_NEW_ZEROED(sz, 1);
+ p = PyDataMem_NEW_ZEROED(nmemb, size);
NPY_END_THREADS;
return p;
}
@@ -185,10 +186,28 @@ npy_free_cache_dim(void * p, npy_uintp sz)
&PyArray_free);
}
+/* Similar to array_dealloc in arrayobject.c */
+static NPY_INLINE void
+WARN_NO_RETURN(PyObject* warning, const char * msg) {
+ if (PyErr_WarnEx(warning, msg, 1) < 0) {
+ PyObject * s;
+
+ s = PyUnicode_FromString("PyDataMem_UserFREE");
+ if (s) {
+ PyErr_WriteUnraisable(s);
+ Py_DECREF(s);
+ }
+ else {
+ PyErr_WriteUnraisable(Py_None);
+ }
+ }
+}
+
+
/* malloc/free/realloc hook */
-NPY_NO_EXPORT PyDataMem_EventHookFunc *_PyDataMem_eventhook;
-NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data;
+NPY_NO_EXPORT PyDataMem_EventHookFunc *_PyDataMem_eventhook = NULL;
+NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data = NULL;
/*NUMPY_API
* Sets the allocation event hook for numpy array data.
@@ -209,6 +228,8 @@ NPY_NO_EXPORT void *_PyDataMem_eventhook_user_data;
* operations that might cause new allocation events (such as the
* creation/destruction numpy objects, or creating/destroying Python
* objects which might cause a gc)
+ *
+ * Deprecated in 1.23
*/
NPY_NO_EXPORT PyDataMem_EventHookFunc *
PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook,
@@ -217,6 +238,10 @@ PyDataMem_SetEventHook(PyDataMem_EventHookFunc *newhook,
PyDataMem_EventHookFunc *temp;
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
+ /* 2021-11-18, 1.23 */
+ WARN_NO_RETURN(PyExc_DeprecationWarning,
+ "PyDataMem_SetEventHook is deprecated, use tracemalloc "
+ "and the 'np.lib.tracemalloc_domain' domain");
temp = _PyDataMem_eventhook;
_PyDataMem_eventhook = newhook;
if (old_data != NULL) {
@@ -254,21 +279,21 @@ PyDataMem_NEW(size_t size)
* Allocates zeroed memory for array data.
*/
NPY_NO_EXPORT void *
-PyDataMem_NEW_ZEROED(size_t size, size_t elsize)
+PyDataMem_NEW_ZEROED(size_t nmemb, size_t size)
{
void *result;
- result = calloc(size, elsize);
+ result = calloc(nmemb, size);
if (_PyDataMem_eventhook != NULL) {
NPY_ALLOW_C_API_DEF
NPY_ALLOW_C_API
if (_PyDataMem_eventhook != NULL) {
- (*_PyDataMem_eventhook)(NULL, result, size * elsize,
+ (*_PyDataMem_eventhook)(NULL, result, nmemb * size,
_PyDataMem_eventhook_user_data);
}
NPY_DISABLE_C_API
}
- PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
+ PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, nmemb * size);
return result;
}
@@ -316,3 +341,325 @@ PyDataMem_RENEW(void *ptr, size_t size)
}
return result;
}
+
+// The default data mem allocator malloc routine does not make use of a ctx.
+// It should be called only through PyDataMem_UserNEW
+// since itself does not handle eventhook and tracemalloc logic.
+static NPY_INLINE void *
+default_malloc(void *NPY_UNUSED(ctx), size_t size)
+{
+ return _npy_alloc_cache(size, 1, NBUCKETS, datacache, &malloc);
+}
+
+// The default data mem allocator calloc routine does not make use of a ctx.
+// It should be called only through PyDataMem_UserNEW_ZEROED
+// since itself does not handle eventhook and tracemalloc logic.
+static NPY_INLINE void *
+default_calloc(void *NPY_UNUSED(ctx), size_t nelem, size_t elsize)
+{
+ void * p;
+ size_t sz = nelem * elsize;
+ NPY_BEGIN_THREADS_DEF;
+ if (sz < NBUCKETS) {
+ p = _npy_alloc_cache(sz, 1, NBUCKETS, datacache, &malloc);
+ if (p) {
+ memset(p, 0, sz);
+ }
+ return p;
+ }
+ NPY_BEGIN_THREADS;
+ p = calloc(nelem, elsize);
+ NPY_END_THREADS;
+ return p;
+}
+
+// The default data mem allocator realloc routine does not make use of a ctx.
+// It should be called only through PyDataMem_UserRENEW
+// since itself does not handle eventhook and tracemalloc logic.
+static NPY_INLINE void *
+default_realloc(void *NPY_UNUSED(ctx), void *ptr, size_t new_size)
+{
+ return realloc(ptr, new_size);
+}
+
+// The default data mem allocator free routine does not make use of a ctx.
+// It should be called only through PyDataMem_UserFREE
+// since itself does not handle eventhook and tracemalloc logic.
+static NPY_INLINE void
+default_free(void *NPY_UNUSED(ctx), void *ptr, size_t size)
+{
+ _npy_free_cache(ptr, size, NBUCKETS, datacache, &free);
+}
+
+/* Memory handler global default */
+PyDataMem_Handler default_handler = {
+ "default_allocator",
+ 1,
+ {
+ NULL, /* ctx */
+ default_malloc, /* malloc */
+ default_calloc, /* calloc */
+ default_realloc, /* realloc */
+ default_free /* free */
+ }
+};
+/* singleton capsule of the default handler */
+PyObject *PyDataMem_DefaultHandler;
+
+#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600)
+PyObject *current_handler;
+#endif
+
+int uo_index=0; /* user_override index */
+
+/* Wrappers for the default or any user-assigned PyDataMem_Handler */
+
+NPY_NO_EXPORT void *
+PyDataMem_UserNEW(size_t size, PyObject *mem_handler)
+{
+ void *result;
+ PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ return NULL;
+ }
+ assert(size != 0);
+ result = handler->allocator.malloc(handler->allocator.ctx, size);
+ if (_PyDataMem_eventhook != NULL) {
+ NPY_ALLOW_C_API_DEF
+ NPY_ALLOW_C_API
+ if (_PyDataMem_eventhook != NULL) {
+ (*_PyDataMem_eventhook)(NULL, result, size,
+ _PyDataMem_eventhook_user_data);
+ }
+ NPY_DISABLE_C_API
+ }
+ PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
+ return result;
+}
+
+NPY_NO_EXPORT void *
+PyDataMem_UserNEW_ZEROED(size_t nmemb, size_t size, PyObject *mem_handler)
+{
+ void *result;
+ PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ return NULL;
+ }
+ result = handler->allocator.calloc(handler->allocator.ctx, nmemb, size);
+ if (_PyDataMem_eventhook != NULL) {
+ NPY_ALLOW_C_API_DEF
+ NPY_ALLOW_C_API
+ if (_PyDataMem_eventhook != NULL) {
+ (*_PyDataMem_eventhook)(NULL, result, nmemb * size,
+ _PyDataMem_eventhook_user_data);
+ }
+ NPY_DISABLE_C_API
+ }
+ PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, nmemb * size);
+ return result;
+}
+
+
+NPY_NO_EXPORT void
+PyDataMem_UserFREE(void *ptr, size_t size, PyObject *mem_handler)
+{
+ PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ WARN_NO_RETURN(PyExc_RuntimeWarning,
+ "Could not get pointer to 'mem_handler' from PyCapsule");
+ return;
+ }
+ PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr);
+ handler->allocator.free(handler->allocator.ctx, ptr, size);
+ if (_PyDataMem_eventhook != NULL) {
+ NPY_ALLOW_C_API_DEF
+ NPY_ALLOW_C_API
+ if (_PyDataMem_eventhook != NULL) {
+ (*_PyDataMem_eventhook)(ptr, NULL, 0,
+ _PyDataMem_eventhook_user_data);
+ }
+ NPY_DISABLE_C_API
+ }
+}
+
+NPY_NO_EXPORT void *
+PyDataMem_UserRENEW(void *ptr, size_t size, PyObject *mem_handler)
+{
+ void *result;
+ PyDataMem_Handler *handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ return NULL;
+ }
+
+ assert(size != 0);
+ result = handler->allocator.realloc(handler->allocator.ctx, ptr, size);
+ if (result != ptr) {
+ PyTraceMalloc_Untrack(NPY_TRACE_DOMAIN, (npy_uintp)ptr);
+ }
+ PyTraceMalloc_Track(NPY_TRACE_DOMAIN, (npy_uintp)result, size);
+ if (_PyDataMem_eventhook != NULL) {
+ NPY_ALLOW_C_API_DEF
+ NPY_ALLOW_C_API
+ if (_PyDataMem_eventhook != NULL) {
+ (*_PyDataMem_eventhook)(ptr, result, size,
+ _PyDataMem_eventhook_user_data);
+ }
+ NPY_DISABLE_C_API
+ }
+ return result;
+}
+
+/*NUMPY_API
+ * Set a new allocation policy. If the input value is NULL, will reset
+ * the policy to the default. Return the previous policy, or
+ * return NULL if an error has occurred. We wrap the user-provided
+ * functions so they will still call the python and numpy
+ * memory management callback hooks.
+ */
+NPY_NO_EXPORT PyObject *
+PyDataMem_SetHandler(PyObject *handler)
+{
+ PyObject *old_handler;
+#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600)
+ PyObject *token;
+ if (PyContextVar_Get(current_handler, NULL, &old_handler)) {
+ return NULL;
+ }
+ if (handler == NULL) {
+ handler = PyDataMem_DefaultHandler;
+ }
+ token = PyContextVar_Set(current_handler, handler);
+ if (token == NULL) {
+ Py_DECREF(old_handler);
+ return NULL;
+ }
+ Py_DECREF(token);
+ return old_handler;
+#else
+ PyObject *p;
+ p = PyThreadState_GetDict();
+ if (p == NULL) {
+ return NULL;
+ }
+ old_handler = PyDict_GetItemString(p, "current_allocator");
+ if (old_handler == NULL) {
+ old_handler = PyDataMem_DefaultHandler;
+ }
+ Py_INCREF(old_handler);
+ if (handler == NULL) {
+ handler = PyDataMem_DefaultHandler;
+ }
+ const int error = PyDict_SetItemString(p, "current_allocator", handler);
+ if (error) {
+ Py_DECREF(old_handler);
+ return NULL;
+ }
+ return old_handler;
+#endif
+}
+
+/*NUMPY_API
+ * Return the policy that will be used to allocate data
+ * for the next PyArrayObject. On failure, return NULL.
+ */
+NPY_NO_EXPORT PyObject *
+PyDataMem_GetHandler()
+{
+ PyObject *handler;
+#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600)
+ if (PyContextVar_Get(current_handler, NULL, &handler)) {
+ return NULL;
+ }
+ return handler;
+#else
+ PyObject *p = PyThreadState_GetDict();
+ if (p == NULL) {
+ return NULL;
+ }
+ handler = PyDict_GetItemString(p, "current_allocator");
+ if (handler == NULL) {
+ handler = PyCapsule_New(&default_handler, "mem_handler", NULL);
+ if (handler == NULL) {
+ return NULL;
+ }
+ }
+ else {
+ Py_INCREF(handler);
+ }
+ return handler;
+#endif
+}
+
+NPY_NO_EXPORT PyObject *
+get_handler_name(PyObject *NPY_UNUSED(self), PyObject *args)
+{
+ PyObject *arr=NULL;
+ if (!PyArg_ParseTuple(args, "|O:get_handler_name", &arr)) {
+ return NULL;
+ }
+ if (arr != NULL && !PyArray_Check(arr)) {
+ PyErr_SetString(PyExc_ValueError, "if supplied, argument must be an ndarray");
+ return NULL;
+ }
+ PyObject *mem_handler;
+ PyDataMem_Handler *handler;
+ PyObject *name;
+ if (arr != NULL) {
+ mem_handler = PyArray_HANDLER((PyArrayObject *) arr);
+ if (mem_handler == NULL) {
+ Py_RETURN_NONE;
+ }
+ Py_INCREF(mem_handler);
+ }
+ else {
+ mem_handler = PyDataMem_GetHandler();
+ if (mem_handler == NULL) {
+ return NULL;
+ }
+ }
+ handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ Py_DECREF(mem_handler);
+ return NULL;
+ }
+ name = PyUnicode_FromString(handler->name);
+ Py_DECREF(mem_handler);
+ return name;
+}
+
+NPY_NO_EXPORT PyObject *
+get_handler_version(PyObject *NPY_UNUSED(self), PyObject *args)
+{
+ PyObject *arr=NULL;
+ if (!PyArg_ParseTuple(args, "|O:get_handler_version", &arr)) {
+ return NULL;
+ }
+ if (arr != NULL && !PyArray_Check(arr)) {
+ PyErr_SetString(PyExc_ValueError, "if supplied, argument must be an ndarray");
+ return NULL;
+ }
+ PyObject *mem_handler;
+ PyDataMem_Handler *handler;
+ PyObject *version;
+ if (arr != NULL) {
+ mem_handler = PyArray_HANDLER((PyArrayObject *) arr);
+ if (mem_handler == NULL) {
+ Py_RETURN_NONE;
+ }
+ Py_INCREF(mem_handler);
+ }
+ else {
+ mem_handler = PyDataMem_GetHandler();
+ if (mem_handler == NULL) {
+ return NULL;
+ }
+ }
+ handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler");
+ if (handler == NULL) {
+ Py_DECREF(mem_handler);
+ return NULL;
+ }
+ version = PyLong_FromLong(handler->version);
+ Py_DECREF(mem_handler);
+ return version;
+}
diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h
index 1259abca5..13c828458 100644
--- a/numpy/core/src/multiarray/alloc.h
+++ b/numpy/core/src/multiarray/alloc.h
@@ -11,13 +11,16 @@ NPY_NO_EXPORT PyObject *
_set_madvise_hugepage(PyObject *NPY_UNUSED(self), PyObject *enabled_obj);
NPY_NO_EXPORT void *
-npy_alloc_cache(npy_uintp sz);
+PyDataMem_UserNEW(npy_uintp sz, PyObject *mem_handler);
NPY_NO_EXPORT void *
-npy_alloc_cache_zero(npy_uintp sz);
+PyDataMem_UserNEW_ZEROED(size_t nmemb, size_t size, PyObject *mem_handler);
NPY_NO_EXPORT void
-npy_free_cache(void * p, npy_uintp sd);
+PyDataMem_UserFREE(void * p, npy_uintp sd, PyObject *mem_handler);
+
+NPY_NO_EXPORT void *
+PyDataMem_UserRENEW(void *ptr, size_t size, PyObject *mem_handler);
NPY_NO_EXPORT void *
npy_alloc_cache_dim(npy_uintp sz);
@@ -37,4 +40,14 @@ npy_free_cache_dim_array(PyArrayObject * arr)
npy_free_cache_dim(PyArray_DIMS(arr), PyArray_NDIM(arr));
}
+extern PyDataMem_Handler default_handler;
+#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600)
+extern PyObject *current_handler; /* PyContextVar/PyCapsule */
+#endif
+
+NPY_NO_EXPORT PyObject *
+get_handler_name(PyObject *NPY_UNUSED(self), PyObject *obj);
+NPY_NO_EXPORT PyObject *
+get_handler_version(PyObject *NPY_UNUSED(self), PyObject *obj);
+
#endif /* NUMPY_CORE_SRC_MULTIARRAY_ALLOC_H_ */
diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c
index 847bdafc3..2598e4bde 100644
--- a/numpy/core/src/multiarray/array_coercion.c
+++ b/numpy/core/src/multiarray/array_coercion.c
@@ -555,6 +555,7 @@ npy_new_coercion_cache(
cache = PyMem_Malloc(sizeof(coercion_cache_obj));
}
if (cache == NULL) {
+ Py_DECREF(arr_or_sequence);
PyErr_NoMemory();
return -1;
}
@@ -857,6 +858,7 @@ PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype)
* (Initially it is a pointer to the user-provided head pointer).
* @param fixed_DType User provided fixed DType class
* @param flags Discovery flags (reporting and behaviour flags, see def.)
+ * @param never_copy Specifies if a copy is allowed during array creation.
* @return The updated number of maximum dimensions (i.e. scalars will set
* this to the current dimensions).
*/
@@ -865,7 +867,8 @@ PyArray_DiscoverDTypeAndShape_Recursive(
PyObject *obj, int curr_dims, int max_dims, PyArray_Descr**out_descr,
npy_intp out_shape[NPY_MAXDIMS],
coercion_cache_obj ***coercion_cache_tail_ptr,
- PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags)
+ PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags,
+ int never_copy)
{
PyArrayObject *arr = NULL;
PyObject *seq;
@@ -923,7 +926,7 @@ PyArray_DiscoverDTypeAndShape_Recursive(
requested_descr = *out_descr;
}
arr = (PyArrayObject *)_array_from_array_like(obj,
- requested_descr, 0, NULL);
+ requested_descr, 0, NULL, never_copy);
if (arr == NULL) {
return -1;
}
@@ -1117,7 +1120,7 @@ PyArray_DiscoverDTypeAndShape_Recursive(
max_dims = PyArray_DiscoverDTypeAndShape_Recursive(
objects[i], curr_dims + 1, max_dims,
out_descr, out_shape, coercion_cache_tail_ptr, fixed_DType,
- flags);
+ flags, never_copy);
if (max_dims < 0) {
return -1;
@@ -1157,6 +1160,7 @@ PyArray_DiscoverDTypeAndShape_Recursive(
* The result may be unchanged (remain NULL) when converting a
* sequence with no elements. In this case it is callers responsibility
* to choose a default.
+ * @param never_copy Specifies that a copy is not allowed.
* @return dimensions of the discovered object or -1 on error.
* WARNING: If (and only if) the output is a single array, the ndim
* returned _can_ exceed the maximum allowed number of dimensions.
@@ -1169,7 +1173,7 @@ PyArray_DiscoverDTypeAndShape(
npy_intp out_shape[NPY_MAXDIMS],
coercion_cache_obj **coercion_cache,
PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
- PyArray_Descr **out_descr)
+ PyArray_Descr **out_descr, int never_copy)
{
coercion_cache_obj **coercion_cache_head = coercion_cache;
*coercion_cache = NULL;
@@ -1214,7 +1218,7 @@ PyArray_DiscoverDTypeAndShape(
int ndim = PyArray_DiscoverDTypeAndShape_Recursive(
obj, 0, max_dims, out_descr, out_shape, &coercion_cache,
- fixed_DType, &flags);
+ fixed_DType, &flags, never_copy);
if (ndim < 0) {
goto fail;
}
@@ -1499,7 +1503,7 @@ _discover_array_parameters(PyObject *NPY_UNUSED(self),
int ndim = PyArray_DiscoverDTypeAndShape(
obj, NPY_MAXDIMS, shape,
&coercion_cache,
- fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype);
+ fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype, 0);
Py_XDECREF(fixed_DType);
Py_XDECREF(fixed_descriptor);
if (ndim < 0) {
diff --git a/numpy/core/src/multiarray/array_coercion.h b/numpy/core/src/multiarray/array_coercion.h
index db0e479fe..f2482cecc 100644
--- a/numpy/core/src/multiarray/array_coercion.h
+++ b/numpy/core/src/multiarray/array_coercion.h
@@ -31,7 +31,7 @@ PyArray_DiscoverDTypeAndShape(
npy_intp out_shape[NPY_MAXDIMS],
coercion_cache_obj **coercion_cache,
PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
- PyArray_Descr **out_descr);
+ PyArray_Descr **out_descr, int never_copy);
NPY_NO_EXPORT int
PyArray_ExtractDTypeAndDescriptor(PyObject *dtype,
diff --git a/numpy/core/src/multiarray/array_method.c b/numpy/core/src/multiarray/array_method.c
index 406b0c6ff..d93dac506 100644
--- a/numpy/core/src/multiarray/array_method.c
+++ b/numpy/core/src/multiarray/array_method.c
@@ -780,6 +780,13 @@ _masked_stridedloop_data_free(NpyAuxData *auxdata)
* This function wraps a regular unmasked strided-loop as a
* masked strided-loop, only calling the function for elements
* where the mask is True.
+ *
+ * TODO: Reductions also use this code to implement masked reductions.
+ * Before consolidating them, reductions had a special case for
+ * broadcasts: when the mask stride was 0 the code does not check all
+ * elements as `npy_memchr` currently does.
+ * It may be worthwhile to add such an optimization again if broadcasted
+ * masks are common enough.
*/
static int
generic_masked_strided_loop(PyArrayMethod_Context *context,
diff --git a/numpy/core/src/multiarray/array_method.h b/numpy/core/src/multiarray/array_method.h
index b29c7c077..7b7372bd0 100644
--- a/numpy/core/src/multiarray/array_method.h
+++ b/numpy/core/src/multiarray/array_method.h
@@ -21,6 +21,17 @@ typedef enum {
NPY_METH_NO_FLOATINGPOINT_ERRORS = 1 << 2,
/* Whether the method supports unaligned access (not runtime) */
NPY_METH_SUPPORTS_UNALIGNED = 1 << 3,
+ /*
+ * Private flag for now for *logic* functions. The logical functions
+ * `logical_or` and `logical_and` can always cast the inputs to booleans
+ * "safely" (because that is how the cast to bool is defined).
+ * @seberg: I am not sure this is the best way to handle this, so its
+ * private for now (also it is very limited anyway).
+ * There is one "exception". NA aware dtypes cannot cast to bool
+ * (hopefully), so the `??->?` loop should error even with this flag.
+ * But a second NA fallback loop will be necessary.
+ */
+ _NPY_METH_FORCE_CAST_INPUTS = 1 << 17,
/* All flags which can change at runtime */
NPY_METH_RUNTIME_FLAGS = (
diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c
index 9b9df08f2..1b197d0f2 100644
--- a/numpy/core/src/multiarray/arrayobject.c
+++ b/numpy/core/src/multiarray/arrayobject.c
@@ -263,7 +263,7 @@ PyArray_CopyObject(PyArrayObject *dest, PyObject *src_object)
*/
ndim = PyArray_DiscoverDTypeAndShape(src_object,
PyArray_NDIM(dest), dims, &cache,
- NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype);
+ NPY_DTYPE(PyArray_DESCR(dest)), PyArray_DESCR(dest), &dtype, 0);
if (ndim < 0) {
return -1;
}
@@ -493,7 +493,28 @@ array_dealloc(PyArrayObject *self)
if (PyDataType_FLAGCHK(fa->descr, NPY_ITEM_REFCOUNT)) {
PyArray_XDECREF(self);
}
- npy_free_cache(fa->data, PyArray_NBYTES(self));
+ /*
+ * Allocation will never be 0, see comment in ctors.c
+ * line 820
+ */
+ size_t nbytes = PyArray_NBYTES(self);
+ if (nbytes == 0) {
+ nbytes = fa->descr->elsize ? fa->descr->elsize : 1;
+ }
+ if (fa->mem_handler == NULL) {
+ char *env = getenv("NUMPY_WARN_IF_NO_MEM_POLICY");
+ if ((env != NULL) && (strncmp(env, "1", 1) == 0)) {
+ char const * msg = "Trying to dealloc data, but a memory policy "
+ "is not set. If you take ownership of the data, you must "
+ "set a base owning the data (e.g. a PyCapsule).";
+ WARN_IN_DEALLOC(PyExc_RuntimeWarning, msg);
+ }
+ // Guess at malloc/free ???
+ free(fa->data);
+ } else {
+ PyDataMem_UserFREE(fa->data, nbytes, fa->mem_handler);
+ Py_DECREF(fa->mem_handler);
+ }
}
/* must match allocation in PyArray_NewFromDescr */
@@ -1705,22 +1726,6 @@ array_iter(PyArrayObject *arr)
return PySeqIter_New((PyObject *)arr);
}
-static PyObject *
-array_alloc(PyTypeObject *type, Py_ssize_t NPY_UNUSED(nitems))
-{
- /* nitems will always be 0 */
- PyObject *obj = PyObject_Malloc(type->tp_basicsize);
- PyObject_Init(obj, type);
- return obj;
-}
-
-static void
-array_free(PyObject * v)
-{
- /* avoid same deallocator as PyBaseObject, see gentype_free */
- PyObject_Free(v);
-}
-
NPY_NO_EXPORT PyTypeObject PyArray_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
@@ -1741,7 +1746,5 @@ NPY_NO_EXPORT PyTypeObject PyArray_Type = {
.tp_iter = (getiterfunc)array_iter,
.tp_methods = array_methods,
.tp_getset = array_getsetlist,
- .tp_alloc = (allocfunc)array_alloc,
.tp_new = (newfunc)array_new,
- .tp_free = (freefunc)array_free,
};
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 15782a91b..71808cc48 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -2759,10 +2759,10 @@ VOID_nonzero (char *ip, PyArrayObject *ap)
dummy_fields.descr = new;
if ((new->alignment > 1) && !__ALIGNED(ip + offset,
new->alignment)) {
- PyArray_CLEARFLAGS(ap, NPY_ARRAY_ALIGNED);
+ PyArray_CLEARFLAGS(dummy_arr, NPY_ARRAY_ALIGNED);
}
else {
- PyArray_ENABLEFLAGS(ap, NPY_ARRAY_ALIGNED);
+ PyArray_ENABLEFLAGS(dummy_arr, NPY_ARRAY_ALIGNED);
}
if (new->f->nonzero(ip+offset, dummy_arr)) {
nonz = NPY_TRUE;
@@ -3093,6 +3093,10 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap)
if (!PyArray_HASFIELDS(ap)) {
return STRING_compare(ip1, ip2, ap);
}
+ PyObject *mem_handler = PyDataMem_GetHandler();
+ if (mem_handler == NULL) {
+ goto finish;
+ }
descr = PyArray_DESCR(ap);
/*
* Compare on the first-field. If equal, then
@@ -3107,15 +3111,19 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap)
if (_unpack_field(tup, &new, &offset) < 0) {
goto finish;
}
- /* descr is the only field checked by compare or copyswap */
+ /* Set the fields needed by compare or copyswap */
dummy_struct.descr = new;
+
swap = PyArray_ISBYTESWAPPED(dummy);
nip1 = ip1 + offset;
nip2 = ip2 + offset;
if (swap || new->alignment > 1) {
if (swap || !npy_is_aligned(nip1, new->alignment)) {
- /* create buffer and copy */
- nip1 = npy_alloc_cache(new->elsize);
+ /*
+ * create temporary buffer and copy,
+ * always use the current handler for internal allocations
+ */
+ nip1 = PyDataMem_UserNEW(new->elsize, mem_handler);
if (nip1 == NULL) {
goto finish;
}
@@ -3124,11 +3132,15 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap)
new->f->copyswap(nip1, NULL, swap, dummy);
}
if (swap || !npy_is_aligned(nip2, new->alignment)) {
- /* create buffer and copy */
- nip2 = npy_alloc_cache(new->elsize);
+ /*
+ * create temporary buffer and copy,
+ * always use the current handler for internal allocations
+ */
+ nip2 = PyDataMem_UserNEW(new->elsize, mem_handler);
if (nip2 == NULL) {
if (nip1 != ip1 + offset) {
- npy_free_cache(nip1, new->elsize);
+ /* destroy temporary buffer */
+ PyDataMem_UserFREE(nip1, new->elsize, mem_handler);
}
goto finish;
}
@@ -3140,10 +3152,12 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap)
res = new->f->compare(nip1, nip2, dummy);
if (swap || new->alignment > 1) {
if (nip1 != ip1 + offset) {
- npy_free_cache(nip1, new->elsize);
+ /* destroy temporary buffer */
+ PyDataMem_UserFREE(nip1, new->elsize, mem_handler);
}
if (nip2 != ip2 + offset) {
- npy_free_cache(nip2, new->elsize);
+ /* destroy temporary buffer */
+ PyDataMem_UserFREE(nip2, new->elsize, mem_handler);
}
}
if (res != 0) {
@@ -3152,6 +3166,7 @@ VOID_compare(char *ip1, char *ip2, PyArrayObject *ap)
}
finish:
+ Py_XDECREF(mem_handler);
return res;
}
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 82d34193d..aa95d285a 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -119,7 +119,7 @@ PyArray_DTypeFromObject(PyObject *obj, int maxdims, PyArray_Descr **out_dtype)
int ndim;
ndim = PyArray_DiscoverDTypeAndShape(
- obj, maxdims, shape, &cache, NULL, NULL, out_dtype);
+ obj, maxdims, shape, &cache, NULL, NULL, out_dtype, 0);
if (ndim < 0) {
return -1;
}
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 9910fffe6..5853e068b 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1393,7 +1393,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args)
{
PyObject *obj;
PyObject *str;
- #if PY_VERSION_HEX >= 0x030700A2 && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300)
+ #if !defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300
const char *docstr;
#else
char *docstr;
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 6de764fb1..ef101a78b 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -163,6 +163,41 @@ PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq)
return PyArray_IntpConverter(obj, seq);
}
+NPY_NO_EXPORT int
+PyArray_CopyConverter(PyObject *obj, _PyArray_CopyMode *copymode) {
+ if (obj == Py_None) {
+ PyErr_SetString(PyExc_ValueError,
+ "NoneType copy mode not allowed.");
+ return NPY_FAIL;
+ }
+
+ int int_copymode;
+ PyObject* numpy_CopyMode = NULL;
+ npy_cache_import("numpy", "_CopyMode", &numpy_CopyMode);
+
+ if (numpy_CopyMode != NULL && (PyObject *)Py_TYPE(obj) == numpy_CopyMode) {
+ PyObject* mode_value = PyObject_GetAttrString(obj, "value");
+ if (mode_value == NULL) {
+ return NPY_FAIL;
+ }
+
+ int_copymode = (int)PyLong_AsLong(mode_value);
+ if (error_converting(int_copymode)) {
+ return NPY_FAIL;
+ }
+ }
+ else {
+ npy_bool bool_copymode;
+ if (!PyArray_BoolConverter(obj, &bool_copymode)) {
+ return NPY_FAIL;
+ }
+ int_copymode = (int)bool_copymode;
+ }
+
+ *copymode = (_PyArray_CopyMode)int_copymode;
+ return NPY_SUCCEED;
+}
+
/*NUMPY_API
* Get buffer chunk from object
*
diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h
index 89cf2ef27..4072841ee 100644
--- a/numpy/core/src/multiarray/conversion_utils.h
+++ b/numpy/core/src/multiarray/conversion_utils.h
@@ -9,6 +9,15 @@ PyArray_IntpConverter(PyObject *obj, PyArray_Dims *seq);
NPY_NO_EXPORT int
PyArray_OptionalIntpConverter(PyObject *obj, PyArray_Dims *seq);
+typedef enum {
+ NPY_COPY_IF_NEEDED = 0,
+ NPY_COPY_ALWAYS = 1,
+ NPY_COPY_NEVER = 2,
+} _PyArray_CopyMode;
+
+NPY_NO_EXPORT int
+PyArray_CopyConverter(PyObject *obj, _PyArray_CopyMode *copyflag);
+
NPY_NO_EXPORT int
PyArray_BufferConverter(PyObject *obj, PyArray_Chunk *buf);
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index eeadad374..3135d6989 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -2119,7 +2119,7 @@ PyArray_ObjectType(PyObject *op, int minimum_type)
* This function is only used in one place within NumPy and should
* generally be avoided. It is provided mainly for backward compatibility.
*
- * The user of the function has to free the returned array.
+ * The user of the function has to free the returned array with PyDataMem_FREE.
*/
NPY_NO_EXPORT PyArrayObject **
PyArray_ConvertToCommonType(PyObject *op, int *retn)
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index 9da75fb8a..b62426854 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -726,6 +726,7 @@ PyArray_NewFromDescr_int(
fa->nd = nd;
fa->dimensions = NULL;
fa->data = NULL;
+ fa->mem_handler = NULL;
if (data == NULL) {
fa->flags = NPY_ARRAY_DEFAULT;
@@ -805,12 +806,19 @@ PyArray_NewFromDescr_int(
fa->flags |= NPY_ARRAY_C_CONTIGUOUS|NPY_ARRAY_F_CONTIGUOUS;
}
+
if (data == NULL) {
+ /* Store the handler in case the default is modified */
+ fa->mem_handler = PyDataMem_GetHandler();
+ if (fa->mem_handler == NULL) {
+ goto fail;
+ }
/*
* Allocate something even for zero-space arrays
* e.g. shape=(0,) -- otherwise buffer exposure
* (a.data) doesn't work as it should.
* Could probably just allocate a few bytes here. -- Chuck
+ * Note: always sync this with calls to PyDataMem_UserFREE
*/
if (nbytes == 0) {
nbytes = descr->elsize ? descr->elsize : 1;
@@ -820,21 +828,23 @@ PyArray_NewFromDescr_int(
* which could also be sub-fields of a VOID array
*/
if (zeroed || PyDataType_FLAGCHK(descr, NPY_NEEDS_INIT)) {
- data = npy_alloc_cache_zero(nbytes);
+ data = PyDataMem_UserNEW_ZEROED(nbytes, 1, fa->mem_handler);
}
else {
- data = npy_alloc_cache(nbytes);
+ data = PyDataMem_UserNEW(nbytes, fa->mem_handler);
}
if (data == NULL) {
raise_memory_error(fa->nd, fa->dimensions, descr);
goto fail;
}
+
fa->flags |= NPY_ARRAY_OWNDATA;
}
else {
+ /* The handlers should never be called in this case */
+ fa->mem_handler = NULL;
/*
- * If data is passed in, this object won't own it by default.
- * Caller must arrange for this to be reset if truly desired
+ * If data is passed in, this object won't own it.
*/
fa->flags &= ~NPY_ARRAY_OWNDATA;
}
@@ -902,6 +912,7 @@ PyArray_NewFromDescr_int(
return (PyObject *)fa;
fail:
+ Py_XDECREF(fa->mem_handler);
Py_DECREF(fa);
return NULL;
}
@@ -1273,6 +1284,7 @@ fail:
* DType may be used, but is not enforced.
* @param writeable whether the result must be writeable.
* @param context Unused parameter, must be NULL (should be removed later).
+ * @param never_copy Specifies that a copy is not allowed.
*
* @returns The array object, Py_NotImplemented if op is not array-like,
* or NULL with an error set. (A new reference to Py_NotImplemented
@@ -1280,7 +1292,8 @@ fail:
*/
NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op,
- PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context) {
+ PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context,
+ int never_copy) {
PyObject* tmp;
/*
@@ -1336,7 +1349,7 @@ _array_from_array_like(PyObject *op,
* this should be changed!
*/
if (!writeable && tmp == Py_NotImplemented) {
- tmp = PyArray_FromArrayAttr(op, requested_dtype, context);
+ tmp = PyArray_FromArrayAttr_int(op, requested_dtype, never_copy);
if (tmp == NULL) {
return NULL;
}
@@ -1436,7 +1449,7 @@ setArrayFromSequence(PyArrayObject *a, PyObject *s,
}
/* Try __array__ before using s as a sequence */
- PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL);
+ PyObject *tmp = _array_from_array_like(s, NULL, 0, NULL, 0);
if (tmp == NULL) {
goto fail;
}
@@ -1564,7 +1577,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
Py_XDECREF(newtype);
ndim = PyArray_DiscoverDTypeAndShape(op,
- NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype);
+ NPY_MAXDIMS, dims, &cache, fixed_DType, fixed_descriptor, &dtype,
+ flags & NPY_ARRAY_ENSURENOCOPY);
Py_XDECREF(fixed_descriptor);
Py_XDECREF(fixed_DType);
@@ -1689,7 +1703,17 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
((PyVoidScalarObject *)op)->flags,
NULL, op);
}
- else if (cache == 0 && newtype != NULL &&
+ /*
+ * If we got this far, we definitely have to create a copy, since we are
+ * converting either from a scalar (cache == NULL) or a (nested) sequence.
+ */
+ if (flags & NPY_ARRAY_ENSURENOCOPY ) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while creating an array.");
+ return NULL;
+ }
+
+ if (cache == 0 && newtype != NULL &&
PyDataType_ISSIGNED(newtype) && PyArray_IsScalar(op, Generic)) {
assert(ndim == 0);
/*
@@ -1790,7 +1814,8 @@ PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
* NPY_ARRAY_WRITEBACKIFCOPY,
* NPY_ARRAY_FORCECAST,
* NPY_ARRAY_ENSUREARRAY,
- * NPY_ARRAY_ELEMENTSTRIDES
+ * NPY_ARRAY_ELEMENTSTRIDES,
+ * NPY_ARRAY_ENSURENOCOPY
*
* or'd (|) together
*
@@ -1851,9 +1876,15 @@ PyArray_CheckFromAny(PyObject *op, PyArray_Descr *descr, int min_depth,
if (obj == NULL) {
return NULL;
}
- if ((requires & NPY_ARRAY_ELEMENTSTRIDES) &&
- !PyArray_ElementStrides(obj)) {
+
+ if ((requires & NPY_ARRAY_ELEMENTSTRIDES)
+ && !PyArray_ElementStrides(obj)) {
PyObject *ret;
+ if (requires & NPY_ARRAY_ENSURENOCOPY) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while creating a new array.");
+ return NULL;
+ }
ret = PyArray_NewCopy((PyArrayObject *)obj, NPY_ANYORDER);
Py_DECREF(obj);
obj = ret;
@@ -1928,6 +1959,12 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
!PyArray_EquivTypes(oldtype, newtype);
if (copy) {
+ if (flags & NPY_ARRAY_ENSURENOCOPY ) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while creating an array from given array.");
+ return NULL;
+ }
+
NPY_ORDER order = NPY_KEEPORDER;
int subok = 1;
@@ -2000,7 +2037,6 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
if (flags & NPY_ARRAY_ENSUREARRAY) {
subtype = &PyArray_Type;
}
-
ret = (PyArrayObject *)PyArray_View(arr, NULL, subtype);
if (ret == NULL) {
return NULL;
@@ -2425,18 +2461,30 @@ PyArray_FromInterface(PyObject *origin)
return NULL;
}
-/*NUMPY_API
+
+/**
+ * Check for an __array__ attribute and call it when it exists.
+ *
+ * .. warning::
+ *     If returned, `NotImplemented` is borrowed and must not be Decref'd
+ *
+ * @param op The Python object to convert to an array.
+ * @param descr The desired `arr.dtype`, passed into the `__array__` call,
+ * as information but is not checked/enforced!
+ * @param never_copy Specifies that a copy is not allowed.
+ * NOTE: Currently, this means an error is raised instead of calling
+ * `op.__array__()`. In the future we could, for example, call
+ * `op.__array__(never_copy=True)` instead.
+ * @returns NotImplemented if `__array__` is not defined or a NumPy array
+ * (or subclass). On error, return NULL.
*/
NPY_NO_EXPORT PyObject *
-PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
+PyArray_FromArrayAttr_int(
+ PyObject *op, PyArray_Descr *descr, int never_copy)
{
PyObject *new;
PyObject *array_meth;
- if (context != NULL) {
- PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL");
- return NULL;
- }
array_meth = PyArray_LookupSpecial_OnInstance(op, "__array__");
if (array_meth == NULL) {
if (PyErr_Occurred()) {
@@ -2452,6 +2500,16 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
}
return Py_NotImplemented;
}
+ if (never_copy) {
+ /* Currently, we must always assume that `__array__` returns a copy */
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while converting from an object "
+ "implementing the `__array__` protocol. NumPy cannot ensure "
+ "that no copy will be made.");
+ Py_DECREF(array_meth);
+ return NULL;
+ }
+
if (PyType_Check(op) && PyObject_HasAttrString(array_meth, "__get__")) {
/*
* If the input is a class `array_meth` may be a property-like object.
@@ -2462,11 +2520,11 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
Py_DECREF(array_meth);
return Py_NotImplemented;
}
- if (typecode == NULL) {
+ if (descr == NULL) {
new = PyObject_CallFunction(array_meth, NULL);
}
else {
- new = PyObject_CallFunction(array_meth, "O", typecode);
+ new = PyObject_CallFunction(array_meth, "O", descr);
}
Py_DECREF(array_meth);
if (new == NULL) {
@@ -2482,6 +2540,21 @@ PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
return new;
}
+
+/*NUMPY_API
+ */
+NPY_NO_EXPORT PyObject *
+PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode, PyObject *context)
+{
+ if (context != NULL) {
+ PyErr_SetString(PyExc_RuntimeError, "'context' must be NULL");
+ return NULL;
+ }
+
+ return PyArray_FromArrayAttr_int(op, typecode, 0);
+}
+
+
/*NUMPY_API
* new reference -- accepts NULL for mintype
*/
@@ -3409,7 +3482,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre
dptr += dtype->elsize;
if (num < 0 && thisbuf == size) {
totalbytes += bytes;
- tmp = PyDataMem_RENEW(PyArray_DATA(r), totalbytes);
+ /* The handler is always valid */
+ tmp = PyDataMem_UserRENEW(PyArray_DATA(r), totalbytes,
+ PyArray_HANDLER(r));
if (tmp == NULL) {
err = 1;
break;
@@ -3431,7 +3506,9 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre
const size_t nsize = PyArray_MAX(*nread,1)*dtype->elsize;
if (nsize != 0) {
- tmp = PyDataMem_RENEW(PyArray_DATA(r), nsize);
+ /* The handler is always valid */
+ tmp = PyDataMem_UserRENEW(PyArray_DATA(r), nsize,
+ PyArray_HANDLER(r));
if (tmp == NULL) {
err = 1;
}
@@ -3536,7 +3613,9 @@ PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, npy_intp num, char *sep)
const size_t nsize = PyArray_MAX(nread,1) * dtype->elsize;
char *tmp;
- if ((tmp = PyDataMem_RENEW(PyArray_DATA(ret), nsize)) == NULL) {
+ /* The handler is always valid */
+ if((tmp = PyDataMem_UserRENEW(PyArray_DATA(ret), nsize,
+ PyArray_HANDLER(ret))) == NULL) {
Py_DECREF(dtype);
Py_DECREF(ret);
return PyErr_NoMemory();
@@ -3820,7 +3899,9 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
*/
elcount = (i >> 1) + (i < 4 ? 4 : 2) + i;
if (!npy_mul_with_overflow_intp(&nbytes, elcount, elsize)) {
- new_data = PyDataMem_RENEW(PyArray_DATA(ret), nbytes);
+ /* The handler is always valid */
+ new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), nbytes,
+ PyArray_HANDLER(ret));
}
else {
new_data = NULL;
@@ -3858,10 +3939,12 @@ PyArray_FromIter(PyObject *obj, PyArray_Descr *dtype, npy_intp count)
* (assuming realloc is reasonably good about reusing space...)
*/
if (i == 0 || elsize == 0) {
- /* The size cannot be zero for PyDataMem_RENEW. */
+ /* The size cannot be zero for realloc. */
goto done;
}
- new_data = PyDataMem_RENEW(PyArray_DATA(ret), i * elsize);
+ /* The handler is always valid */
+ new_data = PyDataMem_UserRENEW(PyArray_DATA(ret), i * elsize,
+ PyArray_HANDLER(ret));
if (new_data == NULL) {
PyErr_SetString(PyExc_MemoryError,
"cannot allocate array memory");
diff --git a/numpy/core/src/multiarray/ctors.h b/numpy/core/src/multiarray/ctors.h
index e59e86e8b..98160b1cc 100644
--- a/numpy/core/src/multiarray/ctors.h
+++ b/numpy/core/src/multiarray/ctors.h
@@ -32,7 +32,8 @@ PyArray_New(
NPY_NO_EXPORT PyObject *
_array_from_array_like(PyObject *op,
- PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context);
+ PyArray_Descr *requested_dtype, npy_bool writeable, PyObject *context,
+ int never_copy);
NPY_NO_EXPORT PyObject *
PyArray_FromAny(PyObject *op, PyArray_Descr *newtype, int min_depth,
@@ -52,6 +53,10 @@ NPY_NO_EXPORT PyObject *
PyArray_FromInterface(PyObject *input);
NPY_NO_EXPORT PyObject *
+PyArray_FromArrayAttr_int(
+ PyObject *op, PyArray_Descr *descr, int never_copy);
+
+NPY_NO_EXPORT PyObject *
PyArray_FromArrayAttr(PyObject *op, PyArray_Descr *typecode,
PyObject *context);
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 6a09f92ac..0c539053c 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -1326,7 +1326,7 @@ _convert_from_dict(PyObject *obj, int align)
goto fail;
}
/* If align is set, make sure the alignment divides into the size */
- if (align && itemsize % new->alignment != 0) {
+ if (align && new->alignment > 0 && itemsize % new->alignment != 0) {
PyErr_Format(PyExc_ValueError,
"NumPy dtype descriptor requires alignment of %d bytes, "
"which is not divisible into the specified itemsize %d",
@@ -2305,8 +2305,9 @@ arraydescr_new(PyTypeObject *subtype,
{
if (subtype != &PyArrayDescr_Type) {
if (Py_TYPE(subtype) == &PyArrayDTypeMeta_Type &&
- !(PyType_GetFlags(Py_TYPE(subtype)) & Py_TPFLAGS_HEAPTYPE) &&
- (NPY_DT_SLOTS((PyArray_DTypeMeta *)subtype)) != NULL) {
+ (NPY_DT_SLOTS((PyArray_DTypeMeta *)subtype)) != NULL &&
+ !NPY_DT_is_legacy((PyArray_DTypeMeta *)subtype) &&
+ subtype->tp_new != PyArrayDescr_Type.tp_new) {
/*
* Appears to be a properly initialized user DType. Allocate
* it and initialize the main part as best we can.
@@ -2333,7 +2334,9 @@ arraydescr_new(PyTypeObject *subtype,
}
/* The DTypeMeta class should prevent this from happening. */
PyErr_Format(PyExc_SystemError,
- "'%S' must not inherit np.dtype.__new__().", subtype);
+ "'%S' must not inherit np.dtype.__new__(). User DTypes should "
+ "currently call `PyArrayDescr_Type.tp_new` from their new.",
+ subtype);
return NULL;
}
diff --git a/numpy/core/src/multiarray/dlpack.c b/numpy/core/src/multiarray/dlpack.c
new file mode 100644
index 000000000..291e60a22
--- /dev/null
+++ b/numpy/core/src/multiarray/dlpack.c
@@ -0,0 +1,408 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <dlpack/dlpack.h>
+
+#include "numpy/arrayobject.h"
+#include "common/npy_argparse.h"
+
+#include "common/dlpack/dlpack.h"
+#include "common/npy_dlpack.h"
+
+static void
+array_dlpack_deleter(DLManagedTensor *self)
+{
+ PyArrayObject *array = (PyArrayObject *)self->manager_ctx;
+ // This will also free the strides as it's one allocation.
+ PyMem_Free(self->dl_tensor.shape);
+ PyMem_Free(self);
+ Py_XDECREF(array);
+}
+
+/* This is exactly as mandated by dlpack */
+static void dlpack_capsule_deleter(PyObject *self) {
+ if (PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) {
+ return;
+ }
+
+ /* an exception may be in-flight, we must save it in case we create another one */
+ PyObject *type, *value, *traceback;
+ PyErr_Fetch(&type, &value, &traceback);
+
+ DLManagedTensor *managed =
+ (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME);
+ if (managed == NULL) {
+ PyErr_WriteUnraisable(self);
+ goto done;
+ }
+ /*
+ * the spec says the deleter can be NULL if there is no way for the caller
+ * to provide a reasonable destructor.
+ */
+ if (managed->deleter) {
+ managed->deleter(managed);
+ /* TODO: is the deleter allowed to set a python exception? */
+ assert(!PyErr_Occurred());
+ }
+
+done:
+ PyErr_Restore(type, value, traceback);
+}
+
+/* used internally, almost identical to dlpack_capsule_deleter() */
+static void array_dlpack_internal_capsule_deleter(PyObject *self)
+{
+ /* an exception may be in-flight, we must save it in case we create another one */
+ PyObject *type, *value, *traceback;
+ PyErr_Fetch(&type, &value, &traceback);
+
+ DLManagedTensor *managed =
+ (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_INTERNAL_CAPSULE_NAME);
+ if (managed == NULL) {
+ PyErr_WriteUnraisable(self);
+ goto done;
+ }
+ /*
+ * the spec says the deleter can be NULL if there is no way for the caller
+ * to provide a reasonable destructor.
+ */
+ if (managed->deleter) {
+ managed->deleter(managed);
+ /* TODO: is the deleter allowed to set a python exception? */
+ assert(!PyErr_Occurred());
+ }
+
+done:
+ PyErr_Restore(type, value, traceback);
+}
+
+
+// This function cannot return NULL, but it can fail,
+// so call PyErr_Occurred() to check whether it failed
+// after calling it.
+static DLDevice
+array_get_dl_device(PyArrayObject *self) {
+ DLDevice ret;
+ ret.device_type = kDLCPU;
+ ret.device_id = 0;
+ PyObject *base = PyArray_BASE(self);
+ // The outer if is due to the fact that NumPy arrays are on the CPU
+ // by default (if not created from DLPack).
+ if (PyCapsule_IsValid(base, NPY_DLPACK_INTERNAL_CAPSULE_NAME)) {
+ DLManagedTensor *managed = PyCapsule_GetPointer(
+ base, NPY_DLPACK_INTERNAL_CAPSULE_NAME);
+ if (managed == NULL) {
+ return ret;
+ }
+ return managed->dl_tensor.device;
+ }
+ return ret;
+}
+
+
+PyObject *
+array_dlpack(PyArrayObject *self,
+ PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames)
+{
+ PyObject *stream = Py_None;
+ NPY_PREPARE_ARGPARSER;
+ if (npy_parse_arguments("__dlpack__", args, len_args, kwnames,
+ "$stream", NULL, &stream, NULL, NULL, NULL)) {
+ return NULL;
+ }
+
+ if (stream != Py_None) {
+ PyErr_SetString(PyExc_RuntimeError, "NumPy only supports "
+ "stream=None.");
+ return NULL;
+ }
+
+ if ( !(PyArray_FLAGS(self) & NPY_ARRAY_WRITEABLE)) {
+ PyErr_SetString(PyExc_TypeError, "NumPy currently only supports "
+ "dlpack for writeable arrays");
+ return NULL;
+ }
+
+ npy_intp itemsize = PyArray_ITEMSIZE(self);
+ int ndim = PyArray_NDIM(self);
+ npy_intp *strides = PyArray_STRIDES(self);
+ npy_intp *shape = PyArray_SHAPE(self);
+
+ if (!PyArray_IS_C_CONTIGUOUS(self) && PyArray_SIZE(self) != 1) {
+ for (int i = 0; i < ndim; ++i) {
+ if (strides[i] % itemsize != 0) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "DLPack only supports strides which are a multiple of "
+ "itemsize.");
+ return NULL;
+ }
+ }
+ }
+
+ DLDataType managed_dtype;
+ PyArray_Descr *dtype = PyArray_DESCR(self);
+
+ if (PyDataType_ISBYTESWAPPED(dtype)) {
+ PyErr_SetString(PyExc_TypeError, "DLPack only supports native "
+ "byte swapping.");
+ return NULL;
+ }
+
+ managed_dtype.bits = 8 * itemsize;
+ managed_dtype.lanes = 1;
+
+ if (PyDataType_ISSIGNED(dtype)) {
+ managed_dtype.code = kDLInt;
+ }
+ else if (PyDataType_ISUNSIGNED(dtype)) {
+ managed_dtype.code = kDLUInt;
+ }
+ else if (PyDataType_ISFLOAT(dtype)) {
+ // We can't be sure that the dtype is
+ // IEEE or padded.
+ if (itemsize > 8) {
+ PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+ "floating point types without padding.");
+ return NULL;
+ }
+ managed_dtype.code = kDLFloat;
+ }
+ else if (PyDataType_ISCOMPLEX(dtype)) {
+ // We can't be sure that the dtype is
+ // IEEE or padded.
+ if (itemsize > 16) {
+ PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE "
+ "complex point types without padding.");
+ return NULL;
+ }
+ managed_dtype.code = kDLComplex;
+ }
+ else {
+ PyErr_SetString(PyExc_TypeError,
+ "DLPack only supports signed/unsigned integers, float "
+ "and complex dtypes.");
+ return NULL;
+ }
+
+ DLDevice device = array_get_dl_device(self);
+ if (PyErr_Occurred()) {
+ return NULL;
+ }
+
+ DLManagedTensor *managed = PyMem_Malloc(sizeof(DLManagedTensor));
+ if (managed == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ /*
+ * Note: the `dlpack.h` header suggests/standardizes that `data` must be
+ * 256-byte aligned. We ignore this intentionally, because `__dlpack__`
+ * standardizes that `byte_offset` must be 0 (for now) to not break pytorch:
+ * https://github.com/data-apis/array-api/issues/293#issuecomment-964111413
+ *
+ * We further assume that exporting fully unaligned data is OK even without
+ * `byte_offset` since the standard does not reject it.
+ * Presumably, pytorch will support importing `byte_offset != 0` and NumPy
+ * can choose to use it starting about 2023. At that point, it may be
+ * that NumPy MUST use `byte_offset` to adhere to the standard (as
+ * specified in the header)!
+ */
+ managed->dl_tensor.data = PyArray_DATA(self);
+ managed->dl_tensor.byte_offset = 0;
+ managed->dl_tensor.device = device;
+ managed->dl_tensor.dtype = managed_dtype;
+
+ int64_t *managed_shape_strides = PyMem_Malloc(sizeof(int64_t) * ndim * 2);
+ if (managed_shape_strides == NULL) {
+ PyErr_NoMemory();
+ PyMem_Free(managed);
+ return NULL;
+ }
+
+ int64_t *managed_shape = managed_shape_strides;
+ int64_t *managed_strides = managed_shape_strides + ndim;
+ for (int i = 0; i < ndim; ++i) {
+ managed_shape[i] = shape[i];
+ // Strides in DLPack are items; in NumPy are bytes.
+ managed_strides[i] = strides[i] / itemsize;
+ }
+
+ managed->dl_tensor.ndim = ndim;
+ managed->dl_tensor.shape = managed_shape;
+ managed->dl_tensor.strides = NULL;
+ if (PyArray_SIZE(self) != 1 && !PyArray_IS_C_CONTIGUOUS(self)) {
+ managed->dl_tensor.strides = managed_strides;
+ }
+ managed->dl_tensor.byte_offset = 0;
+ managed->manager_ctx = self;
+ managed->deleter = array_dlpack_deleter;
+
+ PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME,
+ dlpack_capsule_deleter);
+ if (capsule == NULL) {
+ PyMem_Free(managed);
+ PyMem_Free(managed_shape_strides);
+ return NULL;
+ }
+
+ // the capsule holds a reference
+ Py_INCREF(self);
+ return capsule;
+}
+
+PyObject *
+array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args))
+{
+ DLDevice device = array_get_dl_device(self);
+ if (PyErr_Occurred()) {
+ return NULL;
+ }
+ return Py_BuildValue("ii", device.device_type, device.device_id);
+}
+
+NPY_NO_EXPORT PyObject *
+_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj) {
+ PyObject *capsule = PyObject_CallMethod((PyObject *)obj->ob_type,
+ "__dlpack__", "O", obj);
+ if (capsule == NULL) {
+ return NULL;
+ }
+
+ DLManagedTensor *managed =
+ (DLManagedTensor *)PyCapsule_GetPointer(capsule,
+ NPY_DLPACK_CAPSULE_NAME);
+
+ if (managed == NULL) {
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ const int ndim = managed->dl_tensor.ndim;
+ if (ndim > NPY_MAXDIMS) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "maxdims of DLPack tensor is higher than the supported "
+ "maxdims.");
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ DLDeviceType device_type = managed->dl_tensor.device.device_type;
+ if (device_type != kDLCPU &&
+ device_type != kDLCUDAHost &&
+ device_type != kDLROCMHost &&
+ device_type != kDLCUDAManaged) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Unsupported device in DLTensor.");
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ if (managed->dl_tensor.dtype.lanes != 1) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Unsupported lanes in DLTensor dtype.");
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ int typenum = -1;
+ const uint8_t bits = managed->dl_tensor.dtype.bits;
+ const npy_intp itemsize = bits / 8;
+ switch (managed->dl_tensor.dtype.code) {
+ case kDLInt:
+ switch (bits)
+ {
+ case 8: typenum = NPY_INT8; break;
+ case 16: typenum = NPY_INT16; break;
+ case 32: typenum = NPY_INT32; break;
+ case 64: typenum = NPY_INT64; break;
+ }
+ break;
+ case kDLUInt:
+ switch (bits)
+ {
+ case 8: typenum = NPY_UINT8; break;
+ case 16: typenum = NPY_UINT16; break;
+ case 32: typenum = NPY_UINT32; break;
+ case 64: typenum = NPY_UINT64; break;
+ }
+ break;
+ case kDLFloat:
+ switch (bits)
+ {
+ case 16: typenum = NPY_FLOAT16; break;
+ case 32: typenum = NPY_FLOAT32; break;
+ case 64: typenum = NPY_FLOAT64; break;
+ }
+ break;
+ case kDLComplex:
+ switch (bits)
+ {
+ case 64: typenum = NPY_COMPLEX64; break;
+ case 128: typenum = NPY_COMPLEX128; break;
+ }
+ break;
+ }
+
+ if (typenum == -1) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "Unsupported dtype in DLTensor.");
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ npy_intp shape[NPY_MAXDIMS];
+ npy_intp strides[NPY_MAXDIMS];
+
+ for (int i = 0; i < ndim; ++i) {
+ shape[i] = managed->dl_tensor.shape[i];
+ // DLPack has elements as stride units, NumPy has bytes.
+ if (managed->dl_tensor.strides != NULL) {
+ strides[i] = managed->dl_tensor.strides[i] * itemsize;
+ }
+ }
+
+ char *data = (char *)managed->dl_tensor.data +
+ managed->dl_tensor.byte_offset;
+
+ PyArray_Descr *descr = PyArray_DescrFromType(typenum);
+ if (descr == NULL) {
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ PyObject *ret = PyArray_NewFromDescr(&PyArray_Type, descr, ndim, shape,
+ managed->dl_tensor.strides != NULL ? strides : NULL, data, 0, NULL);
+ if (ret == NULL) {
+ Py_DECREF(capsule);
+ return NULL;
+ }
+
+ PyObject *new_capsule = PyCapsule_New(managed,
+ NPY_DLPACK_INTERNAL_CAPSULE_NAME,
+ array_dlpack_internal_capsule_deleter);
+ if (new_capsule == NULL) {
+ Py_DECREF(capsule);
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ if (PyArray_SetBaseObject((PyArrayObject *)ret, new_capsule) < 0) {
+ Py_DECREF(capsule);
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ if (PyCapsule_SetName(capsule, NPY_DLPACK_USED_CAPSULE_NAME) < 0) {
+ Py_DECREF(capsule);
+ Py_DECREF(ret);
+ return NULL;
+ }
+
+ Py_DECREF(capsule);
+ return ret;
+}
+
+
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 05e9e2394..2a61fe39d 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -74,9 +74,9 @@ typedef struct {
#define NPY_DTYPE(descr) ((PyArray_DTypeMeta *)Py_TYPE(descr))
#define NPY_DT_SLOTS(dtype) ((NPY_DType_Slots *)(dtype)->dt_slots)
-#define NPY_DT_is_legacy(dtype) ((dtype)->flags & NPY_DT_LEGACY)
-#define NPY_DT_is_abstract(dtype) ((dtype)->flags & NPY_DT_ABSTRACT)
-#define NPY_DT_is_parametric(dtype) ((dtype)->flags & NPY_DT_PARAMETRIC)
+#define NPY_DT_is_legacy(dtype) (((dtype)->flags & NPY_DT_LEGACY) != 0)
+#define NPY_DT_is_abstract(dtype) (((dtype)->flags & NPY_DT_ABSTRACT) != 0)
+#define NPY_DT_is_parametric(dtype) (((dtype)->flags & NPY_DT_PARAMETRIC) != 0)
/*
* Macros for convenient classmethod calls, since these require
diff --git a/numpy/core/src/multiarray/einsum_sumprod.c.src b/numpy/core/src/multiarray/einsum_sumprod.c.src
index 29ceabd71..3114a5896 100644
--- a/numpy/core/src/multiarray/einsum_sumprod.c.src
+++ b/numpy/core/src/multiarray/einsum_sumprod.c.src
@@ -337,13 +337,13 @@ static NPY_GCC_OPT_3 void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
- const @type@ b@i@ = @from@(data[@i@]);
- const @type@ c@i@ = @from@(data_out[@i@]);
+ const @temptype@ b@i@ = @from@(data[@i@]);
+ const @temptype@ c@i@ = @from@(data_out[@i@]);
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
- const @type@ abc@i@ = scalar * b@i@ + c@i@;
+ const @temptype@ abc@i@ = scalar * b@i@ + c@i@;
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
@@ -353,8 +353,8 @@ static NPY_GCC_OPT_3 void
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data, ++data_out) {
- const @type@ b = @from@(*data);
- const @type@ c = @from@(*data_out);
+ const @temptype@ b = @from@(*data);
+ const @temptype@ c = @from@(*data_out);
*data_out = @to@(scalar * b + c);
}
#endif // NPYV check for @type@
@@ -417,14 +417,14 @@ static void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
- const @type@ a@i@ = @from@(data0[@i@]);
- const @type@ b@i@ = @from@(data1[@i@]);
- const @type@ c@i@ = @from@(data_out[@i@]);
+ const @temptype@ a@i@ = @from@(data0[@i@]);
+ const @temptype@ b@i@ = @from@(data1[@i@]);
+ const @temptype@ c@i@ = @from@(data_out[@i@]);
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
- const @type@ abc@i@ = a@i@ * b@i@ + c@i@;
+ const @temptype@ abc@i@ = a@i@ * b@i@ + c@i@;
/**end repeat2**/
/**begin repeat2
* #i = 0, 1, 2, 3#
@@ -434,9 +434,9 @@ static void
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data0, ++data1, ++data_out) {
- const @type@ a = @from@(*data0);
- const @type@ b = @from@(*data1);
- const @type@ c = @from@(*data_out);
+ const @temptype@ a = @from@(*data0);
+ const @temptype@ b = @from@(*data1);
+ const @temptype@ c = @from@(*data_out);
*data_out = @to@(a * b + c);
}
#endif // NPYV check for @type@
@@ -521,14 +521,14 @@ static NPY_GCC_OPT_3 void
/**begin repeat2
* #i = 0, 1, 2, 3#
*/
- const @type@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
+ const @temptype@ ab@i@ = @from@(data0[@i@]) * @from@(data1[@i@]);
/**end repeat2**/
accum += ab0 + ab1 + ab2 + ab3;
}
#endif // !NPY_DISABLE_OPTIMIZATION
for (; count > 0; --count, ++data0, ++data1) {
- const @type@ a = @from@(*data0);
- const @type@ b = @from@(*data1);
+ const @temptype@ a = @from@(*data0);
+ const @temptype@ b = @from@(*data1);
accum += a * b;
}
#endif // NPYV check for @type@
diff --git a/numpy/core/src/multiarray/experimental_public_dtype_api.c b/numpy/core/src/multiarray/experimental_public_dtype_api.c
index 1e8abe9d6..4b9c7199b 100644
--- a/numpy/core/src/multiarray/experimental_public_dtype_api.c
+++ b/numpy/core/src/multiarray/experimental_public_dtype_api.c
@@ -13,9 +13,10 @@
#include "dtypemeta.h"
#include "array_coercion.h"
#include "convert_datatype.h"
+#include "common_dtype.h"
-#define EXPERIMENTAL_DTYPE_API_VERSION 1
+#define EXPERIMENTAL_DTYPE_API_VERSION 2
typedef struct{
@@ -130,6 +131,14 @@ PyArrayInitDTypeMeta_FromSpec(
return -1;
}
+ if (((PyTypeObject *)DType)->tp_repr == PyArrayDescr_Type.tp_repr
+ || ((PyTypeObject *)DType)->tp_str == PyArrayDescr_Type.tp_str) {
+ PyErr_SetString(PyExc_TypeError,
+ "A custom DType must implement `__repr__` and `__str__` since "
+ "the default inherited version (currently) fails.");
+ return -1;
+ }
+
if (spec->typeobj == NULL || !PyType_Check(spec->typeobj)) {
PyErr_SetString(PyExc_TypeError,
"Not giving a type object is currently not supported, but "
@@ -324,13 +333,41 @@ PyUFunc_AddLoopFromSpec(PyObject *ufunc, PyArrayMethod_Spec *spec)
}
+static int
+PyUFunc_AddPromoter(
+ PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter)
+{
+ if (!PyObject_TypeCheck(ufunc, &PyUFunc_Type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "ufunc object passed is not a ufunc!");
+ return -1;
+ }
+ if (!PyCapsule_CheckExact(promoter)) {
+ PyErr_SetString(PyExc_TypeError,
+ "promoter must (currently) be a PyCapsule.");
+ return -1;
+ }
+ if (PyCapsule_GetPointer(promoter, "numpy._ufunc_promoter") == NULL) {
+ return -1;
+ }
+ PyObject *info = PyTuple_Pack(2, DType_tuple, promoter);
+ if (info == NULL) {
+ return -1;
+ }
+ return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
+}
+
+
NPY_NO_EXPORT PyObject *
_get_experimental_dtype_api(PyObject *NPY_UNUSED(mod), PyObject *arg)
{
static void *experimental_api_table[] = {
&PyUFunc_AddLoopFromSpec,
+ &PyUFunc_AddPromoter,
&PyArrayDTypeMeta_Type,
&PyArrayInitDTypeMeta_FromSpec,
+ &PyArray_CommonDType,
+ &PyArray_PromoteDTypeSequence,
NULL,
};
diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c
index 2c8d1b3b4..e81ca2947 100644
--- a/numpy/core/src/multiarray/getset.c
+++ b/numpy/core/src/multiarray/getset.c
@@ -384,7 +384,23 @@ array_data_set(PyArrayObject *self, PyObject *op, void *NPY_UNUSED(ignored))
}
if (PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA) {
PyArray_XDECREF(self);
- PyDataMem_FREE(PyArray_DATA(self));
+ size_t nbytes = PyArray_NBYTES(self);
+ /*
+ * Allocation will never be 0, see comment in ctors.c
+ * line 820
+ */
+ if (nbytes == 0) {
+ PyArray_Descr *dtype = PyArray_DESCR(self);
+ nbytes = dtype->elsize ? dtype->elsize : 1;
+ }
+ PyObject *handler = PyArray_HANDLER(self);
+ if (handler == NULL) {
+ /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */
+ PyErr_SetString(PyExc_RuntimeError,
+ "no memory handler found but OWNDATA flag set");
+ return -1;
+ }
+ PyDataMem_UserFREE(PyArray_DATA(self), nbytes, handler);
}
if (PyArray_BASE(self)) {
if ((PyArray_FLAGS(self) & NPY_ARRAY_WRITEBACKIFCOPY) ||
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index ee66378a9..086b674c8 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -776,6 +776,7 @@ PyArray_Repeat(PyArrayObject *aop, PyObject *op, int axis)
return NULL;
}
+
/*NUMPY_API
*/
NPY_NO_EXPORT PyObject *
@@ -907,7 +908,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out,
Py_XDECREF(mps[i]);
}
Py_DECREF(ap);
- npy_free_cache(mps, n * sizeof(mps[0]));
+ PyDataMem_FREE(mps);
if (out != NULL && out != obj) {
Py_INCREF(out);
PyArray_ResolveWritebackIfCopy(obj);
@@ -922,7 +923,7 @@ PyArray_Choose(PyArrayObject *ip, PyObject *op, PyArrayObject *out,
Py_XDECREF(mps[i]);
}
Py_XDECREF(ap);
- npy_free_cache(mps, n * sizeof(mps[0]));
+ PyDataMem_FREE(mps);
PyArray_DiscardWritebackIfCopy(obj);
Py_XDECREF(obj);
return NULL;
@@ -962,14 +963,19 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
return 0;
}
+ PyObject *mem_handler = PyDataMem_GetHandler();
+ if (mem_handler == NULL) {
+ return -1;
+ }
it = (PyArrayIterObject *)PyArray_IterAllButAxis((PyObject *)op, &axis);
if (it == NULL) {
+ Py_DECREF(mem_handler);
return -1;
}
size = it->size;
if (needcopy) {
- buffer = npy_alloc_cache(N * elsize);
+ buffer = PyDataMem_UserNEW(N * elsize, mem_handler);
if (buffer == NULL) {
ret = -1;
goto fail;
@@ -1053,12 +1059,14 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
fail:
NPY_END_THREADS_DESCR(PyArray_DESCR(op));
- npy_free_cache(buffer, N * elsize);
+ /* cleanup internal buffer */
+ PyDataMem_UserFREE(buffer, N * elsize, mem_handler);
if (ret < 0 && !PyErr_Occurred()) {
/* Out of memory during sorting or buffer creation */
PyErr_NoMemory();
}
Py_DECREF(it);
+ Py_DECREF(mem_handler);
return ret;
}
@@ -1090,11 +1098,16 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
NPY_BEGIN_THREADS_DEF;
+ PyObject *mem_handler = PyDataMem_GetHandler();
+ if (mem_handler == NULL) {
+ return NULL;
+ }
rop = (PyArrayObject *)PyArray_NewFromDescr(
Py_TYPE(op), PyArray_DescrFromType(NPY_INTP),
PyArray_NDIM(op), PyArray_DIMS(op), NULL, NULL,
0, (PyObject *)op);
if (rop == NULL) {
+ Py_DECREF(mem_handler);
return NULL;
}
rstride = PyArray_STRIDE(rop, axis);
@@ -1102,6 +1115,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
/* Check if there is any argsorting to do */
if (N <= 1 || PyArray_SIZE(op) == 0) {
+ Py_DECREF(mem_handler);
memset(PyArray_DATA(rop), 0, PyArray_NBYTES(rop));
return (PyObject *)rop;
}
@@ -1115,7 +1129,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
size = it->size;
if (needcopy) {
- valbuffer = npy_alloc_cache(N * elsize);
+ valbuffer = PyDataMem_UserNEW(N * elsize, mem_handler);
if (valbuffer == NULL) {
ret = -1;
goto fail;
@@ -1123,7 +1137,8 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
}
if (needidxbuffer) {
- idxbuffer = (npy_intp *)npy_alloc_cache(N * sizeof(npy_intp));
+ idxbuffer = (npy_intp *)PyDataMem_UserNEW(N * sizeof(npy_intp),
+ mem_handler);
if (idxbuffer == NULL) {
ret = -1;
goto fail;
@@ -1212,8 +1227,9 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
fail:
NPY_END_THREADS_DESCR(PyArray_DESCR(op));
- npy_free_cache(valbuffer, N * elsize);
- npy_free_cache(idxbuffer, N * sizeof(npy_intp));
+ /* cleanup internal buffers */
+ PyDataMem_UserFREE(valbuffer, N * elsize, mem_handler);
+ PyDataMem_UserFREE(idxbuffer, N * sizeof(npy_intp), mem_handler);
if (ret < 0) {
if (!PyErr_Occurred()) {
/* Out of memory during sorting or buffer creation */
@@ -1224,6 +1240,7 @@ fail:
}
Py_XDECREF(it);
Py_XDECREF(rit);
+ Py_DECREF(mem_handler);
return (PyObject *)rop;
}
@@ -2398,19 +2415,14 @@ PyArray_CountNonzero(PyArrayObject *self)
npy_intp *strideptr, *innersizeptr;
NPY_BEGIN_THREADS_DEF;
- // Special low-overhead version specific to the boolean/int types
dtype = PyArray_DESCR(self);
- switch(dtype->kind) {
- case 'u':
- case 'i':
- case 'b':
- if (dtype->elsize > 8) {
- break;
- }
- return count_nonzero_int(
- PyArray_NDIM(self), PyArray_BYTES(self), PyArray_DIMS(self),
- PyArray_STRIDES(self), dtype->elsize
- );
+ /* Special low-overhead version specific to the boolean/int types */
+ if (PyArray_ISALIGNED(self) && (
+ PyDataType_ISBOOL(dtype) || PyDataType_ISINTEGER(dtype))) {
+ return count_nonzero_int(
+ PyArray_NDIM(self), PyArray_BYTES(self), PyArray_DIMS(self),
+ PyArray_STRIDES(self), dtype->elsize
+ );
}
nonzero = PyArray_DESCR(self)->f->nonzero;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 391e65f6a..b0b6f42f1 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -26,6 +26,7 @@
#include "shape.h"
#include "strfuncs.h"
#include "array_assign.h"
+#include "npy_dlpack.h"
#include "methods.h"
#include "alloc.h"
@@ -833,15 +834,15 @@ array_astype(PyArrayObject *self,
*/
NPY_CASTING casting = NPY_UNSAFE_CASTING;
NPY_ORDER order = NPY_KEEPORDER;
- int forcecopy = 1, subok = 1;
+ _PyArray_CopyMode forcecopy = 1;
+ int subok = 1;
NPY_PREPARE_ARGPARSER;
-
if (npy_parse_arguments("astype", args, len_args, kwnames,
"dtype", &PyArray_DescrConverter, &dtype,
"|order", &PyArray_OrderConverter, &order,
"|casting", &PyArray_CastingConverter, &casting,
"|subok", &PyArray_PythonPyIntFromInt, &subok,
- "|copy", &PyArray_PythonPyIntFromInt, &forcecopy,
+ "|copy", &PyArray_CopyConverter, &forcecopy,
NULL, NULL, NULL) < 0) {
Py_XDECREF(dtype);
return NULL;
@@ -858,20 +859,29 @@ array_astype(PyArrayObject *self,
* and it's not a subtype if subok is False, then we
* can skip the copy.
*/
- if (!forcecopy && (order == NPY_KEEPORDER ||
- (order == NPY_ANYORDER &&
- (PyArray_IS_C_CONTIGUOUS(self) ||
- PyArray_IS_F_CONTIGUOUS(self))) ||
- (order == NPY_CORDER &&
- PyArray_IS_C_CONTIGUOUS(self)) ||
- (order == NPY_FORTRANORDER &&
- PyArray_IS_F_CONTIGUOUS(self))) &&
- (subok || PyArray_CheckExact(self)) &&
- PyArray_EquivTypes(dtype, PyArray_DESCR(self))) {
+ if (forcecopy != NPY_COPY_ALWAYS &&
+ (order == NPY_KEEPORDER ||
+ (order == NPY_ANYORDER &&
+ (PyArray_IS_C_CONTIGUOUS(self) ||
+ PyArray_IS_F_CONTIGUOUS(self))) ||
+ (order == NPY_CORDER &&
+ PyArray_IS_C_CONTIGUOUS(self)) ||
+ (order == NPY_FORTRANORDER &&
+ PyArray_IS_F_CONTIGUOUS(self))) &&
+ (subok || PyArray_CheckExact(self)) &&
+ PyArray_EquivTypes(dtype, PyArray_DESCR(self))) {
Py_DECREF(dtype);
Py_INCREF(self);
return (PyObject *)self;
}
+
+ if (forcecopy == NPY_COPY_NEVER) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while casting in never copy mode.");
+ Py_DECREF(dtype);
+ return NULL;
+ }
+
if (!PyArray_CanCastArrayTo(self, dtype, casting)) {
PyErr_Clear();
npy_set_invalid_cast_error(
@@ -1821,22 +1831,8 @@ array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol)
descr = PyArray_DESCR(self);
- /* if the python version is below 3.8, the pickle module does not provide
- * built-in support for protocol 5. We try importing the pickle5
- * backport instead */
-#if PY_VERSION_HEX >= 0x03080000
/* we expect protocol 5 to be available in Python 3.8 */
pickle_module = PyImport_ImportModule("pickle");
-#else
- pickle_module = PyImport_ImportModule("pickle5");
- if (pickle_module == NULL) {
- /* for protocol 5, raise a clear ImportError if pickle5 is not found
- */
- PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
- "requires the pickle5 module for Python >=3.6 and <3.8");
- return NULL;
- }
-#endif
if (pickle_module == NULL){
return NULL;
}
@@ -1975,6 +1971,16 @@ array_setstate(PyArrayObject *self, PyObject *args)
return NULL;
}
+ /*
+ * Reassigning fa->descr messes with the reallocation strategy,
+ * since fa could be a 0-d or scalar, and then
+ * PyDataMem_UserFREE will be confused
+ */
+ size_t n_tofree = PyArray_NBYTES(self);
+ if (n_tofree == 0) {
+ PyArray_Descr *dtype = PyArray_DESCR(self);
+ n_tofree = dtype->elsize ? dtype->elsize : 1;
+ }
Py_XDECREF(PyArray_DESCR(self));
fa->descr = typecode;
Py_INCREF(typecode);
@@ -2041,7 +2047,18 @@ array_setstate(PyArrayObject *self, PyObject *args)
}
if ((PyArray_FLAGS(self) & NPY_ARRAY_OWNDATA)) {
- PyDataMem_FREE(PyArray_DATA(self));
+ /*
+ * Allocation will never be 0, see comment in ctors.c
+ * line 820
+ */
+ PyObject *handler = PyArray_HANDLER(self);
+ if (handler == NULL) {
+ /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */
+ PyErr_SetString(PyExc_RuntimeError,
+ "no memory handler found but OWNDATA flag set");
+ return NULL;
+ }
+ PyDataMem_UserFREE(PyArray_DATA(self), n_tofree, handler);
PyArray_CLEARFLAGS(self, NPY_ARRAY_OWNDATA);
}
Py_XDECREF(PyArray_BASE(self));
@@ -2077,7 +2094,6 @@ array_setstate(PyArrayObject *self, PyObject *args)
if (!PyDataType_FLAGCHK(typecode, NPY_LIST_PICKLE)) {
int swap = PyArray_ISBYTESWAPPED(self);
- fa->data = datastr;
/* Bytes should always be considered immutable, but we just grab the
* pointer if they are large, to save memory. */
if (!IsAligned(self) || swap || (len <= 1000)) {
@@ -2086,8 +2102,16 @@ array_setstate(PyArrayObject *self, PyObject *args)
Py_DECREF(rawdata);
Py_RETURN_NONE;
}
- fa->data = PyDataMem_NEW(num);
+ /* Store the handler in case the default is modified */
+ Py_XDECREF(fa->mem_handler);
+ fa->mem_handler = PyDataMem_GetHandler();
+ if (fa->mem_handler == NULL) {
+ Py_DECREF(rawdata);
+ return NULL;
+ }
+ fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
+ Py_DECREF(fa->mem_handler);
Py_DECREF(rawdata);
return PyErr_NoMemory();
}
@@ -2123,7 +2147,12 @@ array_setstate(PyArrayObject *self, PyObject *args)
Py_DECREF(rawdata);
}
else {
+ /* The handlers should never be called in this case */
+ Py_XDECREF(fa->mem_handler);
+ fa->mem_handler = NULL;
+ fa->data = datastr;
if (PyArray_SetBaseObject(self, rawdata) < 0) {
+ Py_DECREF(rawdata);
return NULL;
}
}
@@ -2134,8 +2163,15 @@ array_setstate(PyArrayObject *self, PyObject *args)
if (num == 0 || elsize == 0) {
Py_RETURN_NONE;
}
- fa->data = PyDataMem_NEW(num);
+ /* Store the functions in case the default handler is modified */
+ Py_XDECREF(fa->mem_handler);
+ fa->mem_handler = PyDataMem_GetHandler();
+ if (fa->mem_handler == NULL) {
+ return NULL;
+ }
+ fa->data = PyDataMem_UserNEW(num, PyArray_HANDLER(self));
if (PyArray_DATA(self) == NULL) {
+ Py_DECREF(fa->mem_handler);
return PyErr_NoMemory();
}
if (PyDataType_FLAGCHK(PyArray_DESCR(self), NPY_NEEDS_INIT)) {
@@ -2144,6 +2180,7 @@ array_setstate(PyArrayObject *self, PyObject *args)
PyArray_ENABLEFLAGS(self, NPY_ARRAY_OWNDATA);
fa->base = NULL;
if (_setlist_pkl(self, rawdata) < 0) {
+ Py_DECREF(fa->mem_handler);
return NULL;
}
}
@@ -2209,7 +2246,7 @@ array_dumps(PyArrayObject *self, PyObject *args, PyObject *kwds)
static PyObject *
-array_sizeof(PyArrayObject *self)
+array_sizeof(PyArrayObject *self, PyObject *NPY_UNUSED(args))
{
/* object + dimension and strides */
Py_ssize_t nbytes = Py_TYPE(self)->tp_basicsize +
@@ -2948,5 +2985,13 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
{"view",
(PyCFunction)array_view,
METH_FASTCALL | METH_KEYWORDS, NULL},
+ // For data interchange between libraries
+ {"__dlpack__",
+ (PyCFunction)array_dlpack,
+ METH_FASTCALL | METH_KEYWORDS, NULL},
+
+ {"__dlpack_device__",
+ (PyCFunction)array_dlpack_device,
+ METH_NOARGS, NULL},
{NULL, NULL, 0, NULL} /* sentinel */
};
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index d211f01bc..cf0160a2b 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -70,6 +70,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
#include "get_attr_string.h"
#include "experimental_public_dtype_api.h" /* _get_experimental_dtype_api */
+#include "npy_dlpack.h"
+
/*
*****************************************************************************
** INCLUDE GENERATED CODE **
@@ -1560,7 +1562,7 @@ _prepend_ones(PyArrayObject *arr, int nd, int ndmin, NPY_ORDER order)
static NPY_INLINE PyObject *
_array_fromobject_generic(
- PyObject *op, PyArray_Descr *type, npy_bool copy, NPY_ORDER order,
+ PyObject *op, PyArray_Descr *type, _PyArray_CopyMode copy, NPY_ORDER order,
npy_bool subok, int ndmin)
{
PyArrayObject *oparr = NULL, *ret = NULL;
@@ -1577,12 +1579,17 @@ _array_fromobject_generic(
if (PyArray_CheckExact(op) || (subok && PyArray_Check(op))) {
oparr = (PyArrayObject *)op;
if (type == NULL) {
- if (!copy && STRIDING_OK(oparr, order)) {
+ if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) {
ret = oparr;
Py_INCREF(ret);
goto finish;
}
else {
+ if (copy == NPY_COPY_NEVER) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while creating a new array.");
+ return NULL;
+ }
ret = (PyArrayObject *)PyArray_NewCopy(oparr, order);
goto finish;
}
@@ -1590,12 +1597,17 @@ _array_fromobject_generic(
/* One more chance */
oldtype = PyArray_DESCR(oparr);
if (PyArray_EquivTypes(oldtype, type)) {
- if (!copy && STRIDING_OK(oparr, order)) {
+ if (copy != NPY_COPY_ALWAYS && STRIDING_OK(oparr, order)) {
Py_INCREF(op);
ret = oparr;
goto finish;
}
else {
+ if (copy == NPY_COPY_NEVER) {
+ PyErr_SetString(PyExc_ValueError,
+ "Unable to avoid copy while creating a new array.");
+ return NULL;
+ }
ret = (PyArrayObject *)PyArray_NewCopy(oparr, order);
if (oldtype == type || ret == NULL) {
goto finish;
@@ -1608,9 +1620,12 @@ _array_fromobject_generic(
}
}
- if (copy) {
+ if (copy == NPY_COPY_ALWAYS) {
flags = NPY_ARRAY_ENSURECOPY;
}
+ else if (copy == NPY_COPY_NEVER ) {
+ flags = NPY_ARRAY_ENSURENOCOPY;
+ }
if (order == NPY_CORDER) {
flags |= NPY_ARRAY_C_CONTIGUOUS;
}
@@ -1654,7 +1669,7 @@ array_array(PyObject *NPY_UNUSED(ignored),
{
PyObject *op;
npy_bool subok = NPY_FALSE;
- npy_bool copy = NPY_TRUE;
+ _PyArray_CopyMode copy = NPY_COPY_ALWAYS;
int ndmin = 0;
PyArray_Descr *type = NULL;
NPY_ORDER order = NPY_KEEPORDER;
@@ -1665,7 +1680,7 @@ array_array(PyObject *NPY_UNUSED(ignored),
if (npy_parse_arguments("array", args, len_args, kwnames,
"object", NULL, &op,
"|dtype", &PyArray_DescrConverter2, &type,
- "$copy", &PyArray_BoolConverter, &copy,
+ "$copy", &PyArray_CopyConverter, &copy,
"$order", &PyArray_OrderConverter, &order,
"$subok", &PyArray_BoolConverter, &subok,
"$ndmin", &PyArray_PythonPyIntFromInt, &ndmin,
@@ -4197,7 +4212,7 @@ normalize_axis_index(PyObject *NPY_UNUSED(self),
static PyObject *
-_reload_guard(PyObject *NPY_UNUSED(self)) {
+_reload_guard(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args)) {
static int initialized = 0;
#if !defined(PYPY_VERSION)
@@ -4231,7 +4246,6 @@ _reload_guard(PyObject *NPY_UNUSED(self)) {
Py_RETURN_NONE;
}
-
static struct PyMethodDef array_module_methods[] = {
{"_get_implementing_args",
(PyCFunction)array__get_implementing_args,
@@ -4433,6 +4447,12 @@ static struct PyMethodDef array_module_methods[] = {
{"geterrobj",
(PyCFunction) ufunc_geterr,
METH_VARARGS, NULL},
+ {"get_handler_name",
+ (PyCFunction) get_handler_name,
+ METH_VARARGS, NULL},
+ {"get_handler_version",
+ (PyCFunction) get_handler_version,
+ METH_VARARGS, NULL},
{"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc,
METH_VARARGS, NULL},
{"_get_sfloat_dtype",
@@ -4442,6 +4462,8 @@ static struct PyMethodDef array_module_methods[] = {
{"_reload_guard", (PyCFunction)_reload_guard,
METH_NOARGS,
"Give a warning on reload and big warning in sub-interpreters."},
+ {"_from_dlpack", (PyCFunction)_from_dlpack,
+ METH_O, NULL},
{NULL, NULL, 0, NULL} /* sentinel */
};
@@ -4672,14 +4694,14 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
PyObject *m, *d, *s;
PyObject *c_api;
- /* Initialize CPU features */
- if (npy_cpu_init() < 0) {
- goto err;
- }
-
/* Create the module and add the functions */
m = PyModule_Create(&moduledef);
if (!m) {
+ return NULL;
+ }
+
+ /* Initialize CPU features */
+ if (npy_cpu_init() < 0) {
goto err;
}
@@ -4910,6 +4932,23 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
if (initumath(m) != 0) {
goto err;
}
+ /*
+ * Initialize the default PyDataMem_Handler capsule singleton.
+ */
+ PyDataMem_DefaultHandler = PyCapsule_New(&default_handler, "mem_handler", NULL);
+ if (PyDataMem_DefaultHandler == NULL) {
+ goto err;
+ }
+#if (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM >= 0x07030600)
+ /*
+ * Initialize the context-local current handler
+ * with the default PyDataMem_Handler capsule.
+ */
+ current_handler = PyContextVar_New("current_allocator", PyDataMem_DefaultHandler);
+ if (current_handler == NULL) {
+ goto err;
+ }
+#endif
return m;
err:
@@ -4917,5 +4956,6 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) {
PyErr_SetString(PyExc_RuntimeError,
"cannot load multiarray module.");
}
+ Py_DECREF(m);
return NULL;
}
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index 8e072d5f4..2675496ab 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -1190,7 +1190,7 @@ npyiter_resetbasepointers(NewNpyArrayIterObject *self)
}
static PyObject *
-npyiter_reset(NewNpyArrayIterObject *self)
+npyiter_reset(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter == NULL) {
PyErr_SetString(PyExc_ValueError,
@@ -1227,7 +1227,7 @@ npyiter_reset(NewNpyArrayIterObject *self)
* copied.
*/
static PyObject *
-npyiter_copy(NewNpyArrayIterObject *self)
+npyiter_copy(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
NewNpyArrayIterObject *iter;
@@ -1263,7 +1263,7 @@ npyiter_copy(NewNpyArrayIterObject *self)
}
static PyObject *
-npyiter_iternext(NewNpyArrayIterObject *self)
+npyiter_iternext(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter != NULL && self->iternext != NULL &&
!self->finished && self->iternext(self->iter)) {
@@ -1320,7 +1320,8 @@ npyiter_remove_axis(NewNpyArrayIterObject *self, PyObject *args)
}
static PyObject *
-npyiter_remove_multi_index(NewNpyArrayIterObject *self)
+npyiter_remove_multi_index(
+ NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter == NULL) {
PyErr_SetString(PyExc_ValueError,
@@ -1345,7 +1346,8 @@ npyiter_remove_multi_index(NewNpyArrayIterObject *self)
}
static PyObject *
-npyiter_enable_external_loop(NewNpyArrayIterObject *self)
+npyiter_enable_external_loop(
+ NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter == NULL) {
PyErr_SetString(PyExc_ValueError,
@@ -1370,7 +1372,7 @@ npyiter_enable_external_loop(NewNpyArrayIterObject *self)
}
static PyObject *
-npyiter_debug_print(NewNpyArrayIterObject *self)
+npyiter_debug_print(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter != NULL) {
NpyIter_DebugPrint(self->iter);
@@ -2315,7 +2317,7 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op,
}
static PyObject *
-npyiter_enter(NewNpyArrayIterObject *self)
+npyiter_enter(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
if (self->iter == NULL) {
PyErr_SetString(PyExc_RuntimeError, "operation on non-initialized iterator");
@@ -2326,7 +2328,7 @@ npyiter_enter(NewNpyArrayIterObject *self)
}
static PyObject *
-npyiter_close(NewNpyArrayIterObject *self)
+npyiter_close(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
NpyIter *iter = self->iter;
int ret;
@@ -2347,7 +2349,7 @@ static PyObject *
npyiter_exit(NewNpyArrayIterObject *self, PyObject *NPY_UNUSED(args))
{
/* even if called via exception handling, writeback any data */
- return npyiter_close(self);
+ return npyiter_close(self, NULL);
}
static PyMethodDef npyiter_methods[] = {
diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c
index e409e9874..564352f1f 100644
--- a/numpy/core/src/multiarray/scalarapi.c
+++ b/numpy/core/src/multiarray/scalarapi.c
@@ -233,8 +233,12 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr,
PyArray_VectorUnaryFunc* castfunc;
descr = PyArray_DescrFromScalar(scalar);
+ if (descr == NULL) {
+ return -1;
+ }
castfunc = PyArray_GetCastFunc(descr, outcode->type_num);
if (castfunc == NULL) {
+ Py_DECREF(descr);
return -1;
}
if (PyTypeNum_ISEXTENDED(descr->type_num) ||
@@ -254,6 +258,7 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr,
NPY_ARRAY_CARRAY, NULL);
if (aout == NULL) {
Py_DECREF(ain);
+ Py_DECREF(descr);
return -1;
}
castfunc(PyArray_DATA(ain), PyArray_DATA(aout), 1, ain, aout);
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 56f17431a..db1e49db8 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -34,6 +34,16 @@
#include "binop_override.h"
+/*
+ * used for allocating a single scalar, so use the default numpy
+ * memory allocators instead of the (maybe) user overrides
+ */
+NPY_NO_EXPORT void *
+npy_alloc_cache_zero(size_t nmemb, size_t size);
+
+NPY_NO_EXPORT void
+npy_free_cache(void * p, npy_uintp sz);
+
NPY_NO_EXPORT PyBoolScalarObject _PyArrayScalar_BoolValues[] = {
{PyObject_HEAD_INIT(&PyBoolArrType_Type) 0},
{PyObject_HEAD_INIT(&PyBoolArrType_Type) 1},
@@ -209,6 +219,27 @@ gentype_multiply(PyObject *m1, PyObject *m2)
}
/**begin repeat
+ * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+ * LONG, ULONG, LONGLONG, ULONGLONG#
+ * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
+ * npy_long, npy_ulong, npy_longlong, npy_ulonglong#
+ * #c = hh, uhh, h, uh,, u, l, ul, ll, ull#
+ * #Name = Byte, UByte, Short, UShort, Int, UInt,
+ * Long, ULong, LongLong, ULongLong#
+ * #convert = Long*8, LongLong*2#
+ */
+static PyObject *
+@type@_bit_count(PyObject *self, PyObject *NPY_UNUSED(args))
+{
+ @type@ scalar = PyArrayScalar_VAL(self, @Name@);
+ uint8_t count = npy_popcount@c@(scalar);
+ PyObject *result = PyLong_From@convert@(count);
+
+ return result;
+}
+/**end repeat**/
+
+/**begin repeat
*
* #name = positive, negative, absolute, invert, int, float#
*/
@@ -1129,7 +1160,7 @@ gentype_size_get(PyObject *NPY_UNUSED(self), void *NPY_UNUSED(ignored))
}
static PyObject *
-gentype_sizeof(PyObject *self)
+gentype_sizeof(PyObject *self, PyObject *NPY_UNUSED(args))
{
Py_ssize_t nbytes;
PyObject * isz = gentype_itemsize_get(self, NULL);
@@ -1321,7 +1352,7 @@ gentype_imag_get(PyObject *self, void *NPY_UNUSED(ignored))
int elsize;
typecode = PyArray_DescrFromScalar(self);
elsize = typecode->elsize;
- temp = npy_alloc_cache_zero(elsize);
+ temp = npy_alloc_cache_zero(1, elsize);
ret = PyArray_Scalar(temp, typecode, NULL);
npy_free_cache(temp, elsize);
}
@@ -1887,7 +1918,7 @@ static PyObject *
*/
/* Heavily copied from the builtin float.as_integer_ratio */
static PyObject *
-@name@_as_integer_ratio(PyObject *self)
+@name@_as_integer_ratio(PyObject *self, PyObject *NPY_UNUSED(args))
{
#if @is_half@
npy_double val = npy_half_to_double(PyArrayScalar_VAL(self, @Name@));
@@ -1968,7 +1999,7 @@ error:
* #c = f, f, , l#
*/
static PyObject *
-@name@_is_integer(PyObject *self)
+@name@_is_integer(PyObject *self, PyObject *NPY_UNUSED(args))
{
#if @is_half@
npy_double val = npy_half_to_double(PyArrayScalar_VAL(self, @Name@));
@@ -1991,7 +2022,7 @@ static PyObject *
/**end repeat**/
static PyObject *
-integer_is_integer(PyObject *self) {
+integer_is_integer(PyObject *self, PyObject *NPY_UNUSED(args)) {
Py_RETURN_TRUE;
}
@@ -2306,8 +2337,7 @@ static PyMethodDef @name@type_methods[] = {
/**end repeat**/
/**begin repeat
- * #name = byte, short, int, long, longlong, ubyte, ushort,
- * uint, ulong, ulonglong, timedelta, cdouble#
+ * #name = timedelta, cdouble#
*/
static PyMethodDef @name@type_methods[] = {
/* for typing; requires python >= 3.9 */
@@ -2318,6 +2348,23 @@ static PyMethodDef @name@type_methods[] = {
};
/**end repeat**/
+/**begin repeat
+ * #name = byte, ubyte, short, ushort, int, uint,
+ * long, ulong, longlong, ulonglong#
+ */
+static PyMethodDef @name@type_methods[] = {
+ /* for typing; requires python >= 3.9 */
+ {"__class_getitem__",
+ (PyCFunction)numbertype_class_getitem,
+ METH_CLASS | METH_O, NULL},
+ {"bit_count",
+ (PyCFunction)npy_@name@_bit_count,
+ METH_NOARGS, NULL},
+ {NULL, NULL, 0, NULL} /* sentinel */
+};
+/**end repeat**/
+
+
/************* As_mapping functions for void array scalar ************/
static Py_ssize_t
@@ -3151,7 +3198,10 @@ void_arrtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
(int) NPY_MAX_INT);
return NULL;
}
- destptr = npy_alloc_cache_zero(memu);
+ if (memu == 0) {
+ memu = 1;
+ }
+ destptr = npy_alloc_cache_zero(memu, 1);
if (destptr == NULL) {
return PyErr_NoMemory();
}
@@ -4092,6 +4142,17 @@ initialize_numeric_types(void)
/**end repeat**/
/**begin repeat
+ * #name = byte, short, int, long, longlong,
+ * ubyte, ushort, uint, ulong, ulonglong#
+ * #Name = Byte, Short, Int, Long, LongLong,
+ * UByte, UShort, UInt, ULong, ULongLong#
+ */
+
+ Py@Name@ArrType_Type.tp_methods = @name@type_methods;
+
+ /**end repeat**/
+
+ /**begin repeat
* #name = half, float, double, longdouble#
* #Name = Half, Float, Double, LongDouble#
*/
diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c
index 5a4e8c0f3..162abd6a4 100644
--- a/numpy/core/src/multiarray/shape.c
+++ b/numpy/core/src/multiarray/shape.c
@@ -121,8 +121,16 @@ PyArray_Resize(PyArrayObject *self, PyArray_Dims *newshape, int refcheck,
}
/* Reallocate space if needed - allocating 0 is forbidden */
- new_data = PyDataMem_RENEW(
- PyArray_DATA(self), newnbytes == 0 ? elsize : newnbytes);
+ PyObject *handler = PyArray_HANDLER(self);
+ if (handler == NULL) {
+ /* This can happen if someone arbitrarily sets NPY_ARRAY_OWNDATA */
+ PyErr_SetString(PyExc_RuntimeError,
+ "no memory handler found but OWNDATA flag set");
+ return NULL;
+ }
+ new_data = PyDataMem_UserRENEW(PyArray_DATA(self),
+ newnbytes == 0 ? elsize : newnbytes,
+ handler);
if (new_data == NULL) {
PyErr_SetString(PyExc_MemoryError,
"cannot allocate memory for array");
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
index cae84befe..5b418342f 100644
--- a/numpy/core/src/npymath/npy_math_internal.h.src
+++ b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -55,6 +55,29 @@
*/
#include "npy_math_private.h"
+/* Magic binary numbers used by bit_count
+ * For type T, the magic numbers are computed as follows:
+ * Magic[0]: 01 01 01 01 01 01... = (T)~(T)0/3
+ * Magic[1]: 0011 0011 0011... = (T)~(T)0/15 * 3
+ * Magic[2]: 00001111 00001111... = (T)~(T)0/255 * 15
+ * Magic[3]: 00000001 00000001... = (T)~(T)0/255
+ *
+ * Counting bits set, in parallel
+ * Based on: http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ *
+ * Generic Algorithm for type T:
+ * a = a - ((a >> 1) & (T)~(T)0/3);
+ * a = (a & (T)~(T)0/15*3) + ((a >> 2) & (T)~(T)0/15*3);
+ * a = (a + (a >> 4)) & (T)~(T)0/255*15;
+ * c = (T)(a * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT;
+*/
+
+static const npy_uint8 MAGIC8[] = {0x55u, 0x33u, 0x0Fu, 0x01u};
+static const npy_uint16 MAGIC16[] = {0x5555u, 0x3333u, 0x0F0Fu, 0x0101u};
+static const npy_uint32 MAGIC32[] = {0x55555555ul, 0x33333333ul, 0x0F0F0F0Ful, 0x01010101ul};
+static const npy_uint64 MAGIC64[] = {0x5555555555555555ull, 0x3333333333333333ull, 0x0F0F0F0F0F0F0F0Full, 0x0101010101010101ull};
+
+
/*
*****************************************************************************
** BASIC MATH FUNCTIONS **
@@ -454,10 +477,16 @@ NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp)
/**begin repeat
* #type = npy_longdouble, npy_double, npy_float#
+ * #TYPE = LONGDOUBLE, DOUBLE, FLOAT#
* #c = l,,f#
* #C = L,,F#
*/
-
+#undef NPY__FP_SFX
+#if NPY_SIZEOF_@TYPE@ == NPY_SIZEOF_DOUBLE
+ #define NPY__FP_SFX(X) X
+#else
+ #define NPY__FP_SFX(X) NPY_CAT(X, @c@)
+#endif
/*
* On arm64 macOS, there's a bug with sin, cos, and tan where they don't
* raise "invalid" when given INFINITY as input.
@@ -483,7 +512,7 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x)
return (x - x);
}
#endif
- return @kind@@c@(x);
+ return NPY__FP_SFX(@kind@)(x);
}
#endif
@@ -498,7 +527,7 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x)
#ifdef HAVE_@KIND@@C@
NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y)
{
- return @kind@@c@(x, y);
+ return NPY__FP_SFX(@kind@)(x, y);
}
#endif
/**end repeat1**/
@@ -506,21 +535,21 @@ NPY_INPLACE @type@ npy_@kind@@c@(@type@ x, @type@ y)
#ifdef HAVE_MODF@C@
NPY_INPLACE @type@ npy_modf@c@(@type@ x, @type@ *iptr)
{
- return modf@c@(x, iptr);
+ return NPY__FP_SFX(modf)(x, iptr);
}
#endif
#ifdef HAVE_LDEXP@C@
NPY_INPLACE @type@ npy_ldexp@c@(@type@ x, int exp)
{
- return ldexp@c@(x, exp);
+ return NPY__FP_SFX(ldexp)(x, exp);
}
#endif
#ifdef HAVE_FREXP@C@
NPY_INPLACE @type@ npy_frexp@c@(@type@ x, int* exp)
{
- return frexp@c@(x, exp);
+ return NPY__FP_SFX(frexp)(x, exp);
}
#endif
@@ -543,10 +572,10 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x)
#else
NPY_INPLACE @type@ npy_cbrt@c@(@type@ x)
{
- return cbrt@c@(x);
+ return NPY__FP_SFX(cbrt)(x);
}
#endif
-
+#undef NPY__FP_SFX
/**end repeat**/
@@ -556,10 +585,16 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x)
/**begin repeat
* #type = npy_float, npy_double, npy_longdouble#
+ * #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
* #c = f, ,l#
* #C = F, ,L#
*/
-
+#undef NPY__FP_SFX
+#if NPY_SIZEOF_@TYPE@ == NPY_SIZEOF_DOUBLE
+ #define NPY__FP_SFX(X) X
+#else
+ #define NPY__FP_SFX(X) NPY_CAT(X, @c@)
+#endif
@type@ npy_heaviside@c@(@type@ x, @type@ h0)
{
if (npy_isnan(x)) {
@@ -576,10 +611,10 @@ NPY_INPLACE @type@ npy_cbrt@c@(@type@ x)
}
}
-#define LOGE2 NPY_LOGE2@c@
-#define LOG2E NPY_LOG2E@c@
-#define RAD2DEG (180.0@c@/NPY_PI@c@)
-#define DEG2RAD (NPY_PI@c@/180.0@c@)
+#define LOGE2 NPY__FP_SFX(NPY_LOGE2)
+#define LOG2E NPY__FP_SFX(NPY_LOG2E)
+#define RAD2DEG (NPY__FP_SFX(180.0)/NPY__FP_SFX(NPY_PI))
+#define DEG2RAD (NPY__FP_SFX(NPY_PI)/NPY__FP_SFX(180.0))
NPY_INPLACE @type@ npy_rad2deg@c@(@type@ x)
{
@@ -733,7 +768,7 @@ npy_divmod@c@(@type@ a, @type@ b, @type@ *modulus)
#undef LOG2E
#undef RAD2DEG
#undef DEG2RAD
-
+#undef NPY__FP_SFX
/**end repeat**/
/**begin repeat
@@ -814,3 +849,66 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
}
/**end repeat1**/
/**end repeat**/
+
+
+#define __popcnt32 __popcnt
+/**begin repeat
+ *
+ * #type = ubyte, ushort, uint, ulong, ulonglong#
+ * #STYPE = BYTE, SHORT, INT, LONG, LONGLONG#
+ * #c = hh, h, , l, ll#
+ */
+#undef TO_BITS_LEN
+#if 0
+/**begin repeat1
+ * #len = 8, 16, 32, 64#
+ */
+#elif NPY_BITSOF_@STYPE@ == @len@
+ #define TO_BITS_LEN(X) X##@len@
+/**end repeat1**/
+#endif
+
+
+NPY_INPLACE uint8_t
+npy_popcount_parallel@c@(npy_@type@ a)
+{
+ a = a - ((a >> 1) & (npy_@type@) TO_BITS_LEN(MAGIC)[0]);
+ a = ((a & (npy_@type@) TO_BITS_LEN(MAGIC)[1])) + ((a >> 2) & (npy_@type@) TO_BITS_LEN(MAGIC)[1]);
+ a = (a + (a >> 4)) & (npy_@type@) TO_BITS_LEN(MAGIC)[2];
+ return (npy_@type@) (a * (npy_@type@) TO_BITS_LEN(MAGIC)[3]) >> ((NPY_SIZEOF_@STYPE@ - 1) * CHAR_BIT);
+}
+
+NPY_INPLACE uint8_t
+npy_popcountu@c@(npy_@type@ a)
+{
+/* use built-in popcount if present, else use our implementation */
+#if (defined(__clang__) || defined(__GNUC__)) && NPY_BITSOF_@STYPE@ >= 32
+ return __builtin_popcount@c@(a);
+#elif defined(_MSC_VER) && NPY_BITSOF_@STYPE@ >= 16
+ /* no builtin __popcnt64 for 32 bits */
+ #if defined(_WIN64) || (defined(_WIN32) && NPY_BITSOF_@STYPE@ != 64)
+ return TO_BITS_LEN(__popcnt)(a);
+ /* split 64 bit number into two 32 bit ints and return sum of counts */
+ #elif (defined(_WIN32) && NPY_BITSOF_@STYPE@ == 64)
+ npy_uint32 left = (npy_uint32) (a>>32);
+ npy_uint32 right = (npy_uint32) a;
+ return __popcnt32(left) + __popcnt32(right);
+ #endif
+#else
+ return npy_popcount_parallel@c@(a);
+#endif
+}
+/**end repeat**/
+
+/**begin repeat
+ *
+ * #type = byte, short, int, long, longlong#
+ * #c = hh, h, , l, ll#
+ */
+NPY_INPLACE uint8_t
+npy_popcount@c@(npy_@type@ a)
+{
+ /* Return popcount of abs(a) */
+ return npy_popcountu@c@(a < 0 ? -a : a);
+}
+/**end repeat**/
diff --git a/numpy/core/src/npymath/npy_math_private.h b/numpy/core/src/npymath/npy_math_private.h
index 212d11a0b..7ca0c5ba0 100644
--- a/numpy/core/src/npymath/npy_math_private.h
+++ b/numpy/core/src/npymath/npy_math_private.h
@@ -19,7 +19,13 @@
#define _NPY_MATH_PRIVATE_H_
#include <Python.h>
+#ifdef __cplusplus
+#include <cmath>
+using std::isgreater;
+using std::isless;
+#else
#include <math.h>
+#endif
#include "npy_config.h"
#include "npy_fpmath.h"
@@ -507,17 +513,29 @@ typedef union {
#else /* !_MSC_VER */
typedef union {
npy_cdouble npy_z;
+#ifdef __cplusplus
+ std::complex<double> c99z;
+#else
complex double c99_z;
+#endif
} __npy_cdouble_to_c99_cast;
typedef union {
npy_cfloat npy_z;
+#ifdef __cplusplus
+ std::complex<float> c99z;
+#else
complex float c99_z;
+#endif
} __npy_cfloat_to_c99_cast;
typedef union {
npy_clongdouble npy_z;
+#ifdef __cplusplus
+ std::complex<long double> c99_z;
+#else
complex long double c99_z;
+#endif
} __npy_clongdouble_to_c99_cast;
#endif /* !_MSC_VER */
diff --git a/numpy/core/src/npysort/radixsort.c.src b/numpy/core/src/npysort/radixsort.c.src
deleted file mode 100644
index 99d8ed42a..000000000
--- a/numpy/core/src/npysort/radixsort.c.src
+++ /dev/null
@@ -1,231 +0,0 @@
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "npy_sort.h"
-#include "npysort_common.h"
-#include <stdlib.h>
-
-/*
- *****************************************************************************
- ** INTEGER SORTS **
- *****************************************************************************
- */
-
-
-/**begin repeat
- *
- * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
- * LONGLONG, ULONGLONG#
- * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
- * longlong, ulonglong#
- * #type = npy_ubyte, npy_ubyte, npy_ubyte, npy_ushort, npy_ushort, npy_uint,
- * npy_uint, npy_ulong, npy_ulong, npy_ulonglong, npy_ulonglong#
- * #sign = 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
- * #floating = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0#
- */
-
-// Reference: https://github.com/eloj/radix-sorting#-key-derivation
-#if @sign@
- // Floating-point is currently disabled.
- // Floating-point tests succeed for double and float on macOS but not on Windows/Linux.
- // Basic sorting tests succeed but others relying on sort fail.
- // Possibly related to floating-point normalisation or multiple NaN reprs? Not sure.
- #if @floating@
- // For floats, we invert the key if the sign bit is set, else we invert the sign bit.
- #define KEY_OF(x) ((x) ^ (-((x) >> (sizeof(@type@) * 8 - 1)) | ((@type@)1 << (sizeof(@type@) * 8 - 1))))
- #else
- // For signed ints, we flip the sign bit so the negatives are below the positives.
- #define KEY_OF(x) ((x) ^ ((@type@)1 << (sizeof(@type@) * 8 - 1)))
- #endif
-#else
- // For unsigned ints, the key is as-is
- #define KEY_OF(x) (x)
-#endif
-
-static inline npy_ubyte
-nth_byte_@suff@(@type@ key, npy_intp l) {
- return (key >> (l << 3)) & 0xFF;
-}
-
-static @type@*
-radixsort0_@suff@(@type@ *arr, @type@ *aux, npy_intp num)
-{
- npy_intp cnt[sizeof(@type@)][1 << 8] = { { 0 } };
- npy_intp i;
- size_t l;
- @type@ key0 = KEY_OF(arr[0]);
- size_t ncols = 0;
- npy_ubyte cols[sizeof(@type@)];
-
- for (i = 0; i < num; i++) {
- @type@ k = KEY_OF(arr[i]);
-
- for (l = 0; l < sizeof(@type@); l++) {
- cnt[l][nth_byte_@suff@(k, l)]++;
- }
- }
-
- for (l = 0; l < sizeof(@type@); l++) {
- if (cnt[l][nth_byte_@suff@(key0, l)] != num) {
- cols[ncols++] = l;
- }
- }
-
- for (l = 0; l < ncols; l++) {
- npy_intp a = 0;
- for (i = 0; i < 256; i++) {
- npy_intp b = cnt[cols[l]][i];
- cnt[cols[l]][i] = a;
- a += b;
- }
- }
-
- for (l = 0; l < ncols; l++) {
- @type@* temp;
- for (i = 0; i < num; i++) {
- @type@ k = KEY_OF(arr[i]);
- npy_intp dst = cnt[cols[l]][nth_byte_@suff@(k, cols[l])]++;
- aux[dst] = arr[i];
- }
-
- temp = aux;
- aux = arr;
- arr = temp;
- }
-
- return arr;
-}
-
-NPY_NO_EXPORT int
-radixsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr))
-{
- void *sorted;
- @type@ *aux;
- @type@ *arr = start;
- @type@ k1, k2;
- npy_bool all_sorted = 1;
-
- if (num < 2) {
- return 0;
- }
-
- k1 = KEY_OF(arr[0]);
- for (npy_intp i = 1; i < num; i++) {
- k2 = KEY_OF(arr[i]);
- if (k1 > k2) {
- all_sorted = 0;
- break;
- }
- k1 = k2;
- }
-
- if (all_sorted) {
- return 0;
- }
-
- aux = malloc(num * sizeof(@type@));
- if (aux == NULL) {
- return -NPY_ENOMEM;
- }
-
- sorted = radixsort0_@suff@(start, aux, num);
- if (sorted != start) {
- memcpy(start, sorted, num * sizeof(@type@));
- }
-
- free(aux);
- return 0;
-}
-
-static npy_intp*
-aradixsort0_@suff@(@type@ *arr, npy_intp *aux, npy_intp *tosort, npy_intp num)
-{
- npy_intp cnt[sizeof(@type@)][1 << 8] = { { 0 } };
- npy_intp i;
- size_t l;
- @type@ key0 = KEY_OF(arr[0]);
- size_t ncols = 0;
- npy_ubyte cols[sizeof(@type@)];
-
- for (i = 0; i < num; i++) {
- @type@ k = KEY_OF(arr[i]);
-
- for (l = 0; l < sizeof(@type@); l++) {
- cnt[l][nth_byte_@suff@(k, l)]++;
- }
- }
-
- for (l = 0; l < sizeof(@type@); l++) {
- if (cnt[l][nth_byte_@suff@(key0, l)] != num) {
- cols[ncols++] = l;
- }
- }
-
- for (l = 0; l < ncols; l++) {
- npy_intp a = 0;
- for (i = 0; i < 256; i++) {
- npy_intp b = cnt[cols[l]][i];
- cnt[cols[l]][i] = a;
- a += b;
- }
- }
-
- for (l = 0; l < ncols; l++) {
- npy_intp* temp;
- for (i = 0; i < num; i++) {
- @type@ k = KEY_OF(arr[tosort[i]]);
- npy_intp dst = cnt[cols[l]][nth_byte_@suff@(k, cols[l])]++;
- aux[dst] = tosort[i];
- }
-
- temp = aux;
- aux = tosort;
- tosort = temp;
- }
-
- return tosort;
-}
-
-NPY_NO_EXPORT int
-aradixsort_@suff@(void *start, npy_intp* tosort, npy_intp num, void *NPY_UNUSED(varr))
-{
- npy_intp *sorted;
- npy_intp *aux;
- @type@ *arr = start;
- @type@ k1, k2;
- npy_bool all_sorted = 1;
-
- if (num < 2) {
- return 0;
- }
-
- k1 = KEY_OF(arr[tosort[0]]);
- for (npy_intp i = 1; i < num; i++) {
- k2 = KEY_OF(arr[tosort[i]]);
- if (k1 > k2) {
- all_sorted = 0;
- break;
- }
- k1 = k2;
- }
-
- if (all_sorted) {
- return 0;
- }
-
- aux = malloc(num * sizeof(npy_intp));
- if (aux == NULL) {
- return -NPY_ENOMEM;
- }
-
- sorted = aradixsort0_@suff@(start, aux, tosort, num);
- if (sorted != tosort) {
- memcpy(tosort, sorted, num * sizeof(npy_intp));
- }
-
- free(aux);
- return 0;
-}
-
-#undef KEY_OF
-
-/**end repeat**/
diff --git a/numpy/core/src/npysort/radixsort.cpp b/numpy/core/src/npysort/radixsort.cpp
new file mode 100644
index 000000000..017ea43b6
--- /dev/null
+++ b/numpy/core/src/npysort/radixsort.cpp
@@ -0,0 +1,354 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include "npy_sort.h"
+#include "npysort_common.h"
+
+#include "../common/numpy_tag.h"
+#include <stdlib.h>
+#include <type_traits>
+
+/*
+ *****************************************************************************
+ ** INTEGER SORTS **
+ *****************************************************************************
+ */
+
+// Reference: https://github.com/eloj/radix-sorting#-key-derivation
+template <class T>
+T
+KEY_OF(T x)
+{
+ // Floating-point is currently disabled.
+ // Floating-point tests succeed for double and float on macOS but not on
+ // Windows/Linux. Basic sorting tests succeed but others relying on sort
+ // fail. Possibly related to floating-point normalisation or multiple NaN
+ // reprs? Not sure.
+ if (std::is_floating_point<T>::value) {
+ // For floats, we invert the key if the sign bit is set, else we invert
+ // the sign bit.
+ return ((x) ^ (-((x) >> (sizeof(T) * 8 - 1)) |
+ ((T)1 << (sizeof(T) * 8 - 1))));
+ }
+ else if (std::is_signed<T>::value) {
+ // For signed ints, we flip the sign bit so the negatives are below the
+ // positives.
+ return ((x) ^ ((T)1 << (sizeof(T) * 8 - 1)));
+ }
+ else {
+ return x;
+ }
+}
+
+template <class T>
+static inline npy_ubyte
+nth_byte(T key, npy_intp l)
+{
+ return (key >> (l << 3)) & 0xFF;
+}
+
+template <class T>
+static T *
+radixsort0(T *start, T *aux, npy_intp num)
+{
+ npy_intp cnt[sizeof(T)][1 << 8] = {{0}};
+ T key0 = KEY_OF(start[0]);
+
+ for (npy_intp i = 0; i < num; i++) {
+ T k = KEY_OF(start[i]);
+
+ for (size_t l = 0; l < sizeof(T); l++) {
+ cnt[l][nth_byte(k, l)]++;
+ }
+ }
+
+ size_t ncols = 0;
+ npy_ubyte cols[sizeof(T)];
+ for (size_t l = 0; l < sizeof(T); l++) {
+ if (cnt[l][nth_byte(key0, l)] != num) {
+ cols[ncols++] = l;
+ }
+ }
+
+ for (size_t l = 0; l < ncols; l++) {
+ npy_intp a = 0;
+ for (npy_intp i = 0; i < 256; i++) {
+ npy_intp b = cnt[cols[l]][i];
+ cnt[cols[l]][i] = a;
+ a += b;
+ }
+ }
+
+ for (size_t l = 0; l < ncols; l++) {
+ T *temp;
+ for (npy_intp i = 0; i < num; i++) {
+ T k = KEY_OF(start[i]);
+ npy_intp dst = cnt[cols[l]][nth_byte(k, cols[l])]++;
+ aux[dst] = start[i];
+ }
+
+ temp = aux;
+ aux = start;
+ start = temp;
+ }
+
+ return start;
+}
+
+template <class T>
+static int
+radixsort_(T *start, npy_intp num)
+{
+ if (num < 2) {
+ return 0;
+ }
+
+ npy_bool all_sorted = 1;
+ T k1 = KEY_OF(start[0]), k2;
+ for (npy_intp i = 1; i < num; i++) {
+ k2 = KEY_OF(start[i]);
+ if (k1 > k2) {
+ all_sorted = 0;
+ break;
+ }
+ k1 = k2;
+ }
+
+ if (all_sorted) {
+ return 0;
+ }
+
+ T *aux = (T *)malloc(num * sizeof(T));
+ if (aux == nullptr) {
+ return -NPY_ENOMEM;
+ }
+
+ T *sorted = radixsort0(start, aux, num);
+ if (sorted != start) {
+ memcpy(start, sorted, num * sizeof(T));
+ }
+
+ free(aux);
+ return 0;
+}
+
+template <class T>
+static int
+radixsort(void *start, npy_intp num)
+{
+ return radixsort_((T *)start, num);
+}
+
+template <class T>
+static npy_intp *
+aradixsort0(T *start, npy_intp *aux, npy_intp *tosort, npy_intp num)
+{
+ npy_intp cnt[sizeof(T)][1 << 8] = {{0}};
+ T key0 = KEY_OF(start[0]);
+
+ for (npy_intp i = 0; i < num; i++) {
+ T k = KEY_OF(start[i]);
+
+ for (size_t l = 0; l < sizeof(T); l++) {
+ cnt[l][nth_byte(k, l)]++;
+ }
+ }
+
+ size_t ncols = 0;
+ npy_ubyte cols[sizeof(T)];
+ for (size_t l = 0; l < sizeof(T); l++) {
+ if (cnt[l][nth_byte(key0, l)] != num) {
+ cols[ncols++] = l;
+ }
+ }
+
+ for (size_t l = 0; l < ncols; l++) {
+ npy_intp a = 0;
+ for (npy_intp i = 0; i < 256; i++) {
+ npy_intp b = cnt[cols[l]][i];
+ cnt[cols[l]][i] = a;
+ a += b;
+ }
+ }
+
+ for (size_t l = 0; l < ncols; l++) {
+ npy_intp *temp;
+ for (npy_intp i = 0; i < num; i++) {
+ T k = KEY_OF(start[tosort[i]]);
+ npy_intp dst = cnt[cols[l]][nth_byte(k, cols[l])]++;
+ aux[dst] = tosort[i];
+ }
+
+ temp = aux;
+ aux = tosort;
+ tosort = temp;
+ }
+
+ return tosort;
+}
+
+template <class T>
+static int
+aradixsort_(T *start, npy_intp *tosort, npy_intp num)
+{
+ npy_intp *sorted;
+ npy_intp *aux;
+ T k1, k2;
+ npy_bool all_sorted = 1;
+
+ if (num < 2) {
+ return 0;
+ }
+
+ k1 = KEY_OF(start[tosort[0]]);
+ for (npy_intp i = 1; i < num; i++) {
+ k2 = KEY_OF(start[tosort[i]]);
+ if (k1 > k2) {
+ all_sorted = 0;
+ break;
+ }
+ k1 = k2;
+ }
+
+ if (all_sorted) {
+ return 0;
+ }
+
+ aux = (npy_intp *)malloc(num * sizeof(npy_intp));
+ if (aux == NULL) {
+ return -NPY_ENOMEM;
+ }
+
+ sorted = aradixsort0(start, aux, tosort, num);
+ if (sorted != tosort) {
+ memcpy(tosort, sorted, num * sizeof(npy_intp));
+ }
+
+ free(aux);
+ return 0;
+}
+
+template <class T>
+static int
+aradixsort(void *start, npy_intp *tosort, npy_intp num)
+{
+ return aradixsort_((T *)start, tosort, num);
+}
+
+extern "C" {
+NPY_NO_EXPORT int
+radixsort_bool(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_bool>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_byte(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_byte>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_ubyte(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_ubyte>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_short(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_short>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_ushort(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_ushort>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_int(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_int>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_uint(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_uint>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_long(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_long>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_ulong(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_ulong>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_longlong(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_longlong>(vec, cnt);
+}
+NPY_NO_EXPORT int
+radixsort_ulonglong(void *vec, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return radixsort<npy_ulonglong>(vec, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_bool>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_byte>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_ubyte(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_ubyte>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_short(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_short>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_ushort(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_ushort>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_int(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_int>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_uint>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_long(void *vec, npy_intp *ind, npy_intp cnt, void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_long>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_ulong(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_ulong>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_longlong(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_longlong>(vec, ind, cnt);
+}
+NPY_NO_EXPORT int
+aradixsort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt,
+ void *NPY_UNUSED(null))
+{
+ return aradixsort<npy_ulonglong>(vec, ind, cnt);
+}
+}
diff --git a/numpy/core/src/umath/_scaled_float_dtype.c b/numpy/core/src/umath/_scaled_float_dtype.c
index eeef33a3d..b6c19362a 100644
--- a/numpy/core/src/umath/_scaled_float_dtype.c
+++ b/numpy/core/src/umath/_scaled_float_dtype.c
@@ -398,6 +398,42 @@ float_to_from_sfloat_resolve_descriptors(
}
+/*
+ * Cast to boolean (for testing the logical functions a bit better).
+ */
+static int
+cast_sfloat_to_bool(PyArrayMethod_Context *NPY_UNUSED(context),
+ char *const data[], npy_intp const dimensions[],
+ npy_intp const strides[], NpyAuxData *NPY_UNUSED(auxdata))
+{
+ npy_intp N = dimensions[0];
+ char *in = data[0];
+ char *out = data[1];
+ for (npy_intp i = 0; i < N; i++) {
+ *(npy_bool *)out = *(double *)in != 0;
+ in += strides[0];
+ out += strides[1];
+ }
+ return 0;
+}
+
+static NPY_CASTING
+sfloat_to_bool_resolve_descriptors(
+ PyArrayMethodObject *NPY_UNUSED(self),
+ PyArray_DTypeMeta *NPY_UNUSED(dtypes[2]),
+ PyArray_Descr *given_descrs[2],
+ PyArray_Descr *loop_descrs[2])
+{
+ Py_INCREF(given_descrs[0]);
+ loop_descrs[0] = given_descrs[0];
+ if (loop_descrs[0] == NULL) {
+ return -1;
+ }
+ loop_descrs[1] = PyArray_DescrFromType(NPY_BOOL); /* cannot fail */
+ return NPY_UNSAFE_CASTING;
+}
+
+
static int
init_casts(void)
{
@@ -453,6 +489,22 @@ init_casts(void)
return -1;
}
+ slots[0].slot = NPY_METH_resolve_descriptors;
+ slots[0].pfunc = &sfloat_to_bool_resolve_descriptors;
+ slots[1].slot = NPY_METH_strided_loop;
+ slots[1].pfunc = &cast_sfloat_to_bool;
+ slots[2].slot = 0;
+ slots[2].pfunc = NULL;
+
+ spec.name = "sfloat_to_bool_cast";
+ dtypes[0] = &PyArray_SFloatDType;
+ dtypes[1] = PyArray_DTypeFromTypeNum(NPY_BOOL);
+ Py_DECREF(dtypes[1]); /* immortal anyway */
+
+ if (PyArray_AddCastingImplementation_FromSpec(&spec, 0)) {
+ return -1;
+ }
+
return 0;
}
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index 33d8539d5..ce42fc271 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -400,6 +400,16 @@ addUfuncs(PyObject *dictionary) {
}
PyDict_SetItemString(dictionary, "always_error", f);
Py_DECREF(f);
+ f = PyUFunc_FromFuncAndDataAndSignature(always_error_functions,
+ always_error_data, always_error_signatures, 1, 2, 1, PyUFunc_None,
+ "always_error_gufunc",
+ "simply, broken, gufunc that sets an error (but releases the GIL).",
+ 0, "(i),()->()");
+ if (f == NULL) {
+ return -1;
+ }
+ PyDict_SetItemString(dictionary, "always_error_gufunc", f);
+ Py_DECREF(f);
f = PyUFunc_FromFuncAndDataAndSignature(inner1d_functions, inner1d_data,
inner1d_signatures, 2, 2, 1, PyUFunc_None, "inner1d",
"inner on the last dimension and broadcast on the rest \n"
diff --git a/numpy/core/src/umath/clip.c.src b/numpy/core/src/umath/clip.c.src
deleted file mode 100644
index bc966b7ac..000000000
--- a/numpy/core/src/umath/clip.c.src
+++ /dev/null
@@ -1,120 +0,0 @@
-/**
- * This module provides the inner loops for the clip ufunc
- */
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-#define _UMATHMODULE
-#define _MULTIARRAYMODULE
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-
-#include "numpy/halffloat.h"
-#include "numpy/npy_math.h"
-#include "numpy/ndarraytypes.h"
-#include "numpy/npy_common.h"
-#include "numpy/utils.h"
-#include "fast_loop_macros.h"
-
-/*
- * Produce macros that perform nan/nat-propagating min and max
- */
-
-/**begin repeat
- * #name = BOOL,
- * BYTE, UBYTE, SHORT, USHORT, INT, UINT,
- * LONG, ULONG, LONGLONG, ULONGLONG#
- */
-#define _NPY_@name@_MIN(a, b) PyArray_MIN(a, b)
-#define _NPY_@name@_MAX(a, b) PyArray_MAX(a, b)
-/**end repeat**/
-
-#define _NPY_HALF_MIN(a, b) (npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b))
-#define _NPY_HALF_MAX(a, b) (npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b))
-
-/**begin repeat
- * #name = FLOAT, DOUBLE, LONGDOUBLE#
- */
-#define _NPY_@name@_MIN(a, b) (npy_isnan(a) ? (a) : PyArray_MIN(a, b))
-#define _NPY_@name@_MAX(a, b) (npy_isnan(a) ? (a) : PyArray_MAX(a, b))
-/**end repeat**/
-
-/**begin repeat
- * #name = CFLOAT, CDOUBLE, CLONGDOUBLE#
- */
-#define _NPY_@name@_MIN(a, b) (npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CLT(a, b) ? (a) : (b))
-#define _NPY_@name@_MAX(a, b) (npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CGT(a, b) ? (a) : (b))
-/**end repeat**/
-
-/**begin repeat
- * #name = DATETIME, TIMEDELTA#
- */
-#define _NPY_@name@_MIN(a, b) ( \
- (a) == NPY_DATETIME_NAT ? (a) : \
- (b) == NPY_DATETIME_NAT ? (b) : \
- (a) < (b) ? (a) : (b) \
-)
-#define _NPY_@name@_MAX(a, b) ( \
- (a) == NPY_DATETIME_NAT ? (a) : \
- (b) == NPY_DATETIME_NAT ? (b) : \
- (a) > (b) ? (a) : (b) \
-)
-/**end repeat**/
-
-/**begin repeat
- *
- * #name = BOOL,
- * BYTE, UBYTE, SHORT, USHORT, INT, UINT,
- * LONG, ULONG, LONGLONG, ULONGLONG,
- * HALF, FLOAT, DOUBLE, LONGDOUBLE,
- * CFLOAT, CDOUBLE, CLONGDOUBLE,
- * DATETIME, TIMEDELTA#
- * #type = npy_bool,
- * npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
- * npy_long, npy_ulong, npy_longlong, npy_ulonglong,
- * npy_half, npy_float, npy_double, npy_longdouble,
- * npy_cfloat, npy_cdouble, npy_clongdouble,
- * npy_datetime, npy_timedelta#
- */
-
-#define _NPY_CLIP(x, min, max) \
- _NPY_@name@_MIN(_NPY_@name@_MAX((x), (min)), (max))
-
-NPY_NO_EXPORT void
-@name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
-{
- if (steps[1] == 0 && steps[2] == 0) {
- /* min and max are constant throughout the loop, the most common case */
- /* NOTE: it may be possible to optimize these checks for nan */
- @type@ min_val = *(@type@ *)args[1];
- @type@ max_val = *(@type@ *)args[2];
-
- char *ip1 = args[0], *op1 = args[3];
- npy_intp is1 = steps[0], os1 = steps[3];
- npy_intp n = dimensions[0];
-
- /* contiguous, branch to let the compiler optimize */
- if (is1 == sizeof(@type@) && os1 == sizeof(@type@)) {
- for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
- *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
- }
- }
- else {
- for(npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
- *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, min_val, max_val);
- }
- }
- }
- else {
- TERNARY_LOOP {
- *(@type@ *)op1 = _NPY_CLIP(*(@type@ *)ip1, *(@type@ *)ip2, *(@type@ *)ip3);
- }
- }
- npy_clear_floatstatus_barrier((char*)dimensions);
-}
-
-// clean up the macros we defined above
-#undef _NPY_CLIP
-#undef _NPY_@name@_MAX
-#undef _NPY_@name@_MIN
-
-/**end repeat**/
diff --git a/numpy/core/src/umath/clip.cpp b/numpy/core/src/umath/clip.cpp
new file mode 100644
index 000000000..19d05c848
--- /dev/null
+++ b/numpy/core/src/umath/clip.cpp
@@ -0,0 +1,282 @@
+/**
+ * This module provides the inner loops for the clip ufunc
+ */
+#define _UMATHMODULE
+#define _MULTIARRAYMODULE
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#include "numpy/halffloat.h"
+#include "numpy/ndarraytypes.h"
+#include "numpy/npy_common.h"
+#include "numpy/npy_math.h"
+#include "numpy/utils.h"
+
+#include "fast_loop_macros.h"
+
+#include "../common/numpy_tag.h"
+
+template <class T>
+T
+_NPY_MIN(T a, T b, npy::integral_tag const &)
+{
+ return PyArray_MIN(a, b);
+}
+template <class T>
+T
+_NPY_MAX(T a, T b, npy::integral_tag const &)
+{
+ return PyArray_MAX(a, b);
+}
+
+npy_half
+_NPY_MIN(npy_half a, npy_half b, npy::half_tag const &)
+{
+ return npy_half_isnan(a) || npy_half_le(a, b) ? (a) : (b);
+}
+npy_half
+_NPY_MAX(npy_half a, npy_half b, npy::half_tag const &)
+{
+ return npy_half_isnan(a) || npy_half_ge(a, b) ? (a) : (b);
+}
+
+template <class T>
+T
+_NPY_MIN(T a, T b, npy::floating_point_tag const &)
+{
+ return npy_isnan(a) ? (a) : PyArray_MIN(a, b);
+}
+template <class T>
+T
+_NPY_MAX(T a, T b, npy::floating_point_tag const &)
+{
+ return npy_isnan(a) ? (a) : PyArray_MAX(a, b);
+}
+
+template <class T>
+T
+_NPY_MIN(T a, T b, npy::complex_tag const &)
+{
+ return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CLT(a, b)
+ ? (a)
+ : (b);
+}
+template <class T>
+T
+_NPY_MAX(T a, T b, npy::complex_tag const &)
+{
+ return npy_isnan((a).real) || npy_isnan((a).imag) || PyArray_CGT(a, b)
+ ? (a)
+ : (b);
+}
+
+template <class T>
+T
+_NPY_MIN(T a, T b, npy::date_tag const &)
+{
+ return (a) == NPY_DATETIME_NAT ? (a)
+ : (b) == NPY_DATETIME_NAT ? (b)
+ : (a) < (b) ? (a)
+ : (b);
+}
+template <class T>
+T
+_NPY_MAX(T a, T b, npy::date_tag const &)
+{
+ return (a) == NPY_DATETIME_NAT ? (a)
+ : (b) == NPY_DATETIME_NAT ? (b)
+ : (a) > (b) ? (a)
+ : (b);
+}
+
+/* generic dispatcher */
+template <class Tag, class T = typename Tag::type>
+T
+_NPY_MIN(T const &a, T const &b)
+{
+ return _NPY_MIN(a, b, Tag{});
+}
+template <class Tag, class T = typename Tag::type>
+T
+_NPY_MAX(T const &a, T const &b)
+{
+ return _NPY_MAX(a, b, Tag{});
+}
+
+template <class Tag, class T>
+T
+_NPY_CLIP(T x, T min, T max)
+{
+ return _NPY_MIN<Tag>(_NPY_MAX<Tag>((x), (min)), (max));
+}
+
+template <class Tag, class T = typename Tag::type>
+static void
+_npy_clip_(T **args, npy_intp const *dimensions, npy_intp const *steps)
+{
+ npy_intp n = dimensions[0];
+ if (steps[1] == 0 && steps[2] == 0) {
+ /* min and max are constant throughout the loop, the most common case
+ */
+ /* NOTE: it may be possible to optimize these checks for nan */
+ T min_val = *args[1];
+ T max_val = *args[2];
+
+ T *ip1 = args[0], *op1 = args[3];
+ npy_intp is1 = steps[0] / sizeof(T), os1 = steps[3] / sizeof(T);
+
+ /* contiguous, branch to let the compiler optimize */
+ if (is1 == 1 && os1 == 1) {
+ for (npy_intp i = 0; i < n; i++, ip1++, op1++) {
+ *op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val);
+ }
+ }
+ else {
+ for (npy_intp i = 0; i < n; i++, ip1 += is1, op1 += os1) {
+ *op1 = _NPY_CLIP<Tag>(*ip1, min_val, max_val);
+ }
+ }
+ }
+ else {
+ T *ip1 = args[0], *ip2 = args[1], *ip3 = args[2], *op1 = args[3];
+ npy_intp is1 = steps[0] / sizeof(T), is2 = steps[1] / sizeof(T),
+ is3 = steps[2] / sizeof(T), os1 = steps[3] / sizeof(T);
+ for (npy_intp i = 0; i < n;
+ i++, ip1 += is1, ip2 += is2, ip3 += is3, op1 += os1)
+ *op1 = _NPY_CLIP<Tag>(*ip1, *ip2, *ip3);
+ }
+ npy_clear_floatstatus_barrier((char *)dimensions);
+}
+
+template <class Tag>
+static void
+_npy_clip(char **args, npy_intp const *dimensions, npy_intp const *steps)
+{
+ using T = typename Tag::type;
+ return _npy_clip_<Tag>((T **)args, dimensions, steps);
+}
+
+extern "C" {
+NPY_NO_EXPORT void
+BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::bool_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::byte_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::ubyte_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::short_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::ushort_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::int_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::uint_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::long_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::ulong_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::longlong_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::ulonglong_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::half_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::float_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::double_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::longdouble_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::cfloat_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::cdouble_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+CLONGDOUBLE_clip(char **args, npy_intp const *dimensions,
+ npy_intp const *steps, void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::clongdouble_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::datetime_tag>(args, dimensions, steps);
+}
+NPY_NO_EXPORT void
+TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func))
+{
+ return _npy_clip<npy::timedelta_tag>(args, dimensions, steps);
+}
+}
diff --git a/numpy/core/src/umath/clip.h b/numpy/core/src/umath/clip.h
new file mode 100644
index 000000000..f69ebd1e3
--- /dev/null
+++ b/numpy/core/src/umath/clip.h
@@ -0,0 +1,73 @@
+#ifndef _NPY_UMATH_CLIP_H_
+#define _NPY_UMATH_CLIP_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+NPY_NO_EXPORT void
+BOOL_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+BYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+UBYTE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+SHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+USHORT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+INT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+UINT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+LONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+ULONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+LONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+ULONGLONG_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+HALF_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+FLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+DOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+LONGDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+CFLOAT_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+CDOUBLE_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+CLONGDOUBLE_clip(char **args, npy_intp const *dimensions,
+ npy_intp const *steps, void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+DATETIME_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+NPY_NO_EXPORT void
+TIMEDELTA_clip(char **args, npy_intp const *dimensions, npy_intp const *steps,
+ void *NPY_UNUSED(func));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/numpy/core/src/umath/clip.h.src b/numpy/core/src/umath/clip.h.src
deleted file mode 100644
index f16856cdf..000000000
--- a/numpy/core/src/umath/clip.h.src
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _NPY_UMATH_CLIP_H_
-#define _NPY_UMATH_CLIP_H_
-
-
-/**begin repeat
- *
- * #name = BOOL,
- * BYTE, UBYTE, SHORT, USHORT, INT, UINT,
- * LONG, ULONG, LONGLONG, ULONGLONG,
- * HALF, FLOAT, DOUBLE, LONGDOUBLE,
- * CFLOAT, CDOUBLE, CLONGDOUBLE,
- * DATETIME, TIMEDELTA#
- */
-NPY_NO_EXPORT void
-@name@_clip(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
-/**end repeat**/
-
-#endif
diff --git a/numpy/core/src/umath/dispatching.c b/numpy/core/src/umath/dispatching.c
index 40de28754..8e99c0420 100644
--- a/numpy/core/src/umath/dispatching.c
+++ b/numpy/core/src/umath/dispatching.c
@@ -193,6 +193,10 @@ resolve_implementation_info(PyUFuncObject *ufunc,
/* Unspecified out always matches (see below for inputs) */
continue;
}
+ if (resolver_dtype == (PyArray_DTypeMeta *)Py_None) {
+ /* always matches */
+ continue;
+ }
if (given_dtype == resolver_dtype) {
continue;
}
@@ -267,8 +271,39 @@ resolve_implementation_info(PyUFuncObject *ufunc,
* the subclass should be considered a better match
* (subclasses are always more specific).
*/
+ /* Whether this (normally output) dtype was specified at all */
+ if (op_dtypes[i] == NULL) {
+ /*
+ * When DType is completely unspecified, prefer abstract
+ * over concrete, assuming it will resolve.
+ * Furthermore, we cannot decide which abstract/None
+ * is "better", only concrete ones which are subclasses
+ * of Abstract ones are defined as worse.
+ */
+ npy_bool prev_is_concrete = NPY_FALSE;
+ npy_bool new_is_concrete = NPY_FALSE;
+ if ((prev_dtype != Py_None) &&
+ !NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
+ prev_is_concrete = NPY_TRUE;
+ }
+ if ((new_dtype != Py_None) &&
+ !NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
+ new_is_concrete = NPY_TRUE;
+ }
+ if (prev_is_concrete == new_is_concrete) {
+ best = -1;
+ }
+ else if (prev_is_concrete) {
+ unambiguously_equally_good = 0;
+ best = 1;
+ }
+ else {
+ unambiguously_equally_good = 0;
+ best = 0;
+ }
+ }
/* If either is None, the other is strictly more specific */
- if (prev_dtype == Py_None) {
+ else if (prev_dtype == Py_None) {
unambiguously_equally_good = 0;
best = 1;
}
@@ -289,13 +324,29 @@ resolve_implementation_info(PyUFuncObject *ufunc,
*/
best = -1;
}
+ else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)prev_dtype)) {
+ /* old is not abstract, so better (both not possible) */
+ unambiguously_equally_good = 0;
+ best = 0;
+ }
+ else if (!NPY_DT_is_abstract((PyArray_DTypeMeta *)new_dtype)) {
+ /* new is not abstract, so better (both not possible) */
+ unambiguously_equally_good = 0;
+ best = 1;
+ }
/*
- * TODO: Unreachable, but we will need logic for abstract
- * DTypes to decide if one is a subclass of the other
- * (And their subclass relation is well defined.)
+ * TODO: This will need logic for abstract DTypes to decide if
+ * one is a subclass of the other (And their subclass
+ * relation is well defined). For now, we bail out
 * in case someone manages to get here.
*/
else {
- assert(0);
+ PyErr_SetString(PyExc_NotImplementedError,
+ "deciding which one of two abstract dtypes is "
+ "a better match is not yet implemented. This "
+ "will pick the better (or bail) in the future.");
+ *out_info = NULL;
+ return -1;
}
if ((current_best != -1) && (current_best != best)) {
@@ -612,6 +663,35 @@ promote_and_get_info_and_ufuncimpl(PyUFuncObject *ufunc,
}
return info;
}
+ else if (info == NULL && op_dtypes[0] == NULL) {
+ /*
+ * If we have a reduction, fill in the unspecified input/array
+ * assuming it should have the same dtype as the operand input
+ * (or the output one if given).
+ * Then, try again. In some cases, this will choose different
+ * paths, such as `ll->?` instead of an `??->?` loop for `np.equal`
+ * when the input is `.l->.` (`.` meaning undefined). This will
+ * then cause an error. But cast to `?` would always lose
+ * information, and in many cases important information:
+ *
+ * ```python
+ * from operator import eq
+ * from functools import reduce
+ *
+ * reduce(eq, [1, 2, 3]) != reduce(eq, [True, True, True])
+ * ```
+ *
+ * The special cases being `logical_(and|or|xor)` which can always
+ * cast to boolean ahead of time and still give the right answer
+ * (unsafe cast to bool is fine here). We special case these at
+ * the time of this comment (NumPy 1.21).
+ */
+ assert(ufunc->nin == 2 && ufunc->nout == 1);
+ op_dtypes[0] = op_dtypes[2] != NULL ? op_dtypes[2] : op_dtypes[1];
+ Py_INCREF(op_dtypes[0]);
+ return promote_and_get_info_and_ufuncimpl(ufunc,
+ ops, signature, op_dtypes, allow_legacy_promotion, 1);
+ }
}
/*
@@ -743,3 +823,94 @@ promote_and_get_ufuncimpl(PyUFuncObject *ufunc,
return method;
}
+
+
+/*
+ * Special promoter for the logical ufuncs. The logical ufuncs can always
+ * use the ??->? and still get the correct output (as long as the output
+ * is not supposed to be `object`).
+ */
+static int
+logical_ufunc_promoter(PyUFuncObject *NPY_UNUSED(ufunc),
+ PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
+ PyArray_DTypeMeta *new_op_dtypes[])
+{
+ /*
+ * If we find any object DType at all, we currently force to object.
+ * However, if the output is specified and not object, there is no point,
+ * it should be just as well to cast the input rather than doing the
+ * unsafe out cast.
+ */
+ int force_object = 0;
+
+ for (int i = 0; i < 3; i++) {
+ PyArray_DTypeMeta *item;
+ if (signature[i] != NULL) {
+ item = signature[i];
+ Py_INCREF(item);
+ if (item->type_num == NPY_OBJECT) {
+ force_object = 1;
+ }
+ }
+ else {
+ /* Always override to boolean */
+ item = PyArray_DTypeFromTypeNum(NPY_BOOL);
+ if (op_dtypes[i] != NULL && op_dtypes[i]->type_num == NPY_OBJECT) {
+ force_object = 1;
+ }
+ }
+ new_op_dtypes[i] = item;
+ }
+
+ if (!force_object || (op_dtypes[2] != NULL
+ && op_dtypes[2]->type_num != NPY_OBJECT)) {
+ return 0;
+ }
+ /*
+ * Actually, we have to use the OBJECT loop after all, set all we can
+ * to object (that might not work out, but try).
+ *
+ * NOTE: Change this to check for `op_dtypes[0] == NULL` to STOP
+ * returning `object` for `np.logical_and.reduce(obj_arr)`
+ * which will also affect `np.all` and `np.any`!
+ */
+ for (int i = 0; i < 3; i++) {
+ if (signature[i] != NULL) {
+ continue;
+ }
+ Py_SETREF(new_op_dtypes[i], PyArray_DTypeFromTypeNum(NPY_OBJECT));
+ }
+ return 0;
+}
+
+
+NPY_NO_EXPORT int
+install_logical_ufunc_promoter(PyObject *ufunc)
+{
+ if (PyObject_Type(ufunc) != (PyObject *)&PyUFunc_Type) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "internal numpy array, logical ufunc was not a ufunc?!");
+ return -1;
+ }
+ PyObject *dtype_tuple = PyTuple_Pack(3,
+ &PyArrayDescr_Type, &PyArrayDescr_Type, &PyArrayDescr_Type, NULL);
+ if (dtype_tuple == NULL) {
+ return -1;
+ }
+ PyObject *promoter = PyCapsule_New(&logical_ufunc_promoter,
+ "numpy._ufunc_promoter", NULL);
+ if (promoter == NULL) {
+ Py_DECREF(dtype_tuple);
+ return -1;
+ }
+
+ PyObject *info = PyTuple_Pack(2, dtype_tuple, promoter);
+ Py_DECREF(dtype_tuple);
+ Py_DECREF(promoter);
+ if (info == NULL) {
+ return -1;
+ }
+
+ return PyUFunc_AddLoop((PyUFuncObject *)ufunc, info, 0);
+}
+
diff --git a/numpy/core/src/umath/dispatching.h b/numpy/core/src/umath/dispatching.h
index 8d116873c..2f314615d 100644
--- a/numpy/core/src/umath/dispatching.h
+++ b/numpy/core/src/umath/dispatching.h
@@ -26,4 +26,8 @@ NPY_NO_EXPORT PyObject *
add_and_return_legacy_wrapping_ufunc_loop(PyUFuncObject *ufunc,
PyArray_DTypeMeta *operation_dtypes[], int ignore_duplicate);
+NPY_NO_EXPORT int
+install_logical_ufunc_promoter(PyObject *ufunc);
+
+
#endif /*_NPY_DISPATCHING_H */
diff --git a/numpy/core/src/umath/legacy_array_method.c b/numpy/core/src/umath/legacy_array_method.c
index 77b1b9013..a423823d4 100644
--- a/numpy/core/src/umath/legacy_array_method.c
+++ b/numpy/core/src/umath/legacy_array_method.c
@@ -217,6 +217,25 @@ PyArray_NewLegacyWrappingArrayMethod(PyUFuncObject *ufunc,
*/
int any_output_flexible = 0;
NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufunc->nargs == 3 &&
+ signature[0]->type_num == NPY_BOOL &&
+ signature[1]->type_num == NPY_BOOL &&
+ signature[2]->type_num == NPY_BOOL && (
+ strcmp(ufunc->name, "logical_or") == 0 ||
+ strcmp(ufunc->name, "logical_and") == 0 ||
+ strcmp(ufunc->name, "logical_xor") == 0)) {
+ /*
+ * This is a logical ufunc, and the `??->?` loop`. It is always OK
+ * to cast any input to bool, because that cast is defined by
+ * truthiness.
+ * This allows us to ensure two things:
+ * 1. `np.all`/`np.any` know that force casting the input is OK
+ * (they must do this since there are no `?l->?`, etc. loops)
+ * 2. The logical functions automatically work for any DType
+ * implementing a cast to boolean.
+ */
+ flags = _NPY_METH_FORCE_CAST_INPUTS;
+ }
for (int i = 0; i < ufunc->nin+ufunc->nout; i++) {
if (signature[i]->singleton->flags & (
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index fa7844014..6076e0b2d 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1506,8 +1506,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const *
*/
/**begin repeat
- * #func = rint, ceil, floor, trunc#
- * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ * #func = rint, floor, trunc#
+ * #scalarf = npy_rint, npy_floor, npy_trunc#
*/
/**begin repeat1
@@ -1542,8 +1542,8 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
*/
/**begin repeat2
- * #func = rint, ceil, floor, trunc#
- * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ * #func = rint, floor, trunc#
+ * #scalarf = npy_rint, npy_floor, npy_trunc#
*/
NPY_NO_EXPORT NPY_GCC_OPT_3 void
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 90115006f..3eafbdf66 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -186,7 +186,7 @@ NPY_NO_EXPORT void
* #TYPE = FLOAT, DOUBLE#
*/
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)))
@@ -227,7 +227,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@,
/**end repeat**/
/**begin repeat
- * #func = sin, cos#
+ * #func = sin, cos#
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@,
@@ -274,7 +274,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
/**end repeat**/
/**begin repeat
- * #func = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
*/
/**begin repeat1
diff --git a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
index 8c2c83e7c..cd9b2ed54 100644
--- a/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
+++ b/numpy/core/src/umath/loops_trigonometric.dispatch.c.src
@@ -209,7 +209,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(FLOAT_@func@)
const npy_intp ssrc = steps[0] / lsize;
const npy_intp sdst = steps[1] / lsize;
npy_intp len = dimensions[0];
- assert(steps[0] % lsize == 0 && steps[1] % lsize == 0);
+ assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
#if NPY_SIMD_FMA3
if (is_mem_overlap(src, steps[0], dst, steps[1], len) ||
!npyv_loadable_stride_f32(ssrc) || !npyv_storable_stride_f32(sdst)
diff --git a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
index 852604655..a8289fc51 100644
--- a/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_umath_fp.dispatch.c.src
@@ -96,7 +96,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@func@)
const npy_intp ssrc = steps[0] / lsize;
const npy_intp sdst = steps[1] / lsize;
const npy_intp len = dimensions[0];
- assert(steps[0] % lsize == 0 && steps[1] % lsize == 0);
+ assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
if (!is_mem_overlap(src, steps[0], dst, steps[1], len) &&
npyv_loadable_stride_@sfx@(ssrc) &&
npyv_storable_stride_@sfx@(sdst)) {
@@ -125,7 +125,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(DOUBLE_@func@)
const npy_intp ssrc = steps[0] / lsize;
const npy_intp sdst = steps[1] / lsize;
const npy_intp len = dimensions[0];
- assert(steps[0] % lsize == 0 && steps[1] % lsize == 0);
+ assert(len <= 1 || (steps[0] % lsize == 0 && steps[1] % lsize == 0));
if (!is_mem_overlap(src, steps[0], dst, steps[1], len) &&
npyv_loadable_stride_f64(ssrc) &&
npyv_storable_stride_f64(sdst)) {
diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
index 2d5917282..93761b98c 100644
--- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
@@ -1,6 +1,8 @@
/*@targets
** $maxopt baseline
- ** sse2 vsx2 neon
+ ** sse2 sse41
+ ** vsx2
+ ** neon asimd
**/
/**
* Force use SSE only on x86, even if AVX2 or AVX512F are enabled
@@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a)
#define c_sqrt_f64 npy_sqrt
#endif
+#define c_ceil_f32 npy_ceilf
+#define c_ceil_f64 npy_ceil
+
/********************************************************************************
** Defining the SIMD kernels
********************************************************************************/
@@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a)
*/
#if @VCHK@
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #repl_0w1 = 0, 0, 0, 1#
- * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #repl_0w1 = 0, 0, 0, 0, 1#
+ * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
*/
/**begin repeat2
* #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG#
@@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@
* #VCHK = NPY_SIMD, NPY_SIMD_F64#
*/
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #clear = 0, 1, 0, 0#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #clear = 0, 0, 1, 0, 0#
*/
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
@@ -258,7 +263,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
npy_intp len = dimensions[0];
#if @VCHK@
const int lsize = sizeof(npyv_lanetype_@sfx@);
- assert(src_step % lsize == 0 && dst_step % lsize == 0);
+ assert(len <= 1 || (src_step % lsize == 0 && dst_step % lsize == 0));
if (is_mem_overlap(src, src_step, dst, dst_step, len)) {
goto no_unroll;
}
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index d5a251368..c28c8abd8 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -145,14 +145,12 @@ PyArray_CopyInitialReduceValues(
* boilerplate code, just calling the appropriate inner loop function where
* necessary.
*
+ * context : The ArrayMethod context (with ufunc, method, and descriptors).
* operand : The array to be reduced.
* out : NULL, or the array into which to place the result.
* wheremask : NOT YET SUPPORTED, but this parameter is placed here
* so that support can be added in the future without breaking
* API compatibility. Pass in NULL.
- * operand_dtype : The dtype the inner loop expects for the operand.
- * result_dtype : The dtype the inner loop expects for the result.
- * casting : The casting rule to apply to the operands.
* axis_flags : Flags indicating the reduction axes of 'operand'.
* reorderable : If True, the reduction being done is reorderable, which
* means specifying multiple axes of reduction at once is ok,
@@ -182,10 +180,8 @@ PyArray_CopyInitialReduceValues(
* generalized ufuncs!)
*/
NPY_NO_EXPORT PyArrayObject *
-PyUFunc_ReduceWrapper(
+PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
- PyArray_Descr *operand_dtype, PyArray_Descr *result_dtype,
- NPY_CASTING casting,
npy_bool *axis_flags, int reorderable, int keepdims,
PyObject *identity, PyArray_ReduceLoopFunc *loop,
void *data, npy_intp buffersize, const char *funcname, int errormask)
@@ -199,6 +195,8 @@ PyUFunc_ReduceWrapper(
PyArrayObject *op[3];
PyArray_Descr *op_dtypes[3];
npy_uint32 it_flags, op_flags[3];
+ /* Loop auxdata (must be freed on error) */
+ NpyAuxData *auxdata = NULL;
/* More than one axis means multiple orders are possible */
if (!reorderable && count_axes(PyArray_NDIM(operand), axis_flags) > 1) {
@@ -221,8 +219,8 @@ PyUFunc_ReduceWrapper(
/* Set up the iterator */
op[0] = out;
op[1] = operand;
- op_dtypes[0] = result_dtype;
- op_dtypes[1] = operand_dtype;
+ op_dtypes[0] = context->descriptors[0];
+ op_dtypes[1] = context->descriptors[1];
it_flags = NPY_ITER_BUFFERED |
NPY_ITER_EXTERNAL_LOOP |
@@ -291,7 +289,7 @@ PyUFunc_ReduceWrapper(
}
iter = NpyIter_AdvancedNew(wheremask == NULL ? 2 : 3, op, it_flags,
- NPY_KEEPORDER, casting,
+ NPY_KEEPORDER, NPY_UNSAFE_CASTING,
op_flags,
op_dtypes,
PyArray_NDIM(operand), op_axes, NULL, buffersize);
@@ -301,9 +299,29 @@ PyUFunc_ReduceWrapper(
result = NpyIter_GetOperandArray(iter)[0];
- int needs_api = NpyIter_IterationNeedsAPI(iter);
- /* Start with the floating-point exception flags cleared */
- npy_clear_floatstatus_barrier((char*)&iter);
+ PyArrayMethod_StridedLoop *strided_loop;
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ npy_intp fixed_strides[3];
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ if (wheremask != NULL) {
+ if (PyArrayMethod_GetMaskedStridedLoop(context,
+ 1, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ }
+ else {
+ if (context->method->get_strided_loop(context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ }
+
+ int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
/*
* Initialize the result to the reduction unit if possible,
@@ -345,16 +363,18 @@ PyUFunc_ReduceWrapper(
strideptr = NpyIter_GetInnerStrideArray(iter);
countptr = NpyIter_GetInnerLoopSizePtr(iter);
- if (loop(iter, dataptr, strideptr, countptr,
- iternext, needs_api, skip_first_count, data) < 0) {
+ if (loop(context, strided_loop, auxdata,
+ iter, dataptr, strideptr, countptr, iternext,
+ needs_api, skip_first_count) < 0) {
goto fail;
}
}
- /* Check whether any errors occurred during the loop */
- if (PyErr_Occurred() ||
- _check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
- goto fail;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even on error */
+ if (_check_ufunc_fperr(errormask, NULL, "reduce") < 0) {
+ goto fail;
+ }
}
if (out != NULL) {
@@ -369,6 +389,7 @@ PyUFunc_ReduceWrapper(
return result;
fail:
+ NPY_AUXDATA_FREE(auxdata);
if (iter != NULL) {
NpyIter_Deallocate(iter);
}
diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h
index 372605dba..2170e27a7 100644
--- a/numpy/core/src/umath/reduction.h
+++ b/numpy/core/src/umath/reduction.h
@@ -19,93 +19,17 @@ typedef int (PyArray_AssignReduceIdentityFunc)(PyArrayObject *result,
void *data);
/*
- * This is a function for the reduce loop.
+ * Inner definition of the reduce loop, only used for a static function.
+ * At some point around NumPy 1.6, there was probably an intention to make
+ * the reduce loop customizable at this level (per ufunc?).
*
- * The needs_api parameter indicates whether it's ok to release the GIL during
- * the loop, such as when the iternext() function never calls
- * a function which could raise a Python exception.
- *
- * The skip_first_count parameter indicates how many elements need to be
- * skipped based on NpyIter_IsFirstVisit checks. This can only be positive
- * when the 'assign_identity' parameter was NULL when calling
- * PyArray_ReduceWrapper.
- *
- * The loop gets two data pointers and two strides, and should
- * look roughly like this:
- * {
- * NPY_BEGIN_THREADS_DEF;
- * if (!needs_api) {
- * NPY_BEGIN_THREADS;
- * }
- * // This first-visit loop can be skipped if 'assign_identity' was non-NULL
- * if (skip_first_count > 0) {
- * do {
- * char *data0 = dataptr[0], *data1 = dataptr[1];
- * npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
- * npy_intp count = *countptr;
- *
- * // Skip any first-visit elements
- * if (NpyIter_IsFirstVisit(iter, 0)) {
- * if (stride0 == 0) {
- * --count;
- * --skip_first_count;
- * data1 += stride1;
- * }
- * else {
- * skip_first_count -= count;
- * count = 0;
- * }
- * }
- *
- * while (count--) {
- * *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
- * *(operand_t *)data1);
- * data0 += stride0;
- * data1 += stride1;
- * }
- *
- * // Jump to the faster loop when skipping is done
- * if (skip_first_count == 0) {
- * if (iternext(iter)) {
- * break;
- * }
- * else {
- * goto finish_loop;
- * }
- * }
- * } while (iternext(iter));
- * }
- * do {
- * char *data0 = dataptr[0], *data1 = dataptr[1];
- * npy_intp stride0 = strideptr[0], stride1 = strideptr[1];
- * npy_intp count = *countptr;
- *
- * while (count--) {
- * *(result_t *)data0 = my_reduce_op(*(result_t *)data0,
- * *(operand_t *)data1);
- * data0 += stride0;
- * data1 += stride1;
- * }
- * } while (iternext(iter));
- * finish_loop:
- * if (!needs_api) {
- * NPY_END_THREADS;
- * }
- * return (needs_api && PyErr_Occurred()) ? -1 : 0;
- * }
- *
- * If needs_api is True, this function should call PyErr_Occurred()
- * to check if an error occurred during processing, and return -1 for
- * error, 0 for success.
+ * TODO: This should be refactored/removed.
*/
-typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
- char **dataptr,
- npy_intp const *strideptr,
- npy_intp const *countptr,
- NpyIter_IterNextFunc *iternext,
- int needs_api,
- npy_intp skip_first_count,
- void *data);
+typedef int (PyArray_ReduceLoopFunc)(PyArrayMethod_Context *context,
+ PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata,
+ NpyIter *iter, char **dataptrs, npy_intp const *strides,
+ npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
+ int needs_api, npy_intp skip_first_count);
/*
* This function executes all the standard NumPy reduction function
@@ -138,16 +62,10 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter,
* errormask : forwarded from _get_bufsize_errmask
*/
NPY_NO_EXPORT PyArrayObject *
-PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out,
- PyArrayObject *wheremask,
- PyArray_Descr *operand_dtype,
- PyArray_Descr *result_dtype,
- NPY_CASTING casting,
- npy_bool *axis_flags, int reorderable,
- int keepdims,
- PyObject *identity,
- PyArray_ReduceLoopFunc *loop,
- void *data, npy_intp buffersize, const char *funcname,
- int errormask);
+PyUFunc_ReduceWrapper(PyArrayMethod_Context *context,
+ PyArrayObject *operand, PyArrayObject *out, PyArrayObject *wheremask,
+ npy_bool *axis_flags, int reorderable, int keepdims,
+ PyObject *identity, PyArray_ReduceLoopFunc *loop,
+ void *data, npy_intp buffersize, const char *funcname, int errormask);
#endif
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index d47be9a30..0e2c1ab8b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -169,7 +169,7 @@ run_@func@_avx512_skx_@TYPE@(char **args, npy_intp const *dimensions, npy_intp c
*/
/**begin repeat2
- * #func = rint, floor, ceil, trunc#
+ * #func = rint, floor, trunc#
*/
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -850,12 +850,6 @@ fma_floor_@vsub@(@vtype@ x)
}
NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
-fma_ceil_@vsub@(@vtype@ x)
-{
- return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
fma_trunc_@vsub@(@vtype@ x)
{
return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO);
@@ -988,12 +982,6 @@ avx512_floor_@vsub@(@vtype@ x)
}
NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
-avx512_ceil_@vsub@(@vtype@ x)
-{
- return _mm512_roundscale_@vsub@(x, 0x0A);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
avx512_trunc_@vsub@(@vtype@ x)
{
return _mm512_roundscale_@vsub@(x, 0x0B);
@@ -1327,8 +1315,8 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s
*/
/**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
*/
#if defined @CHK@
@@ -1398,8 +1386,8 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
*/
/**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
*/
#if defined @CHK@
diff --git a/numpy/core/src/umath/svml b/numpy/core/src/umath/svml
-Subproject 9f8af767ed6c75455d9a382af829048f8dd1806
+Subproject 1c5260a61e7dce6be48073dfa96291edb0a11d7
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 42290e8c9..186f18a62 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -616,9 +616,24 @@ _is_same_name(const char* s1, const char* s2)
}
/*
- * Sets core_num_dim_ix, core_num_dims, core_dim_ixs, core_offsets,
- * and core_signature in PyUFuncObject "ufunc". Returns 0 unless an
- * error occurred.
+ * Sets the following fields in the PyUFuncObject 'ufunc':
+ *
+ * Field Type Array Length
+ * core_enabled int (effectively bool) N/A
+ * core_num_dim_ix int N/A
+ * core_dim_flags npy_uint32 * core_num_dim_ix
+ * core_dim_sizes npy_intp * core_num_dim_ix
+ * core_num_dims int * nargs (i.e. nin+nout)
+ * core_offsets int * nargs
+ * core_dim_ixs int * sum(core_num_dims)
+ * core_signature char * strlen(signature) + 1
+ *
+ * The function assumes that the values that are arrays have not
+ * been set already, and sets these pointers to memory allocated
+ * with PyArray_malloc. These are freed when the ufunc dealloc
+ * method is called.
+ *
+ * Returns 0 unless an error occurred.
*/
static int
_parse_signature(PyUFuncObject *ufunc, const char *signature)
@@ -990,6 +1005,7 @@ convert_ufunc_arguments(PyUFuncObject *ufunc,
}
/* Convert and fill in output arguments */
+ memset(out_op_DTypes + nin, 0, nout * sizeof(*out_op_DTypes));
if (full_args.out != NULL) {
for (int i = 0; i < nout; i++) {
obj = PyTuple_GET_ITEM(full_args.out, i);
@@ -1047,6 +1063,7 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
PyArrayObject **op, PyArray_Descr **dtypes,
NPY_CASTING casting, npy_intp buffersize)
{
+ int force_cast_input = ufuncimpl->flags & _NPY_METH_FORCE_CAST_INPUTS;
int i, nin = ufuncimpl->nin, nop = nin + ufuncimpl->nout;
for (i = 0; i < nop; ++i) {
@@ -1070,7 +1087,13 @@ check_for_trivial_loop(PyArrayMethodObject *ufuncimpl,
must_copy = 1;
}
- if (PyArray_MinCastSafety(safety, casting) != casting) {
+ if (force_cast_input && i < nin) {
+ /*
+ * ArrayMethod flagged to ignore casting (logical funcs
+ * can force cast to bool)
+ */
+ }
+ else if (PyArray_MinCastSafety(safety, casting) != casting) {
return 0; /* the cast is not safe enough */
}
}
@@ -1360,8 +1383,15 @@ validate_casting(PyArrayMethodObject *method, PyUFuncObject *ufunc,
*/
return 0;
}
- if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
- return -1;
+ if (method->flags & _NPY_METH_FORCE_CAST_INPUTS) {
+ if (PyUFunc_ValidateOutCasting(ufunc, casting, ops, descriptors) < 0) {
+ return -1;
+ }
+ }
+ else {
+ if (PyUFunc_ValidateCasting(ufunc, casting, ops, descriptors) < 0) {
+ return -1;
+ }
}
return 0;
}
@@ -2470,9 +2500,9 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
/* Final preparation of the arraymethod call */
PyArrayMethod_Context context = {
- .caller = (PyObject *)ufunc,
- .method = ufuncimpl,
- .descriptors = operation_descrs,
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
};
PyArrayMethod_StridedLoop *strided_loop;
NPY_ARRAYMETHOD_FLAGS flags = 0;
@@ -2527,7 +2557,7 @@ PyUFunc_GeneralizedFunctionInternal(PyUFuncObject *ufunc,
PyArray_free(inner_strides);
NPY_AUXDATA_FREE(auxdata);
- if (NpyIter_Deallocate(iter) < 0) {
+ if (!NpyIter_Deallocate(iter)) {
retval = -1;
}
@@ -2592,9 +2622,9 @@ PyUFunc_GenericFunctionInternal(PyUFuncObject *ufunc,
/* Final preparation of the arraymethod call */
PyArrayMethod_Context context = {
- .caller = (PyObject *)ufunc,
- .method = ufuncimpl,
- .descriptors = operation_descrs,
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
};
/* Do the ufunc loop */
@@ -2661,195 +2691,129 @@ PyUFunc_GenericFunction(PyUFuncObject *NPY_UNUSED(ufunc),
/*
- * Given the output type, finds the specified binary op. The
- * ufunc must have nin==2 and nout==1. The function may modify
- * otype if the given type isn't found.
+ * Promote and resolve a reduction like operation.
*
- * Returns 0 on success, -1 on failure.
+ * @param ufunc
+ * @param arr The operation array
+ * @param out The output array or NULL if not provided. Note that NumPy always
+ * used out to mean the same as `dtype=out.dtype` and never passed
+ * the array itself to the type-resolution.
+ * @param signature The DType signature, which may already be set due to the
+ * dtype passed in by the user, or the special cases (add, multiply).
+ * (Contains strong references and may be modified.)
+ * @param enforce_uniform_args If `NPY_TRUE` fully uniform dtypes/descriptors
+ * are enforced as required for accumulate and (currently) reduceat.
+ * @param out_descrs New references to the resolved descriptors (on success).
+ * @param method The ufunc method, "reduce", "reduceat", or "accumulate".
+
+ * @returns ufuncimpl The `ArrayMethod` implementation to use. Or NULL if an
+ * error occurred.
*/
-static int
-get_binary_op_function(PyUFuncObject *ufunc, int *otype,
- PyUFuncGenericFunction *out_innerloop,
- void **out_innerloopdata)
+static PyArrayMethodObject *
+reducelike_promote_and_resolve(PyUFuncObject *ufunc,
+ PyArrayObject *arr, PyArrayObject *out,
+ PyArray_DTypeMeta *signature[3],
+ npy_bool enforce_uniform_args, PyArray_Descr *out_descrs[3],
+ char *method)
{
- int i;
-
- NPY_UF_DBG_PRINT1("Getting binary op function for type number %d\n",
- *otype);
-
- /* If the type is custom and there are userloops, search for it here */
- if (ufunc->userloops != NULL && PyTypeNum_ISUSERDEF(*otype)) {
- PyObject *key, *obj;
- key = PyLong_FromLong(*otype);
- if (key == NULL) {
- return -1;
- }
- obj = PyDict_GetItemWithError(ufunc->userloops, key);
- Py_DECREF(key);
- if (obj == NULL && PyErr_Occurred()) {
- return -1;
- }
- else if (obj != NULL) {
- PyUFunc_Loop1d *funcdata = PyCapsule_GetPointer(obj, NULL);
- if (funcdata == NULL) {
- return -1;
- }
- while (funcdata != NULL) {
- int *types = funcdata->arg_types;
-
- if (types[0] == *otype && types[1] == *otype &&
- types[2] == *otype) {
- *out_innerloop = funcdata->func;
- *out_innerloopdata = funcdata->data;
- return 0;
- }
+ /*
 * Note that the `ops` is not really correct. But legacy resolution
+ * cannot quite handle the correct ops (e.g. a NULL first item if `out`
+ * is NULL), and it should only matter in very strange cases.
+ */
+ PyArrayObject *ops[3] = {arr, arr, NULL};
+ /*
+ * TODO: If `out` is not provided, arguably `initial` could define
+ * the first DType (and maybe also the out one), that way
+ * `np.add.reduce([1, 2, 3], initial=3.4)` would return a float
+ * value. As of 1.20, it returned an integer, so that should
+ * probably go to an error/warning first.
+ */
+ PyArray_DTypeMeta *operation_DTypes[3] = {
+ NULL, NPY_DTYPE(PyArray_DESCR(arr)), NULL};
+ Py_INCREF(operation_DTypes[1]);
- funcdata = funcdata->next;
- }
- }
+ if (out != NULL) {
+ operation_DTypes[0] = NPY_DTYPE(PyArray_DESCR(out));
+ Py_INCREF(operation_DTypes[0]);
+ operation_DTypes[2] = operation_DTypes[0];
+ Py_INCREF(operation_DTypes[2]);
}
- /* Search for a function with compatible inputs */
- for (i = 0; i < ufunc->ntypes; ++i) {
- char *types = ufunc->types + i*ufunc->nargs;
-
- NPY_UF_DBG_PRINT3("Trying loop with signature %d %d -> %d\n",
- types[0], types[1], types[2]);
-
- if (PyArray_CanCastSafely(*otype, types[0]) &&
- types[0] == types[1] &&
- (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
- /* If the signature is "xx->x", we found the loop */
- if (types[2] == types[0]) {
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- *otype = types[0];
- return 0;
- }
- /*
- * Otherwise, we found the natural type of the reduction,
- * replace otype and search again
- */
- else {
- *otype = types[2];
- break;
- }
- }
+ PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
+ ops, signature, operation_DTypes, NPY_FALSE, NPY_TRUE);
+ Py_DECREF(operation_DTypes[1]);
+ if (out != NULL) {
+ Py_DECREF(operation_DTypes[0]);
+ Py_DECREF(operation_DTypes[2]);
}
-
- /* Search for the exact function */
- for (i = 0; i < ufunc->ntypes; ++i) {
- char *types = ufunc->types + i*ufunc->nargs;
-
- if (PyArray_CanCastSafely(*otype, types[0]) &&
- types[0] == types[1] &&
- types[1] == types[2] &&
- (*otype == NPY_OBJECT || types[0] != NPY_OBJECT)) {
- /* Since the signature is "xx->x", we found the loop */
- *out_innerloop = ufunc->functions[i];
- *out_innerloopdata = ufunc->data[i];
- *otype = types[0];
- return 0;
- }
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- return -1;
-}
-
-static int
-reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr,
- PyArray_Descr *odtype, PyArray_Descr **out_dtype)
-{
- int i, retcode;
- PyArrayObject *op[3] = {arr, arr, NULL};
- PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
- const char *ufunc_name = ufunc_get_name_cstr(ufunc);
- PyObject *type_tup = NULL;
-
- *out_dtype = NULL;
-
/*
- * If odtype is specified, make a type tuple for the type
- * resolution.
+ * Find the correct descriptors for the operation. We use unsafe casting
+ * for historic reasons: The logic ufuncs required it to cast everything to
+ * boolean. However, we now special case the logical ufuncs, so that the
+ * casting safety could in principle be set to the default same-kind.
+ * (although this should possibly happen through a deprecation)
*/
- if (odtype != NULL) {
- type_tup = PyTuple_Pack(3, odtype, odtype, Py_None);
- if (type_tup == NULL) {
- return -1;
- }
- }
-
- /* Use the type resolution function to find our loop */
- retcode = ufunc->type_resolver(
- ufunc, NPY_UNSAFE_CASTING,
- op, type_tup, dtypes);
- Py_DECREF(type_tup);
- if (retcode == -1) {
- return -1;
- }
- else if (retcode == -2) {
- PyErr_Format(PyExc_RuntimeError,
- "type resolution returned NotImplemented to "
- "reduce ufunc %s", ufunc_name);
- return -1;
+ if (resolve_descriptors(3, ufunc, ufuncimpl,
+ ops, out_descrs, signature, NPY_UNSAFE_CASTING) < 0) {
+ return NULL;
}
/*
- * The first two type should be equivalent. Because of how
- * reduce has historically behaved in NumPy, the return type
- * could be different, and it is the return type on which the
- * reduction occurs.
+ * The first operand and output should be the same array, so they should
+ * be identical. The second argument can be different for reductions,
+ * but is checked to be identical for accumulate and reduceat.
*/
- if (!PyArray_EquivTypes(dtypes[0], dtypes[1])) {
- for (i = 0; i < 3; ++i) {
- Py_DECREF(dtypes[i]);
- }
- PyErr_Format(PyExc_RuntimeError,
- "could not find a type resolution appropriate for "
- "reduce ufunc %s", ufunc_name);
- return -1;
+ if (out_descrs[0] != out_descrs[2] || (
+ enforce_uniform_args && out_descrs[0] != out_descrs[1])) {
+ PyErr_Format(PyExc_TypeError,
+ "the resolved dtypes are not compatible with %s.%s",
+ ufunc_get_name_cstr(ufunc), method);
+ goto fail;
+ }
+ /* TODO: This really should _not_ be unsafe casting (same above)! */
+ if (validate_casting(ufuncimpl,
+ ufunc, ops, out_descrs, NPY_UNSAFE_CASTING) < 0) {
+ goto fail;
}
- Py_DECREF(dtypes[0]);
- Py_DECREF(dtypes[1]);
- *out_dtype = dtypes[2];
+ return ufuncimpl;
- return 0;
+ fail:
+ for (int i = 0; i < 3; ++i) {
+ Py_DECREF(out_descrs[i]);
+ }
+ return NULL;
}
+
static int
-reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
- npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
- int needs_api, npy_intp skip_first_count, void *data)
+reduce_loop(PyArrayMethod_Context *context,
+ PyArrayMethod_StridedLoop *strided_loop, NpyAuxData *auxdata,
+ NpyIter *iter, char **dataptrs, npy_intp const *strides,
+ npy_intp const *countptr, NpyIter_IterNextFunc *iternext,
+ int needs_api, npy_intp skip_first_count)
{
- PyArray_Descr *dtypes[3], **iter_dtypes;
- PyUFuncObject *ufunc = (PyUFuncObject *)data;
- char *dataptrs_copy[3];
- npy_intp strides_copy[3];
+ int retval;
+ char *dataptrs_copy[4];
+ npy_intp strides_copy[4];
npy_bool masked;
- /* The normal selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
-
NPY_BEGIN_THREADS_DEF;
/* Get the number of operands, to determine whether "where" is used */
masked = (NpyIter_GetNOp(iter) == 3);
- /* Get the inner loop */
- iter_dtypes = NpyIter_GetDescrArray(iter);
- dtypes[0] = iter_dtypes[0];
- dtypes[1] = iter_dtypes[1];
- dtypes[2] = iter_dtypes[0];
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
- return -1;
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
}
- NPY_BEGIN_THREADS_NDITER(iter);
-
if (skip_first_count > 0) {
- do {
+ assert(!masked); /* Path currently not available for masked */
+ while (1) {
npy_intp count = *countptr;
/* Skip any first-visit elements */
@@ -2872,27 +2836,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
strides_copy[0] = strides[0];
strides_copy[1] = strides[1];
strides_copy[2] = strides[0];
- innerloop(dataptrs_copy, &count,
- strides_copy, innerloopdata);
- if (needs_api && PyErr_Occurred()) {
+ retval = strided_loop(context,
+ dataptrs_copy, &count, strides_copy, auxdata);
+ if (retval < 0) {
goto finish_loop;
}
- /* Jump to the faster loop when skipping is done */
- if (skip_first_count == 0) {
- if (iternext(iter)) {
- break;
- }
- else {
- goto finish_loop;
- }
+ /* Advance loop, and abort on error (or finish) */
+ if (!iternext(iter)) {
+ goto finish_loop;
}
- } while (iternext(iter));
- }
- if (needs_api && PyErr_Occurred()) {
- goto finish_loop;
+ /* When skipping is done break and continue with faster loop */
+ if (skip_first_count == 0) {
+ break;
+ }
+ }
}
do {
@@ -2903,42 +2863,23 @@ reduce_loop(NpyIter *iter, char **dataptrs, npy_intp const *strides,
strides_copy[0] = strides[0];
strides_copy[1] = strides[1];
strides_copy[2] = strides[0];
-
- if (!masked) {
- innerloop(dataptrs_copy, countptr,
- strides_copy, innerloopdata);
+ if (masked) {
+ dataptrs_copy[3] = dataptrs[2];
+ strides_copy[3] = strides[2];
}
- else {
- npy_intp count = *countptr;
- char *maskptr = dataptrs[2];
- npy_intp mask_stride = strides[2];
- /* Optimization for when the mask is broadcast */
- npy_intp n = mask_stride == 0 ? count : 1;
- while (count) {
- char mask = *maskptr;
- maskptr += mask_stride;
- while (n < count && mask == *maskptr) {
- n++;
- maskptr += mask_stride;
- }
- /* If mask set, apply inner loop on this contiguous region */
- if (mask) {
- innerloop(dataptrs_copy, &n,
- strides_copy, innerloopdata);
- }
- dataptrs_copy[0] += n * strides[0];
- dataptrs_copy[1] += n * strides[1];
- dataptrs_copy[2] = dataptrs_copy[0];
- count -= n;
- n = 1;
- }
+
+ retval = strided_loop(context,
+ dataptrs_copy, countptr, strides_copy, auxdata);
+ if (retval < 0) {
+ goto finish_loop;
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+
+ } while (iternext(iter));
finish_loop:
NPY_END_THREADS;
- return (needs_api && PyErr_Occurred()) ? -1 : 0;
+ return retval;
}
/*
@@ -2959,15 +2900,14 @@ finish_loop:
* this function does not validate them.
*/
static PyArrayObject *
-PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
- int naxes, int *axes, PyArray_Descr *odtype, int keepdims,
+PyUFunc_Reduce(PyUFuncObject *ufunc,
+ PyArrayObject *arr, PyArrayObject *out,
+ int naxes, int *axes, PyArray_DTypeMeta *signature[3], int keepdims,
PyObject *initial, PyArrayObject *wheremask)
{
int iaxes, ndim;
npy_bool reorderable;
npy_bool axis_flags[NPY_MAXDIMS];
- PyArray_Descr *dtype;
- PyArrayObject *result;
PyObject *identity;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
/* These parameters come from a TLS global */
@@ -2994,6 +2934,7 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
}
/* Get the identity */
+ /* TODO: Both of these should be provided by the ArrayMethod! */
identity = _get_identity(ufunc, &reorderable);
if (identity == NULL) {
return NULL;
@@ -3017,21 +2958,27 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
Py_INCREF(initial); /* match the reference count in the if above */
}
- /* Get the reduction dtype */
- if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) {
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_FALSE, descrs, "reduce");
+ if (ufuncimpl == NULL) {
Py_DECREF(initial);
return NULL;
}
- result = PyUFunc_ReduceWrapper(arr, out, wheremask, dtype, dtype,
- NPY_UNSAFE_CASTING,
- axis_flags, reorderable,
- keepdims,
- initial,
- reduce_loop,
- ufunc, buffersize, ufunc_name, errormask);
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
- Py_DECREF(dtype);
+ PyArrayObject *result = PyUFunc_ReduceWrapper(&context,
+ arr, out, wheremask, axis_flags, reorderable, keepdims,
+ initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask);
+
+ for (int i = 0; i < 3; i++) {
+ Py_DECREF(descrs[i]);
+ }
Py_DECREF(initial);
return result;
}
@@ -3039,23 +2986,21 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
static PyObject *
PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
- int axis, int otype)
+ int axis, PyArray_DTypeMeta *signature[3])
{
PyArrayObject *op[2];
- PyArray_Descr *op_dtypes[2] = {NULL, NULL};
int op_axes_arrays[2][NPY_MAXDIMS];
int *op_axes[2] = {op_axes_arrays[0], op_axes_arrays[1]};
npy_uint32 op_flags[2];
- int idim, ndim, otype_final;
+ int idim, ndim;
int needs_api, need_outer_iterator;
- NpyIter *iter = NULL;
+ int res = 0;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
- const char *ufunc_name = ufunc_get_name_cstr(ufunc);
+ NpyIter *iter = NULL;
/* These parameters come from extobj= or from a TLS global */
int buffersize = 0, errormask = 0;
@@ -3077,42 +3022,32 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
/* Take a reference to out for later returning */
Py_XINCREF(out);
- otype_final = otype;
- if (get_binary_op_function(ufunc, &otype_final,
- &innerloop, &innerloopdata) < 0) {
- PyArray_Descr *dtype = PyArray_DescrFromType(otype);
- PyErr_Format(PyExc_ValueError,
- "could not find a matching type for %s.accumulate, "
- "requested type has type code '%c'",
- ufunc_name, dtype ? dtype->type : '-');
- Py_XDECREF(dtype);
- goto fail;
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_TRUE, descrs, "accumulate");
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- ndim = PyArray_NDIM(arr);
+ /* The below code assumes that all descriptors are identical: */
+ assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]);
- /*
- * Set up the output data type, using the input's exact
- * data type if the type number didn't change to preserve
- * metadata
- */
- if (PyArray_DESCR(arr)->type_num == otype_final) {
- if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) {
- op_dtypes[0] = PyArray_DESCR(arr);
- Py_INCREF(op_dtypes[0]);
- }
- else {
- op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr),
- NPY_NATIVE);
- }
- }
- else {
- op_dtypes[0] = PyArray_DescrFromType(otype_final);
- }
- if (op_dtypes[0] == NULL) {
+ if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) {
+ /* This can be removed, but the initial element copy needs fixing */
+ PyErr_SetString(PyExc_TypeError,
+ "accumulation currently only supports `object` dtype with "
+ "references");
goto fail;
}
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
+
+ ndim = PyArray_NDIM(arr);
+
#if NPY_UF_DBG_TRACING
printf("Found %s.accumulate inner loop with dtype : ", ufunc_name);
PyObject_Print((PyObject *)op_dtypes[0], stdout, 0);
@@ -3138,9 +3073,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
need_outer_iterator = (ndim > 1);
/* We can't buffer, so must do UPDATEIFCOPY */
if (!PyArray_ISALIGNED(arr) || (out && !PyArray_ISALIGNED(out)) ||
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr)) ||
+ !PyArray_EquivTypes(descrs[1], PyArray_DESCR(arr)) ||
(out &&
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(out)))) {
+ !PyArray_EquivTypes(descrs[0], PyArray_DESCR(out)))) {
need_outer_iterator = 1;
}
/* If input and output overlap in memory, use iterator to figure it out */
@@ -3153,7 +3088,6 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
NPY_ITER_REFS_OK|
NPY_ITER_COPY_IF_OVERLAP;
- PyArray_Descr **op_dtypes_param = NULL;
/*
* The way accumulate is set up, we can't do buffering,
@@ -3170,13 +3104,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
*/
op_flags[0] |= NPY_ITER_UPDATEIFCOPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
op_flags[1] |= NPY_ITER_COPY|NPY_ITER_ALIGNED|NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE;
- op_dtypes_param = op_dtypes;
- op_dtypes[1] = op_dtypes[0];
+
NPY_UF_DBG_PRINT("Allocating outer iterator\n");
iter = NpyIter_AdvancedNew(2, op, flags,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
- op_flags,
- op_dtypes_param,
+ op_flags, descrs,
ndim_iter, op_axes, NULL, 0);
if (iter == NULL) {
goto fail;
@@ -3194,14 +3126,14 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
}
}
- /* Get the output */
+ /* Get the output from the iterator if it was allocated */
if (out == NULL) {
if (iter) {
op[0] = out = NpyIter_GetOperandArray(iter)[0];
Py_INCREF(out);
}
else {
- PyArray_Descr *dtype = op_dtypes[0];
+ PyArray_Descr *dtype = descrs[0];
Py_INCREF(dtype);
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
&PyArray_Type, dtype,
@@ -3210,10 +3142,31 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
if (out == NULL) {
goto fail;
}
-
}
}
+ npy_intp fixed_strides[3];
+ if (need_outer_iterator) {
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ }
+ else {
+ fixed_strides[0] = PyArray_STRIDES(op[0])[axis];
+ fixed_strides[1] = PyArray_STRIDES(op[1])[axis];
+ fixed_strides[2] = fixed_strides[0];
+ }
+
+
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufuncimpl->get_strided_loop(&context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
+
/*
* If the reduction axis has size zero, either return the reduction
* unit for UFUNC_REDUCE, or return the zero-sized output array
@@ -3234,7 +3187,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
NpyIter_IterNextFunc *iternext;
char **dataptr;
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
@@ -3242,8 +3195,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
goto fail;
}
dataptr = NpyIter_GetDataPtrArray(iter);
- needs_api = NpyIter_IterationNeedsAPI(iter);
-
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
/* Execute the loop with just the outer iterator */
count_m1 = PyArray_DIM(op[1], axis)-1;
@@ -3257,7 +3209,9 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
stride_copy[1] = stride1;
stride_copy[2] = stride0;
- NPY_BEGIN_THREADS_NDITER(iter);
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
+ }
do {
dataptr_copy[0] = dataptr[0];
@@ -3270,7 +3224,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
* Output (dataptr[0]) and input (dataptr[1]) may point to
* the same memory, e.g. np.add.accumulate(a, out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3290,18 +3244,17 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
dataptr_copy[2] += stride0;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count_m1);
- innerloop(dataptr_copy, &count_m1,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count_m1, stride_copy, auxdata);
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ } while (res == 0 && iternext(iter));
NPY_END_THREADS;
}
else if (iter == NULL) {
char *dataptr_copy[3];
- npy_intp stride_copy[3];
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
/* Execute the loop with no iterators */
npy_intp count = PyArray_DIM(op[1], axis);
@@ -3315,15 +3268,11 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
PyArray_NDIM(op[0]))) {
PyErr_SetString(PyExc_ValueError,
"provided out is the wrong size "
- "for the reduction");
+ "for the accumulation.");
goto fail;
}
stride0 = PyArray_STRIDE(op[0], axis);
- stride_copy[0] = stride0;
- stride_copy[1] = stride1;
- stride_copy[2] = stride0;
-
/* Turn the two items into three for the inner loop */
dataptr_copy[0] = PyArray_BYTES(op[0]);
dataptr_copy[1] = PyArray_BYTES(op[1]);
@@ -3335,7 +3284,7 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
* Output (dataptr[0]) and input (dataptr[1]) may point to the
* same memory, e.g. np.add.accumulate(a, out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3356,25 +3305,34 @@ PyUFunc_Accumulate(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out,
NPY_UF_DBG_PRINT1("iterator loop count %d\n", (int)count);
- needs_api = PyDataType_REFCHK(op_dtypes[0]);
+ needs_api = PyDataType_REFCHK(descrs[0]);
if (!needs_api) {
NPY_BEGIN_THREADS_THRESHOLDED(count);
}
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, fixed_strides, auxdata);
NPY_END_THREADS;
}
}
finish:
- Py_XDECREF(op_dtypes[0]);
- int res = 0;
+ NPY_AUXDATA_FREE(auxdata);
+ Py_DECREF(descrs[0]);
+ Py_DECREF(descrs[1]);
+ Py_DECREF(descrs[2]);
+
if (!NpyIter_Deallocate(iter)) {
res = -1;
}
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ res = _check_ufunc_fperr(errormask, NULL, "accumulate");
+ }
+
if (res < 0) {
Py_DECREF(out);
return NULL;
@@ -3384,7 +3342,11 @@ finish:
fail:
Py_XDECREF(out);
- Py_XDECREF(op_dtypes[0]);
+
+ NPY_AUXDATA_FREE(auxdata);
+ Py_XDECREF(descrs[0]);
+ Py_XDECREF(descrs[1]);
+ Py_XDECREF(descrs[2]);
NpyIter_Deallocate(iter);
@@ -3409,28 +3371,31 @@ fail:
* indices[1::2] = range(1,len(array))
*
* output shape is based on the size of indices
+ *
+ * TODO: Reduceat duplicates too much code from accumulate!
*/
static PyObject *
PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
- PyArrayObject *out, int axis, int otype)
+ PyArrayObject *out, int axis, PyArray_DTypeMeta *signature[3])
{
PyArrayObject *op[3];
- PyArray_Descr *op_dtypes[3] = {NULL, NULL, NULL};
int op_axes_arrays[3][NPY_MAXDIMS];
int *op_axes[3] = {op_axes_arrays[0], op_axes_arrays[1],
op_axes_arrays[2]};
npy_uint32 op_flags[3];
- int idim, ndim, otype_final;
- int need_outer_iterator = 0;
+ int idim, ndim;
+ int needs_api, need_outer_iterator = 0;
+
+ int res = 0;
NpyIter *iter = NULL;
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
+
/* The reduceat indices - ind must be validated outside this call */
npy_intp *reduceat_ind;
npy_intp i, ind_size, red_axis_size;
- /* The selected inner loop */
- PyUFuncGenericFunction innerloop = NULL;
- void *innerloopdata = NULL;
const char *ufunc_name = ufunc_get_name_cstr(ufunc);
char *opname = "reduceat";
@@ -3470,42 +3435,32 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
/* Take a reference to out for later returning */
Py_XINCREF(out);
- otype_final = otype;
- if (get_binary_op_function(ufunc, &otype_final,
- &innerloop, &innerloopdata) < 0) {
- PyArray_Descr *dtype = PyArray_DescrFromType(otype);
- PyErr_Format(PyExc_ValueError,
- "could not find a matching type for %s.%s, "
- "requested type has type code '%c'",
- ufunc_name, opname, dtype ? dtype->type : '-');
- Py_XDECREF(dtype);
- goto fail;
+ PyArray_Descr *descrs[3];
+ PyArrayMethodObject *ufuncimpl = reducelike_promote_and_resolve(ufunc,
+ arr, out, signature, NPY_TRUE, descrs, "reduceat");
+ if (ufuncimpl == NULL) {
+ return NULL;
}
- ndim = PyArray_NDIM(arr);
+ /* The below code assumes that all descriptors are identical: */
+ assert(descrs[0] == descrs[1] && descrs[0] == descrs[2]);
- /*
- * Set up the output data type, using the input's exact
- * data type if the type number didn't change to preserve
- * metadata
- */
- if (PyArray_DESCR(arr)->type_num == otype_final) {
- if (PyArray_ISNBO(PyArray_DESCR(arr)->byteorder)) {
- op_dtypes[0] = PyArray_DESCR(arr);
- Py_INCREF(op_dtypes[0]);
- }
- else {
- op_dtypes[0] = PyArray_DescrNewByteorder(PyArray_DESCR(arr),
- NPY_NATIVE);
- }
- }
- else {
- op_dtypes[0] = PyArray_DescrFromType(otype_final);
- }
- if (op_dtypes[0] == NULL) {
+ if (PyDataType_REFCHK(descrs[2]) && descrs[2]->type_num != NPY_OBJECT) {
+ /* This can be removed, but the initial element copy needs fixing */
+ PyErr_SetString(PyExc_TypeError,
+ "reduceat currently only supports `object` dtype with "
+ "references");
goto fail;
}
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = descrs,
+ };
+
+ ndim = PyArray_NDIM(arr);
+
#if NPY_UF_DBG_TRACING
printf("Found %s.%s inner loop with dtype : ", ufunc_name, opname);
PyObject_Print((PyObject *)op_dtypes[0], stdout, 0);
@@ -3532,11 +3487,13 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
op[2] = ind;
if (out != NULL || ndim > 1 || !PyArray_ISALIGNED(arr) ||
- !PyArray_EquivTypes(op_dtypes[0], PyArray_DESCR(arr))) {
+ !PyArray_EquivTypes(descrs[0], PyArray_DESCR(arr))) {
need_outer_iterator = 1;
}
if (need_outer_iterator) {
+ PyArray_Descr *op_dtypes[3] = {descrs[0], descrs[1], NULL};
+
npy_uint32 flags = NPY_ITER_ZEROSIZE_OK|
NPY_ITER_REFS_OK|
NPY_ITER_MULTI_INDEX|
@@ -3565,8 +3522,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
NPY_UF_DBG_PRINT("Allocating outer iterator\n");
iter = NpyIter_AdvancedNew(3, op, flags,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
- op_flags,
- op_dtypes,
+ op_flags, op_dtypes,
ndim, op_axes, NULL, 0);
if (iter == NULL) {
goto fail;
@@ -3590,11 +3546,15 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
Py_INCREF(out);
}
}
- /* Allocate the output for when there's no outer iterator */
- else if (out == NULL) {
- Py_INCREF(op_dtypes[0]);
+ else {
+ /*
+ * Allocate the output for when there's no outer iterator, we always
+ * use the outer_iteration path when `out` is passed.
+ */
+ assert(out == NULL);
+ Py_INCREF(descrs[0]);
op[0] = out = (PyArrayObject *)PyArray_NewFromDescr(
- &PyArray_Type, op_dtypes[0],
+ &PyArray_Type, descrs[0],
1, &ind_size, NULL, NULL,
0, NULL);
if (out == NULL) {
@@ -3602,6 +3562,28 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
}
}
+ npy_intp fixed_strides[3];
+ if (need_outer_iterator) {
+ NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
+ }
+ else {
+ fixed_strides[1] = PyArray_STRIDES(op[1])[axis];
+ }
+ /* The reduce axis does not advance here in the strided-loop */
+ fixed_strides[0] = 0;
+ fixed_strides[2] = 0;
+
+ NPY_ARRAYMETHOD_FLAGS flags = 0;
+ if (ufuncimpl->get_strided_loop(&context,
+ 1, 0, fixed_strides, &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
+
/*
* If the output has zero elements, return now.
*/
@@ -3619,8 +3601,8 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
npy_intp stride0, stride1;
npy_intp stride0_ind = PyArray_STRIDE(op[0], axis);
- int itemsize = op_dtypes[0]->elsize;
- int needs_api = NpyIter_IterationNeedsAPI(iter);
+ int itemsize = descrs[0]->elsize;
+ needs_api |= NpyIter_IterationNeedsAPI(iter);
/* Get the variables needed for the loop */
iternext = NpyIter_GetIterNext(iter, NULL);
@@ -3640,10 +3622,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
stride_copy[1] = stride1;
stride_copy[2] = stride0;
- NPY_BEGIN_THREADS_NDITER(iter);
+ if (!needs_api) {
+ NPY_BEGIN_THREADS_THRESHOLDED(NpyIter_GetIterSize(iter));
+ }
do {
-
for (i = 0; i < ind_size; ++i) {
npy_intp start = reduceat_ind[i],
end = (i == ind_size-1) ? count_m1+1 :
@@ -3661,7 +3644,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
* to the same memory, e.g.
* np.add.reduceat(a, np.arange(len(a)), out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of
* the reference count being zero temporarily.
@@ -3681,33 +3664,24 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
dataptr_copy[1] += stride1;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count);
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, stride_copy, auxdata);
}
}
- } while (!(needs_api && PyErr_Occurred()) && iternext(iter));
+ } while (res == 0 && iternext(iter));
NPY_END_THREADS;
}
else if (iter == NULL) {
char *dataptr_copy[3];
- npy_intp stride_copy[3];
- int itemsize = op_dtypes[0]->elsize;
+ int itemsize = descrs[0]->elsize;
npy_intp stride0_ind = PyArray_STRIDE(op[0], axis);
-
- /* Execute the loop with no iterators */
- npy_intp stride0 = 0, stride1 = PyArray_STRIDE(op[1], axis);
-
- int needs_api = PyDataType_REFCHK(op_dtypes[0]);
+ npy_intp stride1 = PyArray_STRIDE(op[1], axis);
NPY_UF_DBG_PRINT("UFunc: Reduce loop with no iterators\n");
- stride_copy[0] = stride0;
- stride_copy[1] = stride1;
- stride_copy[2] = stride0;
-
if (!needs_api) {
NPY_BEGIN_THREADS;
}
@@ -3729,7 +3703,7 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
* the same memory, e.g.
* np.add.reduceat(a, np.arange(len(a)), out=a).
*/
- if (otype == NPY_OBJECT) {
+ if (descrs[2]->type_num == NPY_OBJECT) {
/*
* Incref before decref to avoid the possibility of the
* reference count being zero temporarily.
@@ -3749,8 +3723,11 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
dataptr_copy[1] += stride1;
NPY_UF_DBG_PRINT1("iterator loop count %d\n",
(int)count);
- innerloop(dataptr_copy, &count,
- stride_copy, innerloopdata);
+ res = strided_loop(&context,
+ dataptr_copy, &count, fixed_strides, auxdata);
+ if (res != 0) {
+ break;
+ }
}
}
@@ -3758,8 +3735,21 @@ PyUFunc_Reduceat(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *ind,
}
finish:
- Py_XDECREF(op_dtypes[0]);
+ NPY_AUXDATA_FREE(auxdata);
+ Py_DECREF(descrs[0]);
+ Py_DECREF(descrs[1]);
+ Py_DECREF(descrs[2]);
+
if (!NpyIter_Deallocate(iter)) {
+ res = -1;
+ }
+
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ res = _check_ufunc_fperr(errormask, NULL, "reduceat");
+ }
+
+ if (res < 0) {
Py_DECREF(out);
return NULL;
}
@@ -3768,9 +3758,14 @@ finish:
fail:
Py_XDECREF(out);
- Py_XDECREF(op_dtypes[0]);
+
+ NPY_AUXDATA_FREE(auxdata);
+ Py_XDECREF(descrs[0]);
+ Py_XDECREF(descrs[1]);
+ Py_XDECREF(descrs[2]);
NpyIter_Deallocate(iter);
+
return NULL;
}
@@ -3868,7 +3863,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
PyArrayObject *mp = NULL, *wheremask = NULL, *ret = NULL;
PyObject *op = NULL;
PyArrayObject *indices = NULL;
- PyArray_Descr *otype = NULL;
+ PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL};
PyArrayObject *out = NULL;
int keepdims = 0;
PyObject *initial = NULL;
@@ -4012,13 +4007,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
if (otype_obj && otype_obj != Py_None) {
/* Use `_get_dtype` because `dtype` is a DType and not the instance */
- PyArray_DTypeMeta *dtype = _get_dtype(otype_obj);
- if (dtype == NULL) {
+ signature[0] = _get_dtype(otype_obj);
+ if (signature[0] == NULL) {
goto fail;
}
- otype = dtype->singleton;
- Py_INCREF(otype);
- Py_DECREF(dtype);
}
if (out_obj && !PyArray_OutputConverter(out_obj, &out)) {
goto fail;
@@ -4038,15 +4030,6 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
ndim = PyArray_NDIM(mp);
- /* Check to see that type (and otype) is not FLEXIBLE */
- if (PyArray_ISFLEXIBLE(mp) ||
- (otype && PyTypeNum_ISFLEXIBLE(otype->type_num))) {
- PyErr_Format(PyExc_TypeError,
- "cannot perform %s with flexible type",
- _reduce_type[operation]);
- goto fail;
- }
-
/* Convert the 'axis' parameter into a list of axes */
if (axes_obj == NULL) {
/* apply defaults */
@@ -4109,14 +4092,12 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
/*
- * If out is specified it determines otype
- * unless otype already specified.
+ * If no dtype is specified and out is not specified, we override the
+ * integer and bool dtype used for add and multiply.
+ *
+ * TODO: The following should be handled by a promoter!
*/
- if (otype == NULL && out != NULL) {
- otype = PyArray_DESCR(out);
- Py_INCREF(otype);
- }
- if (otype == NULL) {
+ if (signature[0] == NULL && out == NULL) {
/*
* For integer types --- make sure at least a long
* is used for add and multiply reduction to avoid overflow
@@ -4136,16 +4117,17 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
typenum = NPY_LONG;
}
}
+ signature[0] = PyArray_DTypeFromTypeNum(typenum);
}
- otype = PyArray_DescrFromType(typenum);
}
-
+ Py_XINCREF(signature[0]);
+ signature[2] = signature[0];
switch(operation) {
case UFUNC_REDUCE:
- ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes,
- otype, keepdims, initial, wheremask);
- Py_XDECREF(wheremask);
+ ret = PyUFunc_Reduce(ufunc,
+ mp, out, naxes, axes, signature, keepdims, initial, wheremask);
+ Py_XSETREF(wheremask, NULL);
break;
case UFUNC_ACCUMULATE:
if (ndim == 0) {
@@ -4157,8 +4139,8 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
"accumulate does not allow multiple axes");
goto fail;
}
- ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc, mp, out, axes[0],
- otype->type_num);
+ ret = (PyArrayObject *)PyUFunc_Accumulate(ufunc,
+ mp, out, axes[0], signature);
break;
case UFUNC_REDUCEAT:
if (ndim == 0) {
@@ -4171,19 +4153,22 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
goto fail;
}
ret = (PyArrayObject *)PyUFunc_Reduceat(ufunc,
- mp, indices, out, axes[0], otype->type_num);
+ mp, indices, out, axes[0], signature);
Py_SETREF(indices, NULL);
break;
}
+ if (ret == NULL) {
+ goto fail;
+ }
+
+ Py_DECREF(signature[0]);
+ Py_DECREF(signature[1]);
+ Py_DECREF(signature[2]);
+
Py_DECREF(mp);
- Py_DECREF(otype);
Py_XDECREF(full_args.in);
Py_XDECREF(full_args.out);
- if (ret == NULL) {
- return NULL;
- }
-
/* Wrap and return the output */
{
/* Find __array_wrap__ - note that these rules are different to the
@@ -4211,7 +4196,10 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc,
}
fail:
- Py_XDECREF(otype);
+ Py_XDECREF(signature[0]);
+ Py_XDECREF(signature[1]);
+ Py_XDECREF(signature[2]);
+
Py_XDECREF(mp);
Py_XDECREF(wheremask);
Py_XDECREF(indices);
@@ -4938,65 +4926,6 @@ fail:
/*
- * TODO: The implementation below can be replaced with PyVectorcall_Call
- * when available (should be Python 3.8+).
- */
-static PyObject *
-ufunc_generic_call(
- PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
-{
- Py_ssize_t len_args = PyTuple_GET_SIZE(args);
- /*
- * Wrapper for tp_call to tp_fastcall, to support both on older versions
- * of Python. (and generally simplifying support of both versions in the
- * same codebase.
- */
- if (kwds == NULL) {
- return ufunc_generic_fastcall(ufunc,
- PySequence_Fast_ITEMS(args), len_args, NULL, NPY_FALSE);
- }
-
- PyObject *new_args[NPY_MAXARGS];
- Py_ssize_t len_kwds = PyDict_Size(kwds);
-
- if (NPY_UNLIKELY(len_args + len_kwds > NPY_MAXARGS)) {
- /*
- * We do not have enough scratch-space, so we have to abort;
- * In practice this error should not be seen by users.
- */
- PyErr_Format(PyExc_ValueError,
- "%s() takes from %d to %d positional arguments but "
- "%zd were given",
- ufunc_get_name_cstr(ufunc) , ufunc->nin, ufunc->nargs, len_args);
- return NULL;
- }
-
- /* Copy args into the scratch space */
- for (Py_ssize_t i = 0; i < len_args; i++) {
- new_args[i] = PyTuple_GET_ITEM(args, i);
- }
-
- PyObject *kwnames = PyTuple_New(len_kwds);
-
- PyObject *key, *value;
- Py_ssize_t pos = 0;
- Py_ssize_t i = 0;
- while (PyDict_Next(kwds, &pos, &key, &value)) {
- Py_INCREF(key);
- PyTuple_SET_ITEM(kwnames, i, key);
- new_args[i + len_args] = value;
- i++;
- }
-
- PyObject *res = ufunc_generic_fastcall(ufunc,
- new_args, len_args, kwnames, NPY_FALSE);
- Py_DECREF(kwnames);
- return res;
-}
-
-
-#if PY_VERSION_HEX >= 0x03080000
-/*
* Implement vectorcallfunc which should be defined with Python 3.8+.
* In principle this could be backported, but the speed gain seems moderate
* since ufunc calls often do not have keyword arguments and always have
@@ -5013,7 +4942,6 @@ ufunc_generic_vectorcall(PyObject *ufunc,
return ufunc_generic_fastcall((PyUFuncObject *)ufunc,
args, PyVectorcall_NARGS(len_args), kwnames, NPY_FALSE);
}
-#endif /* PY_VERSION_HEX >= 0x03080000 */
NPY_NO_EXPORT PyObject *
@@ -5190,11 +5118,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
ufunc->core_dim_flags = NULL;
ufunc->userloops = NULL;
ufunc->ptr = NULL;
-#if PY_VERSION_HEX >= 0x03080000
ufunc->vectorcall = &ufunc_generic_vectorcall;
-#else
- ufunc->reserved2 = NULL;
-#endif
ufunc->reserved1 = 0;
ufunc->iter_flags = 0;
@@ -5892,15 +5816,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
PyArrayObject *op2_array = NULL;
PyArrayMapIterObject *iter = NULL;
PyArrayIterObject *iter2 = NULL;
- PyArray_Descr *dtypes[3] = {NULL, NULL, NULL};
PyArrayObject *operands[3] = {NULL, NULL, NULL};
PyArrayObject *array_operands[3] = {NULL, NULL, NULL};
- int needs_api = 0;
+ PyArray_DTypeMeta *signature[3] = {NULL, NULL, NULL};
+ PyArray_DTypeMeta *operand_DTypes[3] = {NULL, NULL, NULL};
+ PyArray_Descr *operation_descrs[3] = {NULL, NULL, NULL};
- PyUFuncGenericFunction innerloop;
- void *innerloopdata;
- npy_intp i;
int nop;
/* override vars */
@@ -5913,6 +5835,10 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
int buffersize;
int errormask = 0;
char * err_msg = NULL;
+
+ PyArrayMethod_StridedLoop *strided_loop;
+ NpyAuxData *auxdata = NULL;
+
NPY_BEGIN_THREADS_DEF;
if (ufunc->nin > 2) {
@@ -6000,26 +5926,51 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
/*
* Create dtypes array for either one or two input operands.
- * The output operand is set to the first input operand
+ * Compare to the logic in `convert_ufunc_arguments`.
+ * TODO: It may be good to review some of this behaviour, since the
+ * operand array is special (it is written to) similar to reductions.
+ * Using unsafe-casting as done here, is likely not desirable.
*/
operands[0] = op1_array;
+ operand_DTypes[0] = NPY_DTYPE(PyArray_DESCR(op1_array));
+ Py_INCREF(operand_DTypes[0]);
+ int force_legacy_promotion = 0;
+ int allow_legacy_promotion = NPY_DT_is_legacy(operand_DTypes[0]);
+
if (op2_array != NULL) {
operands[1] = op2_array;
- operands[2] = op1_array;
+ operand_DTypes[1] = NPY_DTYPE(PyArray_DESCR(op2_array));
+ Py_INCREF(operand_DTypes[1]);
+ allow_legacy_promotion &= NPY_DT_is_legacy(operand_DTypes[1]);
+ operands[2] = operands[0];
+ operand_DTypes[2] = operand_DTypes[0];
+ Py_INCREF(operand_DTypes[2]);
+
nop = 3;
+ if (allow_legacy_promotion && ((PyArray_NDIM(op1_array) == 0)
+ != (PyArray_NDIM(op2_array) == 0))) {
+ /* both are legacy and only one is 0-D: force legacy */
+ force_legacy_promotion = should_use_min_scalar(2, operands, 0, NULL);
+ }
}
else {
- operands[1] = op1_array;
+ operands[1] = operands[0];
+ operand_DTypes[1] = operand_DTypes[0];
+ Py_INCREF(operand_DTypes[1]);
operands[2] = NULL;
nop = 2;
}
- if (ufunc->type_resolver(ufunc, NPY_UNSAFE_CASTING,
- operands, NULL, dtypes) < 0) {
+ PyArrayMethodObject *ufuncimpl = promote_and_get_ufuncimpl(ufunc,
+ operands, signature, operand_DTypes,
+ force_legacy_promotion, allow_legacy_promotion);
+ if (ufuncimpl == NULL) {
goto fail;
}
- if (ufunc->legacy_inner_loop_selector(ufunc, dtypes,
- &innerloop, &innerloopdata, &needs_api) < 0) {
+
+ /* Find the correct descriptors for the operation */
+ if (resolve_descriptors(nop, ufunc, ufuncimpl,
+ operands, operation_descrs, signature, NPY_UNSAFE_CASTING) < 0) {
goto fail;
}
@@ -6080,21 +6031,44 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
NPY_ITER_GROWINNER|
NPY_ITER_DELAY_BUFALLOC,
NPY_KEEPORDER, NPY_UNSAFE_CASTING,
- op_flags, dtypes,
+ op_flags, operation_descrs,
-1, NULL, NULL, buffersize);
if (iter_buffer == NULL) {
goto fail;
}
- needs_api = needs_api | NpyIter_IterationNeedsAPI(iter_buffer);
-
iternext = NpyIter_GetIterNext(iter_buffer, NULL);
if (iternext == NULL) {
NpyIter_Deallocate(iter_buffer);
goto fail;
}
+ PyArrayMethod_Context context = {
+ .caller = (PyObject *)ufunc,
+ .method = ufuncimpl,
+ .descriptors = operation_descrs,
+ };
+
+ NPY_ARRAYMETHOD_FLAGS flags;
+ /* Use contiguous strides; if there is such a loop it may be faster */
+ npy_intp strides[3] = {
+ operation_descrs[0]->elsize, operation_descrs[1]->elsize, 0};
+ if (nop == 3) {
+ strides[2] = operation_descrs[2]->elsize;
+ }
+
+ if (ufuncimpl->get_strided_loop(&context, 1, 0, strides,
+ &strided_loop, &auxdata, &flags) < 0) {
+ goto fail;
+ }
+ int needs_api = (flags & NPY_METH_REQUIRES_PYAPI) != 0;
+ needs_api |= NpyIter_IterationNeedsAPI(iter_buffer);
+ if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* Start with the floating-point exception flags cleared */
+ npy_clear_floatstatus_barrier((char*)&iter);
+ }
+
if (!needs_api) {
NPY_BEGIN_THREADS;
}
@@ -6103,14 +6077,13 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
* Iterate over first and second operands and call ufunc
* for each pair of inputs
*/
- i = iter->size;
- while (i > 0)
+ int res = 0;
+ for (npy_intp i = iter->size; i > 0; i--)
{
char *dataptr[3];
char **buffer_dataptr;
/* one element at a time, no stride required but read by innerloop */
- npy_intp count[3] = {1, 0xDEADBEEF, 0xDEADBEEF};
- npy_intp stride[3] = {0xDEADBEEF, 0xDEADBEEF, 0xDEADBEEF};
+ npy_intp count = 1;
/*
* Set up data pointers for either one or two input operands.
@@ -6129,14 +6102,14 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
/* Reset NpyIter data pointers which will trigger a buffer copy */
NpyIter_ResetBasePointers(iter_buffer, dataptr, &err_msg);
if (err_msg) {
+ res = -1;
break;
}
buffer_dataptr = NpyIter_GetDataPtrArray(iter_buffer);
- innerloop(buffer_dataptr, count, stride, innerloopdata);
-
- if (needs_api && PyErr_Occurred()) {
+ res = strided_loop(&context, buffer_dataptr, &count, strides, auxdata);
+ if (res != 0) {
break;
}
@@ -6150,32 +6123,35 @@ ufunc_at(PyUFuncObject *ufunc, PyObject *args)
if (iter2 != NULL) {
PyArray_ITER_NEXT(iter2);
}
-
- i--;
}
NPY_END_THREADS;
- if (err_msg) {
+ if (res != 0 && err_msg) {
PyErr_SetString(PyExc_ValueError, err_msg);
}
+ if (res == 0 && !(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
+ /* NOTE: We could check float errors even when `res < 0` */
+ res = _check_ufunc_fperr(errormask, NULL, "at");
+ }
+ NPY_AUXDATA_FREE(auxdata);
NpyIter_Deallocate(iter_buffer);
Py_XDECREF(op2_array);
Py_XDECREF(iter);
Py_XDECREF(iter2);
- for (i = 0; i < 3; i++) {
- Py_XDECREF(dtypes[i]);
+ for (int i = 0; i < 3; i++) {
+ Py_XDECREF(operation_descrs[i]);
Py_XDECREF(array_operands[i]);
}
/*
- * An error should only be possible if needs_api is true, but this is not
- * strictly correct for old-style ufuncs (e.g. `power` released the GIL
- * but manually set an Exception).
+ * An error should only be possible if needs_api is true or `res != 0`,
+ * but this is not strictly correct for old-style ufuncs
+ * (e.g. `power` released the GIL but manually set an Exception).
*/
- if (PyErr_Occurred()) {
+ if (res != 0 || PyErr_Occurred()) {
return NULL;
}
else {
@@ -6190,10 +6166,11 @@ fail:
Py_XDECREF(op2_array);
Py_XDECREF(iter);
Py_XDECREF(iter2);
- for (i = 0; i < 3; i++) {
- Py_XDECREF(dtypes[i]);
+ for (int i = 0; i < 3; i++) {
+ Py_XDECREF(operation_descrs[i]);
Py_XDECREF(array_operands[i]);
}
+ NPY_AUXDATA_FREE(auxdata);
return NULL;
}
@@ -6396,19 +6373,15 @@ NPY_NO_EXPORT PyTypeObject PyUFunc_Type = {
.tp_basicsize = sizeof(PyUFuncObject),
.tp_dealloc = (destructor)ufunc_dealloc,
.tp_repr = (reprfunc)ufunc_repr,
- .tp_call = (ternaryfunc)ufunc_generic_call,
+ .tp_call = &PyVectorcall_Call,
.tp_str = (reprfunc)ufunc_repr,
.tp_flags = Py_TPFLAGS_DEFAULT |
-#if PY_VERSION_HEX >= 0x03080000
_Py_TPFLAGS_HAVE_VECTORCALL |
-#endif
Py_TPFLAGS_HAVE_GC,
.tp_traverse = (traverseproc)ufunc_traverse,
.tp_methods = ufunc_methods,
.tp_getset = ufunc_getset,
-#if PY_VERSION_HEX >= 0x03080000
.tp_vectorcall_offset = offsetof(PyUFuncObject, vectorcall),
-#endif
};
/* End of code for ufunc objects */
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 7e24bc493..9ed923cf5 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -247,6 +247,28 @@ PyUFunc_ValidateCasting(PyUFuncObject *ufunc,
}
+/*
+ * Same as `PyUFunc_ValidateCasting` but only checks output casting.
+ */
+NPY_NO_EXPORT int
+PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc,
+ NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes)
+{
+ int i, nin = ufunc->nin, nop = nin + ufunc->nout;
+
+ for (i = nin; i < nop; ++i) {
+ if (operands[i] == NULL) {
+ continue;
+ }
+ if (!PyArray_CanCastTypeTo(dtypes[i],
+ PyArray_DESCR(operands[i]), casting)) {
+ return raise_output_casting_error(
+ ufunc, casting, dtypes[i], PyArray_DESCR(operands[i]), i);
+ }
+ }
+ return 0;
+}
+
/*UFUNC_API
*
* This function applies the default type resolution rules
@@ -2142,6 +2164,10 @@ type_tuple_type_resolver(PyUFuncObject *self,
* `signature=(None,)*nin + (dtype,)*nout`. If the signature matches that
* exactly (could be relaxed but that is not necessary for backcompat),
* we also try `signature=(dtype,)*(nin+nout)`.
+ * Since reduction pass in `(dtype, None, dtype)` we broaden this to
+ * replacing all unspecified dtypes with the homogeneous output one.
+ * Note that this can (and often will) lead to unsafe casting. This is
+ * normally rejected (but not currently for reductions!).
* This used to be the main meaning for `dtype=dtype`, but some calls broke
* the expectation, and changing it allows for `dtype=dtype` to be useful
* for ufuncs like `np.ldexp` in the future while also normalizing it to
@@ -2160,13 +2186,12 @@ type_tuple_type_resolver(PyUFuncObject *self,
if (homogeneous_type != NPY_NOTYPE) {
for (int i = 0; i < nin; i++) {
if (specified_types[i] != NPY_NOTYPE) {
- homogeneous_type = NPY_NOTYPE;
- break;
+ /* Never replace a specified type! */
+ continue;
}
specified_types[i] = homogeneous_type;
}
- }
- if (homogeneous_type != NPY_NOTYPE) {
+
/* Try again with the homogeneous specified types. */
res = type_tuple_type_resolver_core(self,
op, input_casting, casting, specified_types, any_object,
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index dd88a081a..84a2593f4 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -99,6 +99,10 @@ PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc,
PyObject *type_tup,
PyArray_Descr **out_dtypes);
+NPY_NO_EXPORT int
+PyUFunc_ValidateOutCasting(PyUFuncObject *ufunc,
+ NPY_CASTING casting, PyArrayObject **operands, PyArray_Descr **dtypes);
+
/*
* Does a linear search for the best inner loop of the ufunc.
*
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index a9954dfc1..272555704 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -22,6 +22,7 @@
#include "numpy/npy_math.h"
#include "number.h"
+#include "dispatching.h"
static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om};
@@ -305,5 +306,33 @@ int initumath(PyObject *m)
return -1;
}
+ /*
+ * Set up promoters for logical functions
+ * TODO: This should probably be done at a better place, or even in the
+ * code generator directly.
+ */
+ s = _PyDict_GetItemStringWithError(d, "logical_and");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
+
+ s = _PyDict_GetItemStringWithError(d, "logical_or");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
+
+ s = _PyDict_GetItemStringWithError(d, "logical_xor");
+ if (s == NULL) {
+ return -1;
+ }
+ if (install_logical_ufunc_promoter(s) < 0) {
+ return -1;
+ }
return 0;
}
diff --git a/numpy/core/tests/data/generate_umath_validation_data.cpp b/numpy/core/tests/data/generate_umath_validation_data.cpp
index 9d97ff4ab..418eae670 100644
--- a/numpy/core/tests/data/generate_umath_validation_data.cpp
+++ b/numpy/core/tests/data/generate_umath_validation_data.cpp
@@ -1,41 +1,46 @@
-#include<math.h>
-#include<stdio.h>
-#include<iostream>
-#include<algorithm>
-#include<vector>
-#include<random>
-#include<fstream>
-#include<time.h>
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <math.h>
+#include <random>
+#include <stdio.h>
+#include <time.h>
+#include <vector>
struct ufunc {
std::string name;
- double (*f32func) (double);
- long double (*f64func) (long double);
+ double (*f32func)(double);
+ long double (*f64func)(long double);
float f32ulp;
float f64ulp;
};
-template<typename T>
-T RandomFloat(T a, T b) {
- T random = ((T) rand()) / (T) RAND_MAX;
+template <typename T>
+T
+RandomFloat(T a, T b)
+{
+ T random = ((T)rand()) / (T)RAND_MAX;
T diff = b - a;
T r = random * diff;
return a + r;
}
-template<typename T>
-void append_random_array(std::vector<T>& arr, T min, T max, size_t N)
+template <typename T>
+void
+append_random_array(std::vector<T> &arr, T min, T max, size_t N)
{
for (size_t ii = 0; ii < N; ++ii)
arr.emplace_back(RandomFloat<T>(min, max));
}
-template<typename T1, typename T2>
-std::vector<T1> computeTrueVal(const std::vector<T1>& in, T2(*mathfunc)(T2)) {
+template <typename T1, typename T2>
+std::vector<T1>
+computeTrueVal(const std::vector<T1> &in, T2 (*mathfunc)(T2))
+{
std::vector<T1> out;
for (T1 elem : in) {
- T2 elem_d = (T2) elem;
- T1 out_elem = (T1) mathfunc(elem_d);
+ T2 elem_d = (T2)elem;
+ T1 out_elem = (T1)mathfunc(elem_d);
out.emplace_back(out_elem);
}
return out;
@@ -49,17 +54,20 @@ std::vector<T1> computeTrueVal(const std::vector<T1>& in, T2(*mathfunc)(T2)) {
#define MINDEN std::numeric_limits<T>::denorm_min()
#define MINFLT std::numeric_limits<T>::min()
#define MAXFLT std::numeric_limits<T>::max()
-#define INF std::numeric_limits<T>::infinity()
-#define qNAN std::numeric_limits<T>::quiet_NaN()
-#define sNAN std::numeric_limits<T>::signaling_NaN()
+#define INF std::numeric_limits<T>::infinity()
+#define qNAN std::numeric_limits<T>::quiet_NaN()
+#define sNAN std::numeric_limits<T>::signaling_NaN()
-template<typename T>
-std::vector<T> generate_input_vector(std::string func) {
- std::vector<T> input = {MINDEN, -MINDEN, MINFLT, -MINFLT, MAXFLT, -MAXFLT,
- INF, -INF, qNAN, sNAN, -1.0, 1.0, 0.0, -0.0};
+template <typename T>
+std::vector<T>
+generate_input_vector(std::string func)
+{
+ std::vector<T> input = {MINDEN, -MINDEN, MINFLT, -MINFLT, MAXFLT,
+ -MAXFLT, INF, -INF, qNAN, sNAN,
+ -1.0, 1.0, 0.0, -0.0};
// [-1.0, 1.0]
- if ((func == "arcsin") || (func == "arccos") || (func == "arctanh")){
+ if ((func == "arcsin") || (func == "arccos") || (func == "arctanh")) {
append_random_array<T>(input, -1.0, 1.0, 700);
}
// (0.0, INF]
@@ -98,57 +106,62 @@ std::vector<T> generate_input_vector(std::string func) {
return input;
}
-int main() {
- srand (42);
+int
+main()
+{
+ srand(42);
std::vector<struct ufunc> umathfunc = {
- {"sin",sin,sin,2.37,3.3},
- {"cos",cos,cos,2.36,3.38},
- {"tan",tan,tan,3.91,3.93},
- {"arcsin",asin,asin,3.12,2.55},
- {"arccos",acos,acos,2.1,1.67},
- {"arctan",atan,atan,2.3,2.52},
- {"sinh",sinh,sinh,1.55,1.89},
- {"cosh",cosh,cosh,2.48,1.97},
- {"tanh",tanh,tanh,1.38,1.19},
- {"arcsinh",asinh,asinh,1.01,1.48},
- {"arccosh",acosh,acosh,1.16,1.05},
- {"arctanh",atanh,atanh,1.45,1.46},
- {"cbrt",cbrt,cbrt,1.94,1.82},
- //{"exp",exp,exp,3.76,1.53},
- {"exp2",exp2,exp2,1.01,1.04},
- {"expm1",expm1,expm1,2.62,2.1},
- //{"log",log,log,1.84,1.67},
- {"log10",log10,log10,3.5,1.92},
- {"log1p",log1p,log1p,1.96,1.93},
- {"log2",log2,log2,2.12,1.84},
+ {"sin", sin, sin, 2.37, 3.3},
+ {"cos", cos, cos, 2.36, 3.38},
+ {"tan", tan, tan, 3.91, 3.93},
+ {"arcsin", asin, asin, 3.12, 2.55},
+ {"arccos", acos, acos, 2.1, 1.67},
+ {"arctan", atan, atan, 2.3, 2.52},
+ {"sinh", sinh, sinh, 1.55, 1.89},
+ {"cosh", cosh, cosh, 2.48, 1.97},
+ {"tanh", tanh, tanh, 1.38, 1.19},
+ {"arcsinh", asinh, asinh, 1.01, 1.48},
+ {"arccosh", acosh, acosh, 1.16, 1.05},
+ {"arctanh", atanh, atanh, 1.45, 1.46},
+ {"cbrt", cbrt, cbrt, 1.94, 1.82},
+ //{"exp",exp,exp,3.76,1.53},
+ {"exp2", exp2, exp2, 1.01, 1.04},
+ {"expm1", expm1, expm1, 2.62, 2.1},
+ //{"log",log,log,1.84,1.67},
+ {"log10", log10, log10, 3.5, 1.92},
+ {"log1p", log1p, log1p, 1.96, 1.93},
+ {"log2", log2, log2, 2.12, 1.84},
};
for (int ii = 0; ii < umathfunc.size(); ++ii) {
- // ignore sin/cos
+ // ignore sin/cos
if ((umathfunc[ii].name != "sin") && (umathfunc[ii].name != "cos")) {
- std::string fileName = "umath-validation-set-" + umathfunc[ii].name + ".csv";
+ std::string fileName =
+ "umath-validation-set-" + umathfunc[ii].name + ".csv";
std::ofstream txtOut;
- txtOut.open (fileName, std::ofstream::trunc);
+ txtOut.open(fileName, std::ofstream::trunc);
txtOut << "dtype,input,output,ulperrortol" << std::endl;
// Single Precision
auto f32in = generate_input_vector<float>(umathfunc[ii].name);
- auto f32out = computeTrueVal<float, double>(f32in, umathfunc[ii].f32func);
+ auto f32out = computeTrueVal<float, double>(f32in,
+ umathfunc[ii].f32func);
for (int jj = 0; jj < f32in.size(); ++jj) {
- txtOut << "np.float32" << std::hex <<
- ",0x" << *reinterpret_cast<uint32_t*>(&f32in[jj]) <<
- ",0x" << *reinterpret_cast<uint32_t*>(&f32out[jj]) <<
- "," << ceil(umathfunc[ii].f32ulp) << std::endl;
+ txtOut << "np.float32" << std::hex << ",0x"
+ << *reinterpret_cast<uint32_t *>(&f32in[jj]) << ",0x"
+ << *reinterpret_cast<uint32_t *>(&f32out[jj]) << ","
+ << ceil(umathfunc[ii].f32ulp) << std::endl;
}
// Double Precision
auto f64in = generate_input_vector<double>(umathfunc[ii].name);
- auto f64out = computeTrueVal<double, long double>(f64in, umathfunc[ii].f64func);
+ auto f64out = computeTrueVal<double, long double>(
+ f64in, umathfunc[ii].f64func);
for (int jj = 0; jj < f64in.size(); ++jj) {
- txtOut << "np.float64" << std::hex <<
- ",0x" << *reinterpret_cast<uint64_t*>(&f64in[jj]) <<
- ",0x" << *reinterpret_cast<uint64_t*>(&f64out[jj]) <<
- "," << ceil(umathfunc[ii].f64ulp) << std::endl;
+ txtOut << "np.float64" << std::hex << ",0x"
+ << *reinterpret_cast<uint64_t *>(&f64in[jj]) << ",0x"
+ << *reinterpret_cast<uint64_t *>(&f64out[jj]) << ","
+ << ceil(umathfunc[ii].f64ulp) << std::endl;
}
txtOut.close();
}
diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py
index 291cdae89..d3c7211cd 100644
--- a/numpy/core/tests/test_api.py
+++ b/numpy/core/tests/test_api.py
@@ -598,3 +598,31 @@ def test_broadcast_arrays():
def test_full_from_list(shape, fill_value, expected_output):
output = np.full(shape, fill_value)
assert_equal(output, expected_output)
+
+def test_astype_copyflag():
+ # test the various copyflag options
+ arr = np.arange(10, dtype=np.intp)
+
+ res_true = arr.astype(np.intp, copy=True)
+ assert not np.may_share_memory(arr, res_true)
+ res_always = arr.astype(np.intp, copy=np._CopyMode.ALWAYS)
+ assert not np.may_share_memory(arr, res_always)
+
+ res_false = arr.astype(np.intp, copy=False)
+ # `res_false is arr` currently, but check `may_share_memory`.
+ assert np.may_share_memory(arr, res_false)
+ res_if_needed = arr.astype(np.intp, copy=np._CopyMode.IF_NEEDED)
+ # `res_if_needed is arr` currently, but check `may_share_memory`.
+ assert np.may_share_memory(arr, res_if_needed)
+
+ res_never = arr.astype(np.intp, copy=np._CopyMode.NEVER)
+ assert np.may_share_memory(arr, res_never)
+
+ # Simple tests for when a copy is necessary:
+ res_false = arr.astype(np.float64, copy=False)
+ assert_array_equal(res_false, arr)
+ res_if_needed = arr.astype(np.float64,
+ copy=np._CopyMode.IF_NEEDED)
+ assert_array_equal(res_if_needed, arr)
+ assert_raises(ValueError, arr.astype, np.float64,
+ copy=np._CopyMode.NEVER)
diff --git a/numpy/core/tests/test_casting_unittests.py b/numpy/core/tests/test_casting_unittests.py
index b0d8ff503..cb4792090 100644
--- a/numpy/core/tests/test_casting_unittests.py
+++ b/numpy/core/tests/test_casting_unittests.py
@@ -9,7 +9,6 @@ than integration tests.
import pytest
import textwrap
import enum
-import itertools
import random
import numpy as np
diff --git a/numpy/core/tests/test_custom_dtypes.py b/numpy/core/tests/test_custom_dtypes.py
index 5eb82bc93..6bcc45d6b 100644
--- a/numpy/core/tests/test_custom_dtypes.py
+++ b/numpy/core/tests/test_custom_dtypes.py
@@ -101,18 +101,52 @@ class TestSFloat:
expected_view = a.view(np.float64) * b.view(np.float64)
assert_array_equal(res.view(np.float64), expected_view)
+ def test_possible_and_impossible_reduce(self):
+ # For reductions to work, the first and last operand must have the
+ # same dtype. For this parametric DType that is not necessarily true.
+ a = self._get_array(2.)
+ # Addition reductin works (as of writing requires to pass initial
+ # because setting a scaled-float from the default `0` fails).
+ res = np.add.reduce(a, initial=0.)
+ assert res == a.astype(np.float64).sum()
+
+ # But each multiplication changes the factor, so a reduction is not
+ # possible (the relaxed version of the old refusal to handle any
+ # flexible dtype).
+ with pytest.raises(TypeError,
+ match="the resolved dtypes are not compatible"):
+ np.multiply.reduce(a)
+
+ def test_basic_ufunc_at(self):
+ float_a = np.array([1., 2., 3.])
+ b = self._get_array(2.)
+
+ float_b = b.view(np.float64).copy()
+ np.multiply.at(float_b, [1, 1, 1], float_a)
+ np.multiply.at(b, [1, 1, 1], float_a)
+
+ assert_array_equal(b.view(np.float64), float_b)
+
def test_basic_multiply_promotion(self):
float_a = np.array([1., 2., 3.])
b = self._get_array(2.)
res1 = float_a * b
res2 = b * float_a
+
# one factor is one, so we get the factor of b:
assert res1.dtype == res2.dtype == b.dtype
expected_view = float_a * b.view(np.float64)
assert_array_equal(res1.view(np.float64), expected_view)
assert_array_equal(res2.view(np.float64), expected_view)
+ # Check that promotion works when `out` is used:
+ np.multiply(b, float_a, out=res2)
+ with pytest.raises(TypeError):
+ # The promoter accepts this (maybe it should not), but the SFloat
+ # result cannot be cast to integer:
+ np.multiply(b, float_a, out=np.arange(3))
+
def test_basic_addition(self):
a = self._get_array(2.)
b = self._get_array(4.)
@@ -145,3 +179,23 @@ class TestSFloat:
# Check that casting the output fails also (done by the ufunc here)
with pytest.raises(TypeError):
np.add(a, a, out=c, casting="safe")
+
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_casts_to_bool(self, ufunc):
+ a = self._get_array(2.)
+ a[0] = 0. # make sure first element is considered False.
+
+ float_equiv = a.astype(float)
+ expected = ufunc(float_equiv, float_equiv)
+ res = ufunc(a, a)
+ assert_array_equal(res, expected)
+
+ # also check that the same works for reductions:
+ expected = ufunc.reduce(float_equiv)
+ res = ufunc.reduce(a)
+ assert_array_equal(res, expected)
+
+ # The output casting does not match the bool, bool -> bool loop:
+ with pytest.raises(TypeError):
+ ufunc(a, a, out=np.empty(a.shape, dtype=int), casting="equiv")
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 69eba7ba0..b95d669a8 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -2029,6 +2029,21 @@ class TestDateTime:
assert_equal(np.maximum.reduce(a),
np.timedelta64(7, 's'))
+ def test_datetime_no_subtract_reducelike(self):
+ # subtracting two datetime64 works, but we cannot reduce it, since
+ # the result of that subtraction will have a different dtype.
+ arr = np.array(["2021-12-02", "2019-05-12"], dtype="M8[ms]")
+ msg = r"the resolved dtypes are not compatible with subtract\."
+
+ with pytest.raises(TypeError, match=msg + "reduce"):
+ np.subtract.reduce(arr)
+
+ with pytest.raises(TypeError, match=msg + "accumulate"):
+ np.subtract.accumulate(arr)
+
+ with pytest.raises(TypeError, match=msg + "reduceat"):
+ np.subtract.reduceat(arr, [0])
+
def test_datetime_busday_offset(self):
# First Monday in June
assert_equal(
diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py
index 898ff8075..e0b66defc 100644
--- a/numpy/core/tests/test_deprecations.py
+++ b/numpy/core/tests/test_deprecations.py
@@ -13,7 +13,8 @@ import sys
import numpy as np
from numpy.testing import (
- assert_raises, assert_warns, assert_, assert_array_equal, SkipTest, KnownFailureException
+ assert_raises, assert_warns, assert_, assert_array_equal, SkipTest,
+ KnownFailureException, break_cycles,
)
from numpy.core._multiarray_tests import fromstring_null_term_c_api
@@ -1215,3 +1216,57 @@ class TestPartitionBoolIndex(_DeprecationTestCase):
def test_not_deprecated(self, func):
self.assert_not_deprecated(lambda: func(1))
self.assert_not_deprecated(lambda: func([0, 1]))
+
+
+class TestMachAr(_DeprecationTestCase):
+ # Deprecated 2021-10-19, NumPy 1.22
+ warning_cls = DeprecationWarning
+
+ def test_deprecated(self):
+ self.assert_deprecated(lambda: np.MachAr)
+
+ def test_deprecated_module(self):
+ self.assert_deprecated(lambda: getattr(np.core, "machar"))
+
+ def test_deprecated_attr(self):
+ finfo = np.finfo(float)
+ self.assert_deprecated(lambda: getattr(finfo, "machar"))
+
+
+class TestQuantileInterpolationDeprecation(_DeprecationTestCase):
+ # Deprecated 2021-11-08, NumPy 1.22
+ @pytest.mark.parametrize("func",
+ [np.percentile, np.quantile, np.nanpercentile, np.nanquantile])
+ def test_deprecated(self, func):
+ self.assert_deprecated(
+ lambda: func([0., 1.], 0., interpolation="linear"))
+ self.assert_deprecated(
+ lambda: func([0., 1.], 0., interpolation="nearest"))
+
+ @pytest.mark.parametrize("func",
+ [np.percentile, np.quantile, np.nanpercentile, np.nanquantile])
+ def test_both_passed(self, func):
+ with warnings.catch_warnings():
+ # catch the DeprecationWarning so that it does not raise:
+ warnings.simplefilter("always", DeprecationWarning)
+ with pytest.raises(TypeError):
+ func([0., 1.], 0., interpolation="nearest", method="nearest")
+
+
+class TestMemEventHook(_DeprecationTestCase):
+ # Deprecated 2021-11-18, NumPy 1.23
+ def test_mem_seteventhook(self):
+ # The actual tests are within the C code in
+ # multiarray/_multiarray_tests.c.src
+ import numpy.core._multiarray_tests as ma_tests
+ with pytest.warns(DeprecationWarning,
+ match='PyDataMem_SetEventHook is deprecated'):
+ ma_tests.test_pydatamem_seteventhook_start()
+ # force an allocation and free of a numpy array
+ # needs to be larger then limit of small memory cacher in ctors.c
+ a = np.zeros(1000)
+ del a
+ break_cycles()
+ with pytest.warns(DeprecationWarning,
+ match='PyDataMem_SetEventHook is deprecated'):
+ ma_tests.test_pydatamem_seteventhook_end()
diff --git a/numpy/core/tests/test_dlpack.py b/numpy/core/tests/test_dlpack.py
new file mode 100644
index 000000000..f848b2008
--- /dev/null
+++ b/numpy/core/tests/test_dlpack.py
@@ -0,0 +1,109 @@
+import sys
+import pytest
+
+import numpy as np
+from numpy.testing import assert_array_equal, IS_PYPY
+
+
+class TestDLPack:
+ @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.")
+ def test_dunder_dlpack_refcount(self):
+ x = np.arange(5)
+ y = x.__dlpack__()
+ assert sys.getrefcount(x) == 3
+ del y
+ assert sys.getrefcount(x) == 2
+
+ def test_dunder_dlpack_stream(self):
+ x = np.arange(5)
+ x.__dlpack__(stream=None)
+
+ with pytest.raises(RuntimeError):
+ x.__dlpack__(stream=1)
+
+ def test_strides_not_multiple_of_itemsize(self):
+ dt = np.dtype([('int', np.int32), ('char', np.int8)])
+ y = np.zeros((5,), dtype=dt)
+ z = y['int']
+
+ with pytest.raises(RuntimeError):
+ np._from_dlpack(z)
+
+ @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.")
+ def test_from_dlpack_refcount(self):
+ x = np.arange(5)
+ y = np._from_dlpack(x)
+ assert sys.getrefcount(x) == 3
+ del y
+ assert sys.getrefcount(x) == 2
+
+ @pytest.mark.parametrize("dtype", [
+ np.int8, np.int16, np.int32, np.int64,
+ np.uint8, np.uint16, np.uint32, np.uint64,
+ np.float16, np.float32, np.float64,
+ np.complex64, np.complex128
+ ])
+ def test_dtype_passthrough(self, dtype):
+ x = np.arange(5, dtype=dtype)
+ y = np._from_dlpack(x)
+
+ assert y.dtype == x.dtype
+ assert_array_equal(x, y)
+
+ def test_invalid_dtype(self):
+ x = np.asarray(np.datetime64('2021-05-27'))
+
+ with pytest.raises(TypeError):
+ np._from_dlpack(x)
+
+ def test_invalid_byte_swapping(self):
+ dt = np.dtype('=i8').newbyteorder()
+ x = np.arange(5, dtype=dt)
+
+ with pytest.raises(TypeError):
+ np._from_dlpack(x)
+
+ def test_non_contiguous(self):
+ x = np.arange(25).reshape((5, 5))
+
+ y1 = x[0]
+ assert_array_equal(y1, np._from_dlpack(y1))
+
+ y2 = x[:, 0]
+ assert_array_equal(y2, np._from_dlpack(y2))
+
+ y3 = x[1, :]
+ assert_array_equal(y3, np._from_dlpack(y3))
+
+ y4 = x[1]
+ assert_array_equal(y4, np._from_dlpack(y4))
+
+ y5 = np.diagonal(x).copy()
+ assert_array_equal(y5, np._from_dlpack(y5))
+
+ @pytest.mark.parametrize("ndim", range(33))
+ def test_higher_dims(self, ndim):
+ shape = (1,) * ndim
+ x = np.zeros(shape, dtype=np.float64)
+
+ assert shape == np._from_dlpack(x).shape
+
+ def test_dlpack_device(self):
+ x = np.arange(5)
+ assert x.__dlpack_device__() == (1, 0)
+ assert np._from_dlpack(x).__dlpack_device__() == (1, 0)
+
+ def dlpack_deleter_exception(self):
+ x = np.arange(5)
+ _ = x.__dlpack__()
+ raise RuntimeError
+
+ def test_dlpack_destructor_exception(self):
+ with pytest.raises(RuntimeError):
+ self.dlpack_deleter_exception()
+
+ def test_readonly(self):
+ x = np.arange(5)
+ x.flags.writeable = False
+ with pytest.raises(TypeError):
+ x.__dlpack__()
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index 61dce2494..e49604e4d 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -3,7 +3,6 @@ import operator
import pytest
import ctypes
import gc
-import warnings
import types
from typing import Any
@@ -628,6 +627,12 @@ class TestSubarray:
t2 = np.dtype('2i4', align=True)
assert_equal(t1.alignment, t2.alignment)
+ def test_aligned_empty(self):
+ # Mainly regression test for gh-19696: construction failed completely
+ dt = np.dtype([], align=True)
+ assert dt == np.dtype([])
+ dt = np.dtype({"names": [], "formats": [], "itemsize": 0}, align=True)
+ assert dt == np.dtype([])
def iter_struct_object_dtypes():
"""
@@ -724,26 +729,30 @@ class TestStructuredObjectRefcounting:
def test_structured_object_indexing(self, shape, index, items_changed,
dt, pat, count, singleton):
"""Structured object reference counting for advanced indexing."""
- zero = 0
- one = 1
+ # Use two small negative values (should be singletons, but less likely
+ # to run into race-conditions). This failed in some threaded envs
+ # When using 0 and 1. If it fails again, should remove all explicit
+ # checks, and rely on `pytest-leaks` reference count checker only.
+ val0 = -4
+ val1 = -5
- arr = np.zeros(shape, dt)
+ arr = np.full(shape, val0, dt)
gc.collect()
- before_zero = sys.getrefcount(zero)
- before_one = sys.getrefcount(one)
+ before_val0 = sys.getrefcount(val0)
+ before_val1 = sys.getrefcount(val1)
# Test item getting:
part = arr[index]
- after_zero = sys.getrefcount(zero)
- assert after_zero - before_zero == count * items_changed
+ after_val0 = sys.getrefcount(val0)
+ assert after_val0 - before_val0 == count * items_changed
del part
# Test item setting:
- arr[index] = one
+ arr[index] = val1
gc.collect()
- after_zero = sys.getrefcount(zero)
- after_one = sys.getrefcount(one)
- assert before_zero - after_zero == count * items_changed
- assert after_one - before_one == count * items_changed
+ after_val0 = sys.getrefcount(val0)
+ after_val1 = sys.getrefcount(val1)
+ assert before_val0 - after_val0 == count * items_changed
+ assert after_val1 - before_val1 == count * items_changed
@pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'],
iter_struct_object_dtypes())
diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py
index 78c5e527b..172311624 100644
--- a/numpy/core/tests/test_einsum.py
+++ b/numpy/core/tests/test_einsum.py
@@ -1,5 +1,7 @@
import itertools
+import pytest
+
import numpy as np
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_almost_equal,
@@ -744,6 +746,52 @@ class TestEinsum:
np.einsum('ij,jk->ik', x, x, out=out)
assert_array_equal(out.base, correct_base)
+ @pytest.mark.parametrize("dtype",
+ np.typecodes["AllFloat"] + np.typecodes["AllInteger"])
+ def test_different_paths(self, dtype):
+ # Test originally added to cover broken float16 path: gh-20305
+ # Likely most are covered elsewhere, at least partially.
+ dtype = np.dtype(dtype)
+ # Simple test, designed to excersize most specialized code paths,
+ # note the +0.5 for floats. This makes sure we use a float value
+ # where the results must be exact.
+ arr = (np.arange(7) + 0.5).astype(dtype)
+ scalar = np.array(2, dtype=dtype)
+
+ # contig -> scalar:
+ res = np.einsum('i->', arr)
+ assert res == arr.sum()
+ # contig, contig -> contig:
+ res = np.einsum('i,i->i', arr, arr)
+ assert_array_equal(res, arr * arr)
+ # noncontig, noncontig -> contig:
+ res = np.einsum('i,i->i', arr.repeat(2)[::2], arr.repeat(2)[::2])
+ assert_array_equal(res, arr * arr)
+ # contig + contig -> scalar
+ assert np.einsum('i,i->', arr, arr) == (arr * arr).sum()
+ # contig + scalar -> contig (with out)
+ out = np.ones(7, dtype=dtype)
+ res = np.einsum('i,->i', arr, dtype.type(2), out=out)
+ assert_array_equal(res, arr * dtype.type(2))
+ # scalar + contig -> contig (with out)
+ res = np.einsum(',i->i', scalar, arr)
+ assert_array_equal(res, arr * dtype.type(2))
+ # scalar + contig -> scalar
+ res = np.einsum(',i->', scalar, arr)
+ # Use einsum to compare to not have difference due to sum round-offs:
+ assert res == np.einsum('i->', scalar * arr)
+ # contig + scalar -> scalar
+ res = np.einsum('i,->', arr, scalar)
+ # Use einsum to compare to not have difference due to sum round-offs:
+ assert res == np.einsum('i->', scalar * arr)
+ # contig + contig + contig -> scalar
+ arr = np.array([0.5, 0.5, 0.25, 4.5, 3.], dtype=dtype)
+ res = np.einsum('i,i,i->', arr, arr, arr)
+ assert_array_equal(res, (arr * arr * arr).sum())
+ # four arrays:
+ res = np.einsum('i,i,i,i->', arr, arr, arr, arr)
+ assert_array_equal(res, (arr * arr * arr * arr).sum())
+
def test_small_boolean_arrays(self):
# See gh-5946.
# Use array of True embedded in False.
diff --git a/numpy/core/tests/test_getlimits.py b/numpy/core/tests/test_getlimits.py
index de7b3e769..c5148db2c 100644
--- a/numpy/core/tests/test_getlimits.py
+++ b/numpy/core/tests/test_getlimits.py
@@ -46,7 +46,7 @@ class TestFinfo:
[np.float16, np.float32, np.float64, np.complex64,
np.complex128]))
for dt1, dt2 in dts:
- for attr in ('bits', 'eps', 'epsneg', 'iexp', 'machar', 'machep',
+ for attr in ('bits', 'eps', 'epsneg', 'iexp', 'machep',
'max', 'maxexp', 'min', 'minexp', 'negep', 'nexp',
'nmant', 'precision', 'resolution', 'tiny',
'smallest_normal', 'smallest_subnormal'):
diff --git a/numpy/core/tests/test_machar.py b/numpy/core/tests/test_machar.py
index 673f309f1..3a66ec51f 100644
--- a/numpy/core/tests/test_machar.py
+++ b/numpy/core/tests/test_machar.py
@@ -3,7 +3,7 @@ Test machar. Given recent changes to hardcode type data, we might want to get
rid of both MachAr and this test at some point.
"""
-from numpy.core.machar import MachAr
+from numpy.core._machar import MachAr
import numpy.core.numerictypes as ntypes
from numpy import errstate, array
diff --git a/numpy/core/tests/test_mem_policy.py b/numpy/core/tests/test_mem_policy.py
new file mode 100644
index 000000000..3dae36d5a
--- /dev/null
+++ b/numpy/core/tests/test_mem_policy.py
@@ -0,0 +1,423 @@
+import asyncio
+import gc
+import os
+import pytest
+import numpy as np
+import threading
+import warnings
+from numpy.testing import extbuild, assert_warns
+import sys
+
+
+@pytest.fixture
+def get_module(tmp_path):
+ """ Add a memory policy that returns a false pointer 64 bytes into the
+ actual allocation, and fill the prefix with some text. Then check at each
+ memory manipulation that the prefix exists, to make sure all alloc/realloc/
+ free/calloc go via the functions here.
+ """
+ if sys.platform.startswith('cygwin'):
+ pytest.skip('link fails on cygwin')
+ functions = [
+ ("get_default_policy", "METH_NOARGS", """
+ Py_INCREF(PyDataMem_DefaultHandler);
+ return PyDataMem_DefaultHandler;
+ """),
+ ("set_secret_data_policy", "METH_NOARGS", """
+ PyObject *secret_data =
+ PyCapsule_New(&secret_data_handler, "mem_handler", NULL);
+ if (secret_data == NULL) {
+ return NULL;
+ }
+ PyObject *old = PyDataMem_SetHandler(secret_data);
+ Py_DECREF(secret_data);
+ return old;
+ """),
+ ("set_old_policy", "METH_O", """
+ PyObject *old;
+ if (args != NULL && PyCapsule_CheckExact(args)) {
+ old = PyDataMem_SetHandler(args);
+ }
+ else {
+ old = PyDataMem_SetHandler(NULL);
+ }
+ return old;
+ """),
+ ("get_array", "METH_NOARGS", """
+ char *buf = (char *)malloc(20);
+ npy_intp dims[1];
+ dims[0] = 20;
+ PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8);
+ return PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims, NULL,
+ buf, NPY_ARRAY_WRITEABLE, NULL);
+ """),
+ ("set_own", "METH_O", """
+ if (!PyArray_Check(args)) {
+ PyErr_SetString(PyExc_ValueError,
+ "need an ndarray");
+ return NULL;
+ }
+ PyArray_ENABLEFLAGS((PyArrayObject*)args, NPY_ARRAY_OWNDATA);
+ // Maybe try this too?
+ // PyArray_BASE(PyArrayObject *)args) = NULL;
+ Py_RETURN_NONE;
+ """),
+ ("get_array_with_base", "METH_NOARGS", """
+ char *buf = (char *)malloc(20);
+ npy_intp dims[1];
+ dims[0] = 20;
+ PyArray_Descr *descr = PyArray_DescrNewFromType(NPY_UINT8);
+ PyObject *arr = PyArray_NewFromDescr(&PyArray_Type, descr, 1, dims,
+ NULL, buf,
+ NPY_ARRAY_WRITEABLE, NULL);
+ if (arr == NULL) return NULL;
+ PyObject *obj = PyCapsule_New(buf, "buf capsule",
+ (PyCapsule_Destructor)&warn_on_free);
+ if (obj == NULL) {
+ Py_DECREF(arr);
+ return NULL;
+ }
+ if (PyArray_SetBaseObject((PyArrayObject *)arr, obj) < 0) {
+ Py_DECREF(arr);
+ Py_DECREF(obj);
+ return NULL;
+ }
+ return arr;
+
+ """),
+ ]
+ prologue = '''
+ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+ #include <numpy/arrayobject.h>
+ /*
+ * This struct allows the dynamic configuration of the allocator funcs
+ * of the `secret_data_allocator`. It is provided here for
+ * demonstration purposes, as a valid `ctx` use-case scenario.
+ */
+ typedef struct {
+ void *(*malloc)(size_t);
+ void *(*calloc)(size_t, size_t);
+ void *(*realloc)(void *, size_t);
+ void (*free)(void *);
+ } SecretDataAllocatorFuncs;
+
+ NPY_NO_EXPORT void *
+ shift_alloc(void *ctx, size_t sz) {
+ SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx;
+ char *real = (char *)funcs->malloc(sz + 64);
+ if (real == NULL) {
+ return NULL;
+ }
+ snprintf(real, 64, "originally allocated %ld", (unsigned long)sz);
+ return (void *)(real + 64);
+ }
+ NPY_NO_EXPORT void *
+ shift_zero(void *ctx, size_t sz, size_t cnt) {
+ SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx;
+ char *real = (char *)funcs->calloc(sz + 64, cnt);
+ if (real == NULL) {
+ return NULL;
+ }
+ snprintf(real, 64, "originally allocated %ld via zero",
+ (unsigned long)sz);
+ return (void *)(real + 64);
+ }
+ NPY_NO_EXPORT void
+ shift_free(void *ctx, void * p, npy_uintp sz) {
+ SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx;
+ if (p == NULL) {
+ return ;
+ }
+ char *real = (char *)p - 64;
+ if (strncmp(real, "originally allocated", 20) != 0) {
+ fprintf(stdout, "uh-oh, unmatched shift_free, "
+ "no appropriate prefix\\n");
+ /* Make C runtime crash by calling free on the wrong address */
+ funcs->free((char *)p + 10);
+ /* funcs->free(real); */
+ }
+ else {
+ npy_uintp i = (npy_uintp)atoi(real +20);
+ if (i != sz) {
+ fprintf(stderr, "uh-oh, unmatched shift_free"
+ "(ptr, %ld) but allocated %ld\\n", sz, i);
+ /* This happens in some places, only print */
+ funcs->free(real);
+ }
+ else {
+ funcs->free(real);
+ }
+ }
+ }
+ NPY_NO_EXPORT void *
+ shift_realloc(void *ctx, void * p, npy_uintp sz) {
+ SecretDataAllocatorFuncs *funcs = (SecretDataAllocatorFuncs *)ctx;
+ if (p != NULL) {
+ char *real = (char *)p - 64;
+ if (strncmp(real, "originally allocated", 20) != 0) {
+ fprintf(stdout, "uh-oh, unmatched shift_realloc\\n");
+ return realloc(p, sz);
+ }
+ return (void *)((char *)funcs->realloc(real, sz + 64) + 64);
+ }
+ else {
+ char *real = (char *)funcs->realloc(p, sz + 64);
+ if (real == NULL) {
+ return NULL;
+ }
+ snprintf(real, 64, "originally allocated "
+ "%ld via realloc", (unsigned long)sz);
+ return (void *)(real + 64);
+ }
+ }
+ /* As an example, we use the standard {m|c|re}alloc/free funcs. */
+ static SecretDataAllocatorFuncs secret_data_handler_ctx = {
+ malloc,
+ calloc,
+ realloc,
+ free
+ };
+ static PyDataMem_Handler secret_data_handler = {
+ "secret_data_allocator",
+ 1,
+ {
+ &secret_data_handler_ctx, /* ctx */
+ shift_alloc, /* malloc */
+ shift_zero, /* calloc */
+ shift_realloc, /* realloc */
+ shift_free /* free */
+ }
+ };
+ void warn_on_free(void *capsule) {
+ PyErr_WarnEx(PyExc_UserWarning, "in warn_on_free", 1);
+ void * obj = PyCapsule_GetPointer(capsule,
+ PyCapsule_GetName(capsule));
+ free(obj);
+ };
+ '''
+ more_init = "import_array();"
+ try:
+ import mem_policy
+ return mem_policy
+ except ImportError:
+ pass
+ # if it does not exist, build and load it
+ return extbuild.build_and_import_extension('mem_policy',
+ functions,
+ prologue=prologue,
+ include_dirs=[np.get_include()],
+ build_dir=tmp_path,
+ more_init=more_init)
+
+
+def test_set_policy(get_module):
+
+ get_handler_name = np.core.multiarray.get_handler_name
+ get_handler_version = np.core.multiarray.get_handler_version
+ orig_policy_name = get_handler_name()
+
+ a = np.arange(10).reshape((2, 5)) # a doesn't own its own data
+ assert get_handler_name(a) is None
+ assert get_handler_version(a) is None
+ assert get_handler_name(a.base) == orig_policy_name
+ assert get_handler_version(a.base) == 1
+
+ orig_policy = get_module.set_secret_data_policy()
+
+ b = np.arange(10).reshape((2, 5)) # b doesn't own its own data
+ assert get_handler_name(b) is None
+ assert get_handler_version(b) is None
+ assert get_handler_name(b.base) == 'secret_data_allocator'
+ assert get_handler_version(b.base) == 1
+
+ if orig_policy_name == 'default_allocator':
+ get_module.set_old_policy(None) # tests PyDataMem_SetHandler(NULL)
+ assert get_handler_name() == 'default_allocator'
+ else:
+ get_module.set_old_policy(orig_policy)
+ assert get_handler_name() == orig_policy_name
+
+
+def test_default_policy_singleton(get_module):
+ get_handler_name = np.core.multiarray.get_handler_name
+
+ # set the policy to default
+ orig_policy = get_module.set_old_policy(None)
+
+ assert get_handler_name() == 'default_allocator'
+
+ # re-set the policy to default
+ def_policy_1 = get_module.set_old_policy(None)
+
+ assert get_handler_name() == 'default_allocator'
+
+ # set the policy to original
+ def_policy_2 = get_module.set_old_policy(orig_policy)
+
+ # since default policy is a singleton,
+ # these should be the same object
+ assert def_policy_1 is def_policy_2 is get_module.get_default_policy()
+
+
+def test_policy_propagation(get_module):
+ # The memory policy goes hand-in-hand with flags.owndata
+
+ class MyArr(np.ndarray):
+ pass
+
+ get_handler_name = np.core.multiarray.get_handler_name
+ orig_policy_name = get_handler_name()
+ a = np.arange(10).view(MyArr).reshape((2, 5))
+ assert get_handler_name(a) is None
+ assert a.flags.owndata is False
+
+ assert get_handler_name(a.base) is None
+ assert a.base.flags.owndata is False
+
+ assert get_handler_name(a.base.base) == orig_policy_name
+ assert a.base.base.flags.owndata is True
+
+
+async def concurrent_context1(get_module, orig_policy_name, event):
+ if orig_policy_name == 'default_allocator':
+ get_module.set_secret_data_policy()
+ assert np.core.multiarray.get_handler_name() == 'secret_data_allocator'
+ else:
+ get_module.set_old_policy(None)
+ assert np.core.multiarray.get_handler_name() == 'default_allocator'
+ event.set()
+
+
+async def concurrent_context2(get_module, orig_policy_name, event):
+ await event.wait()
+ # the policy is not affected by changes in parallel contexts
+ assert np.core.multiarray.get_handler_name() == orig_policy_name
+ # change policy in the child context
+ if orig_policy_name == 'default_allocator':
+ get_module.set_secret_data_policy()
+ assert np.core.multiarray.get_handler_name() == 'secret_data_allocator'
+ else:
+ get_module.set_old_policy(None)
+ assert np.core.multiarray.get_handler_name() == 'default_allocator'
+
+
+async def async_test_context_locality(get_module):
+ orig_policy_name = np.core.multiarray.get_handler_name()
+
+ event = asyncio.Event()
+ # the child contexts inherit the parent policy
+ concurrent_task1 = asyncio.create_task(
+ concurrent_context1(get_module, orig_policy_name, event))
+ concurrent_task2 = asyncio.create_task(
+ concurrent_context2(get_module, orig_policy_name, event))
+ await concurrent_task1
+ await concurrent_task2
+
+ # the parent context is not affected by child policy changes
+ assert np.core.multiarray.get_handler_name() == orig_policy_name
+
+
+def test_context_locality(get_module):
+ if (sys.implementation.name == 'pypy'
+ and sys.pypy_version_info[:3] < (7, 3, 6)):
+ pytest.skip('no context-locality support in PyPy < 7.3.6')
+ asyncio.run(async_test_context_locality(get_module))
+
+
+def concurrent_thread1(get_module, event):
+ get_module.set_secret_data_policy()
+ assert np.core.multiarray.get_handler_name() == 'secret_data_allocator'
+ event.set()
+
+
+def concurrent_thread2(get_module, event):
+ event.wait()
+ # the policy is not affected by changes in parallel threads
+ assert np.core.multiarray.get_handler_name() == 'default_allocator'
+ # change policy in the child thread
+ get_module.set_secret_data_policy()
+
+
+def test_thread_locality(get_module):
+ orig_policy_name = np.core.multiarray.get_handler_name()
+
+ event = threading.Event()
+ # the child threads do not inherit the parent policy
+ concurrent_task1 = threading.Thread(target=concurrent_thread1,
+ args=(get_module, event))
+ concurrent_task2 = threading.Thread(target=concurrent_thread2,
+ args=(get_module, event))
+ concurrent_task1.start()
+ concurrent_task2.start()
+ concurrent_task1.join()
+ concurrent_task2.join()
+
+ # the parent thread is not affected by child policy changes
+ assert np.core.multiarray.get_handler_name() == orig_policy_name
+
+
+@pytest.mark.slow
+def test_new_policy(get_module):
+ a = np.arange(10)
+ orig_policy_name = np.core.multiarray.get_handler_name(a)
+
+ orig_policy = get_module.set_secret_data_policy()
+
+ b = np.arange(10)
+ assert np.core.multiarray.get_handler_name(b) == 'secret_data_allocator'
+
+ # test array manipulation. This is slow
+ if orig_policy_name == 'default_allocator':
+ # when the np.core.test tests recurse into this test, the
+ # policy will be set so this "if" will be false, preventing
+ # infinite recursion
+ #
+ # if needed, debug this by
+ # - running tests with -- -s (so stdout/stderr are not captured)
+ # - setting extra_argv=['-vv'] here
+ assert np.core.test('full', verbose=2, extra_argv=['-vv'])
+ # also try the ma tests, the pickling test is quite tricky
+ assert np.ma.test('full', verbose=2, extra_argv=['-vv'])
+
+ get_module.set_old_policy(orig_policy)
+
+ c = np.arange(10)
+ assert np.core.multiarray.get_handler_name(c) == orig_policy_name
+
+@pytest.mark.xfail(sys.implementation.name == "pypy",
+ reason=("bad interaction between getenv and "
+ "os.environ inside pytest"))
+@pytest.mark.parametrize("policy", ["0", "1", None])
+def test_switch_owner(get_module, policy):
+ a = get_module.get_array()
+ assert np.core.multiarray.get_handler_name(a) is None
+ get_module.set_own(a)
+ oldval = os.environ.get('NUMPY_WARN_IF_NO_MEM_POLICY', None)
+ if policy is None:
+ if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ:
+ os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY')
+ else:
+ os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = policy
+ try:
+ # The policy should be NULL, so we have to assume we can call
+ # "free". A warning is given if the policy == "1"
+ if policy == "1":
+ with assert_warns(RuntimeWarning) as w:
+ del a
+ gc.collect()
+ else:
+ del a
+ gc.collect()
+
+ finally:
+ if oldval is None:
+ if 'NUMPY_WARN_IF_NO_MEM_POLICY' in os.environ:
+ os.environ.pop('NUMPY_WARN_IF_NO_MEM_POLICY')
+ else:
+ os.environ['NUMPY_WARN_IF_NO_MEM_POLICY'] = oldval
+
+def test_owner_is_base(get_module):
+ a = get_module.get_array_with_base()
+ with pytest.warns(UserWarning, match='warn_on_free'):
+ del a
+ gc.collect()
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 0da36bbea..23182470b 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -7814,6 +7814,216 @@ class TestNewBufferProtocol:
_multiarray_tests.corrupt_or_fix_bufferinfo(obj)
+class TestArrayCreationCopyArgument(object):
+
+ class RaiseOnBool:
+
+ def __bool__(self):
+ raise ValueError
+
+ true_vals = [True, np._CopyMode.ALWAYS, np.True_]
+ false_vals = [False, np._CopyMode.IF_NEEDED, np.False_]
+
+ def test_scalars(self):
+ # Test both numpy and python scalars
+ for dtype in np.typecodes["All"]:
+ arr = np.zeros((), dtype=dtype)
+ scalar = arr[()]
+ pyscalar = arr.item(0)
+
+ # Test never-copy raises error:
+ assert_raises(ValueError, np.array, scalar,
+ copy=np._CopyMode.NEVER)
+ assert_raises(ValueError, np.array, pyscalar,
+ copy=np._CopyMode.NEVER)
+ assert_raises(ValueError, np.array, pyscalar,
+ copy=self.RaiseOnBool())
+ assert_raises(ValueError, _multiarray_tests.npy_ensurenocopy,
+ [1])
+ # Casting with a dtype (to unsigned integers) can be special:
+ with pytest.raises(ValueError):
+ np.array(pyscalar, dtype=np.int64, copy=np._CopyMode.NEVER)
+
+ def test_compatible_cast(self):
+
+ # Some types are compatible even though they are different, no
+ # copy is necessary for them. This is mostly true for some integers
+ def int_types(byteswap=False):
+ int_types = (np.typecodes["Integer"] +
+ np.typecodes["UnsignedInteger"])
+ for int_type in int_types:
+ yield np.dtype(int_type)
+ if byteswap:
+ yield np.dtype(int_type).newbyteorder()
+
+ for int1 in int_types():
+ for int2 in int_types(True):
+ arr = np.arange(10, dtype=int1)
+
+ for copy in self.true_vals:
+ res = np.array(arr, copy=copy, dtype=int2)
+ assert res is not arr and res.flags.owndata
+ assert_array_equal(res, arr)
+
+ if int1 == int2:
+ # Casting is not necessary, base check is sufficient here
+ for copy in self.false_vals:
+ res = np.array(arr, copy=copy, dtype=int2)
+ assert res is arr or res.base is arr
+
+ res = np.array(arr,
+ copy=np._CopyMode.NEVER,
+ dtype=int2)
+ assert res is arr or res.base is arr
+
+ else:
+ # Casting is necessary, assert copy works:
+ for copy in self.false_vals:
+ res = np.array(arr, copy=copy, dtype=int2)
+ assert res is not arr and res.flags.owndata
+ assert_array_equal(res, arr)
+
+ assert_raises(ValueError, np.array,
+ arr, copy=np._CopyMode.NEVER,
+ dtype=int2)
+ assert_raises(ValueError, np.array,
+ arr, copy=None,
+ dtype=int2)
+
+ def test_buffer_interface(self):
+
+ # Buffer interface gives direct memory access (no copy)
+ arr = np.arange(10)
+ view = memoryview(arr)
+
+ # Checking bases is a bit tricky since numpy creates another
+ # memoryview, so use may_share_memory.
+ for copy in self.true_vals:
+ res = np.array(view, copy=copy)
+ assert not np.may_share_memory(arr, res)
+ for copy in self.false_vals:
+ res = np.array(view, copy=copy)
+ assert np.may_share_memory(arr, res)
+ res = np.array(view, copy=np._CopyMode.NEVER)
+ assert np.may_share_memory(arr, res)
+
+ def test_array_interfaces(self):
+ # Array interface gives direct memory access (much like a memoryview)
+ base_arr = np.arange(10)
+
+ class ArrayLike:
+ __array_interface__ = base_arr.__array_interface__
+
+ arr = ArrayLike()
+
+ for copy, val in [(True, None), (np._CopyMode.ALWAYS, None),
+ (False, arr), (np._CopyMode.IF_NEEDED, arr),
+ (np._CopyMode.NEVER, arr)]:
+ res = np.array(arr, copy=copy)
+ assert res.base is val
+
+ def test___array__(self):
+ base_arr = np.arange(10)
+
+ class ArrayLike:
+ def __array__(self):
+ # __array__ should return a copy, numpy cannot know this
+ # however.
+ return base_arr
+
+ arr = ArrayLike()
+
+ for copy in self.true_vals:
+ res = np.array(arr, copy=copy)
+ assert_array_equal(res, base_arr)
+ # An additional copy is currently forced by numpy in this case,
+ # you could argue, numpy does not trust the ArrayLike. This
+ # may be open for change:
+ assert res is not base_arr
+
+ for copy in self.false_vals:
+ res = np.array(arr, copy=False)
+ assert_array_equal(res, base_arr)
+ assert res is base_arr # numpy trusts the ArrayLike
+
+ with pytest.raises(ValueError):
+ np.array(arr, copy=np._CopyMode.NEVER)
+
+ @pytest.mark.parametrize(
+ "arr", [np.ones(()), np.arange(81).reshape((9, 9))])
+ @pytest.mark.parametrize("order1", ["C", "F", None])
+ @pytest.mark.parametrize("order2", ["C", "F", "A", "K"])
+ def test_order_mismatch(self, arr, order1, order2):
+ # The order is the main (python side) reason that can cause
+ # a never-copy to fail.
+ # Prepare C-order, F-order and non-contiguous arrays:
+ arr = arr.copy(order1)
+ if order1 == "C":
+ assert arr.flags.c_contiguous
+ elif order1 == "F":
+ assert arr.flags.f_contiguous
+ elif arr.ndim != 0:
+ # Make array non-contiguous
+ arr = arr[::2, ::2]
+ assert not arr.flags.forc
+
+ # Whether a copy is necessary depends on the order of arr:
+ if order2 == "C":
+ no_copy_necessary = arr.flags.c_contiguous
+ elif order2 == "F":
+ no_copy_necessary = arr.flags.f_contiguous
+ else:
+ # Keeporder and Anyorder are OK with non-contiguous output.
+ # This is not consistent with the `astype` behaviour which
+ # enforces contiguity for "A". It is probably historic from when
+ # "K" did not exist.
+ no_copy_necessary = True
+
+ # Test it for both the array and a memoryview
+ for view in [arr, memoryview(arr)]:
+ for copy in self.true_vals:
+ res = np.array(view, copy=copy, order=order2)
+ assert res is not arr and res.flags.owndata
+ assert_array_equal(arr, res)
+
+ if no_copy_necessary:
+ for copy in self.false_vals:
+ res = np.array(view, copy=copy, order=order2)
+ # res.base.obj refers to the memoryview
+ if not IS_PYPY:
+ assert res is arr or res.base.obj is arr
+
+ res = np.array(view, copy=np._CopyMode.NEVER,
+ order=order2)
+ if not IS_PYPY:
+ assert res is arr or res.base.obj is arr
+ else:
+ for copy in self.false_vals:
+ res = np.array(arr, copy=copy, order=order2)
+ assert_array_equal(arr, res)
+ assert_raises(ValueError, np.array,
+ view, copy=np._CopyMode.NEVER,
+ order=order2)
+ assert_raises(ValueError, np.array,
+ view, copy=None,
+ order=order2)
+
+ def test_striding_not_ok(self):
+ arr = np.array([[1, 2, 4], [3, 4, 5]])
+ assert_raises(ValueError, np.array,
+ arr.T, copy=np._CopyMode.NEVER,
+ order='C')
+ assert_raises(ValueError, np.array,
+ arr.T, copy=np._CopyMode.NEVER,
+ order='C', dtype=np.int64)
+ assert_raises(ValueError, np.array,
+ arr, copy=np._CopyMode.NEVER,
+ order='F')
+ assert_raises(ValueError, np.array,
+ arr, copy=np._CopyMode.NEVER,
+ order='F', dtype=np.int64)
+
+
class TestArrayAttributeDeletion:
def test_multiarray_writable_attributes_deletion(self):
@@ -7977,18 +8187,6 @@ def test_scalar_element_deletion():
assert_raises(ValueError, a[0].__delitem__, 'x')
-class TestMemEventHook:
- def test_mem_seteventhook(self):
- # The actual tests are within the C code in
- # multiarray/_multiarray_tests.c.src
- _multiarray_tests.test_pydatamem_seteventhook_start()
- # force an allocation and free of a numpy array
- # needs to be larger then limit of small memory cacher in ctors.c
- a = np.zeros(1000)
- del a
- break_cycles()
- _multiarray_tests.test_pydatamem_seteventhook_end()
-
class TestMapIter:
def test_mapiter(self):
# The actual tests are within the C code in
diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py
index fbf6da0e1..ed775cac6 100644
--- a/numpy/core/tests/test_nditer.py
+++ b/numpy/core/tests/test_nditer.py
@@ -9,7 +9,7 @@ import numpy.core._multiarray_tests as _multiarray_tests
from numpy import array, arange, nditer, all
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_raises,
- HAS_REFCOUNT, suppress_warnings
+ HAS_REFCOUNT, suppress_warnings, break_cycles
)
@@ -3128,6 +3128,8 @@ def test_warn_noclose():
assert len(sup.log) == 1
+@pytest.mark.skipif(sys.version_info[:2] == (3, 9) and sys.platform == "win32",
+ reason="Errors with Python 3.9 on Windows")
@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
@pytest.mark.parametrize(["in_dtype", "buf_dtype"],
[("i", "O"), ("O", "i"), # most simple cases
@@ -3148,6 +3150,8 @@ def test_partial_iteration_cleanup(in_dtype, buf_dtype, steps):
# Note that resetting does not free references
del it
+ break_cycles()
+ break_cycles()
assert count == sys.getrefcount(value)
# Repeat the test with `iternext`
@@ -3157,6 +3161,8 @@ def test_partial_iteration_cleanup(in_dtype, buf_dtype, steps):
it.iternext()
del it # should ensure cleanup
+ break_cycles()
+ break_cycles()
assert count == sys.getrefcount(value)
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index e36f76c53..ad9437911 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -16,7 +16,7 @@ from numpy.testing import (
)
from numpy.core._rational_tests import rational
-from hypothesis import assume, given, strategies as st
+from hypothesis import given, strategies as st
from hypothesis.extra import numpy as hynp
@@ -646,7 +646,7 @@ class TestFloatExceptions:
if np.dtype(ftype).kind == 'f':
# Get some extreme values for the type
fi = np.finfo(ftype)
- ft_tiny = fi.machar.tiny
+ ft_tiny = fi._machar.tiny
ft_max = fi.max
ft_eps = fi.eps
underflow = 'underflow'
@@ -655,7 +655,7 @@ class TestFloatExceptions:
# 'c', complex, corresponding real dtype
rtype = type(ftype(0).real)
fi = np.finfo(rtype)
- ft_tiny = ftype(fi.machar.tiny)
+ ft_tiny = ftype(fi._machar.tiny)
ft_max = ftype(fi.max)
ft_eps = ftype(fi.eps)
# The complex types raise different exceptions
@@ -932,25 +932,6 @@ class TestTypes:
# Promote with object:
assert_equal(promote_types('O', S+'30'), np.dtype('O'))
- @pytest.mark.parametrize(["dtype1", "dtype2"],
- [[np.dtype("V6"), np.dtype("V10")],
- [np.dtype([("name1", "i8")]), np.dtype([("name2", "i8")])],
- [np.dtype("i8,i8"), np.dtype("i4,i4")],
- ])
- def test_invalid_void_promotion(self, dtype1, dtype2):
- # Mainly test structured void promotion, which currently allows
- # byte-swapping, but nothing else:
- with pytest.raises(TypeError):
- np.promote_types(dtype1, dtype2)
-
- @pytest.mark.parametrize(["dtype1", "dtype2"],
- [[np.dtype("V10"), np.dtype("V10")],
- [np.dtype([("name1", "<i8")]), np.dtype([("name1", ">i8")])],
- [np.dtype("i8,i8"), np.dtype("i8,>i8")],
- ])
- def test_valid_void_promotion(self, dtype1, dtype2):
- assert np.promote_types(dtype1, dtype2) is dtype1
-
@pytest.mark.parametrize("dtype",
list(np.typecodes["All"]) +
["i,i", "S3", "S100", "U3", "U100", rational])
@@ -1503,6 +1484,18 @@ class TestNonzero:
a = np.array([[False], [TrueThenFalse()]])
assert_raises(RuntimeError, np.nonzero, a)
+ def test_nonzero_sideffects_structured_void(self):
+ # Checks that structured void does not mutate alignment flag of
+ # original array.
+ arr = np.zeros(5, dtype="i1,i8,i8") # `ones` may short-circuit
+ assert arr.flags.aligned # structs are considered "aligned"
+ assert not arr["f2"].flags.aligned
+ # make sure that nonzero/count_nonzero do not flip the flag:
+ np.nonzero(arr)
+ assert arr.flags.aligned
+ np.count_nonzero(arr)
+ assert arr.flags.aligned
+
def test_nonzero_exception_safe(self):
# gh-13930
diff --git a/numpy/core/tests/test_scalar_methods.py b/numpy/core/tests/test_scalar_methods.py
index 6077c8f75..eef4c1433 100644
--- a/numpy/core/tests/test_scalar_methods.py
+++ b/numpy/core/tests/test_scalar_methods.py
@@ -183,3 +183,21 @@ def test_class_getitem_38(cls: Type[np.number]) -> None:
match = "Type subscription requires python >= 3.9"
with pytest.raises(TypeError, match=match):
cls[Any]
+
+
+class TestBitCount:
+ # derived in part from the cpython test "test_bit_count"
+
+ @pytest.mark.parametrize("itype", np.sctypes['int']+np.sctypes['uint'])
+ def test_small(self, itype):
+ for a in range(max(np.iinfo(itype).min, 0), 128):
+ msg = f"Smoke test for {itype}({a}).bit_count()"
+ assert itype(a).bit_count() == bin(a).count("1"), msg
+
+ def test_bit_count(self):
+ for exp in [10, 17, 63]:
+ a = 2**exp
+ assert np.uint64(a).bit_count() == 1
+ assert np.uint64(a - 1).bit_count() == exp
+ assert np.uint64(a ^ 63).bit_count() == 7
+ assert np.uint64((a - 1) ^ 510).bit_count() == exp - 8
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index becd65b11..90078a2ea 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -5,14 +5,14 @@ import itertools
import operator
import platform
import pytest
-from hypothesis import given, settings, Verbosity, assume
+from hypothesis import given, settings, Verbosity
from hypothesis.strategies import sampled_from
import numpy as np
from numpy.testing import (
assert_, assert_equal, assert_raises, assert_almost_equal,
assert_array_equal, IS_PYPY, suppress_warnings, _gen_alignment_data,
- assert_warns, assert_raises_regex,
+ assert_warns,
)
types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc,
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index f0c60953b..12a67c44d 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -329,7 +329,38 @@ class _SIMD_FP(_Test_Utility):
data_square = [x*x for x in data]
square = self.square(vdata)
assert square == data_square
-
+
+ @pytest.mark.parametrize("intrin, func", [("self.ceil", math.ceil),
+ ("self.trunc", math.trunc)])
+ def test_rounding(self, intrin, func):
+ """
+ Test intrinsics:
+ npyv_ceil_##SFX
+ npyv_trunc_##SFX
+ """
+ intrin_name = intrin
+ intrin = eval(intrin)
+ pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
+ # special cases
+ round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf))
+ for case, desired in round_cases:
+ data_round = [desired]*self.nlanes
+ _round = intrin(self.setall(case))
+ assert _round == pytest.approx(data_round, nan_ok=True)
+ for x in range(0, 2**20, 256**2):
+ for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15):
+ data = [x*w+a for a in range(self.nlanes)]
+ vdata = self.load(data)
+ data_round = [func(x) for x in data]
+ _round = intrin(vdata)
+ assert _round == data_round
+ # signed zero
+ if "ceil" in intrin_name or "trunc" in intrin_name:
+ for w in (-0.25, -0.30, -0.45):
+ _round = self._to_unsigned(intrin(self.setall(w)))
+ data_round = self._to_unsigned(self.setall(-0.0))
+ assert _round == data_round
+
def test_max(self):
"""
Test intrinsics:
@@ -818,6 +849,7 @@ class _SIMD_ALL(_Test_Utility):
if self._is_fp():
return
+ int_min = self._int_min()
def trunc_div(a, d):
"""
Divide towards zero works with large integers > 2^53,
@@ -830,57 +862,31 @@ class _SIMD_ALL(_Test_Utility):
return a // d
return (a + sign_d - sign_a) // d + 1
- int_min = self._int_min() if self._is_signed() else 1
- int_max = self._int_max()
- rdata = (
- 0, 1, self.nlanes, int_max-self.nlanes,
- int_min, int_min//2 + 1
- )
- divisors = (1, 2, 9, 13, self.nlanes, int_min, int_max, int_max//2)
-
- for x, d in itertools.product(rdata, divisors):
- data = self._data(x)
- vdata = self.load(data)
- data_divc = [trunc_div(a, d) for a in data]
- divisor = self.divisor(d)
- divc = self.divc(vdata, divisor)
- assert divc == data_divc
-
- if not self._is_signed():
- return
-
- safe_neg = lambda x: -x-1 if -x > int_max else -x
- # test round division for signed integers
- for x, d in itertools.product(rdata, divisors):
- d_neg = safe_neg(d)
- data = self._data(x)
- data_neg = [safe_neg(a) for a in data]
- vdata = self.load(data)
- vdata_neg = self.load(data_neg)
- divisor = self.divisor(d)
- divisor_neg = self.divisor(d_neg)
-
- # round towards zero
- data_divc = [trunc_div(a, d_neg) for a in data]
- divc = self.divc(vdata, divisor_neg)
- assert divc == data_divc
- data_divc = [trunc_div(a, d) for a in data_neg]
- divc = self.divc(vdata_neg, divisor)
+ data = [1, -int_min] # to test overflow
+ data += range(0, 2**8, 2**5)
+ data += range(0, 2**8, 2**5-1)
+ bsize = self._scalar_size()
+ if bsize > 8:
+ data += range(2**8, 2**16, 2**13)
+ data += range(2**8, 2**16, 2**13-1)
+ if bsize > 16:
+ data += range(2**16, 2**32, 2**29)
+ data += range(2**16, 2**32, 2**29-1)
+ if bsize > 32:
+ data += range(2**32, 2**64, 2**61)
+ data += range(2**32, 2**64, 2**61-1)
+ # negate
+ data += [-x for x in data]
+ for dividend, divisor in itertools.product(data, data):
+ divisor = self.setall(divisor)[0] # cast
+ if divisor == 0:
+ continue
+ dividend = self.load(self._data(dividend))
+ data_divc = [trunc_div(a, divisor) for a in dividend]
+ divisor_parms = self.divisor(divisor)
+ divc = self.divc(dividend, divisor_parms)
assert divc == data_divc
- # test truncate sign if the dividend is zero
- vzero = self.zero()
- for d in (-1, -10, -100, int_min//2, int_min):
- divisor = self.divisor(d)
- divc = self.divc(vzero, divisor)
- assert divc == vzero
-
- # test overflow
- vmin = self.setall(int_min)
- divisor = self.divisor(-1)
- divc = self.divc(vmin, divisor)
- assert divc == vmin
-
def test_arithmetic_reduce_sum(self):
"""
Test reduce sum intrinsics:
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 30929ce91..ef0bac957 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1362,6 +1362,14 @@ class TestUfunc:
np.array([[2]*i for i in [1, 3, 6, 10]], dtype=object),
)
+ def test_object_array_accumulate_failure(self):
+ # Typical accumulation on object works as expected:
+ res = np.add.accumulate(np.array([1, 0, 2], dtype=object))
+ assert_array_equal(res, np.array([1, 1, 3], dtype=object))
+ # But errors are propagated from the inner-loop if they occur:
+ with pytest.raises(TypeError):
+ np.add.accumulate([1, None, 2])
+
def test_object_array_reduceat_inplace(self):
# Checks that in-place reduceats work, see also gh-7465
arr = np.empty(4, dtype=object)
@@ -1381,6 +1389,15 @@ class TestUfunc:
np.add.reduceat(arr, np.arange(4), out=arr, axis=-1)
assert_array_equal(arr, out)
+ def test_object_array_reduceat_failure(self):
+ # Reduceat works as expected when no invalid operation occurs (None is
+ # not involved in an operation here)
+ res = np.add.reduceat(np.array([1, None, 2], dtype=object), [1, 2])
+ assert_array_equal(res, np.array([None, 2], dtype=object))
+ # But errors when None would be involved in an operation:
+ with pytest.raises(TypeError):
+ np.add.reduceat([1, None, 2], [0, 2])
+
def test_zerosize_reduction(self):
# Test with default dtype and object dtype
for a in [[], np.array([], dtype=object)]:
@@ -2098,6 +2115,25 @@ class TestUfunc:
with pytest.raises(TypeError):
ufunc(a, a, signature=signature)
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_support_anything(self, ufunc):
+ # The logical ufuncs support even input that can't be promoted:
+ a = np.array('1')
+ c = np.array([1., 2.])
+ assert_array_equal(ufunc(a, c), ufunc([True, True], True))
+ assert ufunc.reduce(a) == True
+
+ @pytest.mark.parametrize("ufunc",
+ [np.logical_and, np.logical_or, np.logical_xor])
+ def test_logical_ufuncs_out_cast_check(self, ufunc):
+ a = np.array('1')
+ c = np.array([1., 2.])
+ out = a.copy()
+ with pytest.raises(TypeError):
+ # It would be safe, but not equiv casting:
+ ufunc(a, c, out=out, casting="equiv")
+
def test_reduce_noncontig_output(self):
# Check that reduction deals with non-contiguous output arrays
# appropriately.
@@ -2119,6 +2155,22 @@ class TestUfunc:
assert_equal(y_base[1,:], y_base_copy[1,:])
assert_equal(y_base[3,:], y_base_copy[3,:])
+ @pytest.mark.parametrize("with_cast", [True, False])
+ def test_reduceat_and_accumulate_out_shape_mismatch(self, with_cast):
+ # Should raise an error mentioning "shape" or "size"
+ arr = np.arange(5)
+ out = np.arange(3) # definitely wrong shape
+ if with_cast:
+ # If a cast is necessary on the output, we can be sure to use
+ # the generic NpyIter (non-fast) path.
+ out = out.astype(np.float64)
+
+ with pytest.raises(ValueError, match="(shape|size)"):
+ np.add.reduceat(arr, [0, 3], out=out)
+
+ with pytest.raises(ValueError, match="(shape|size)"):
+ np.add.accumulate(arr, out=out)
+
@pytest.mark.parametrize('out_shape',
[(), (1,), (3,), (1, 1), (1, 3), (4, 3)])
@pytest.mark.parametrize('keepdims', [True, False])
@@ -2331,8 +2383,9 @@ def test_reduce_casterrors(offset):
out = np.array(-1, dtype=np.intp)
count = sys.getrefcount(value)
- with pytest.raises(ValueError):
- # This is an unsafe cast, but we currently always allow that:
+ with pytest.raises(ValueError, match="invalid literal"):
+ # This is an unsafe cast, but we currently always allow that.
+ # Note that the double loop is picked, but the cast fails.
np.add.reduce(arr, dtype=np.intp, out=out)
assert count == sys.getrefcount(value)
# If an error occurred during casting, the operation is done at most until
@@ -2340,3 +2393,20 @@ def test_reduce_casterrors(offset):
# if the error happened immediately.
# This does not define behaviour, the output is invalid and thus undefined
assert out[()] < value * offset
+
+
+@pytest.mark.parametrize("method",
+ [np.add.accumulate, np.add.reduce,
+ pytest.param(lambda x: np.add.reduceat(x, [0]), id="reduceat"),
+ pytest.param(lambda x: np.log.at(x, [2]), id="at")])
+def test_ufunc_methods_floaterrors(method):
+ # adding inf and -inf (or log(-inf)) creates an invalid float and warns
+ arr = np.array([np.inf, 0, -np.inf])
+ with np.errstate(all="warn"):
+ with pytest.warns(RuntimeWarning, match="invalid value"):
+ method(arr)
+
+ arr = np.array([np.inf, 0, -np.inf])
+ with np.errstate(all="raise"):
+ with pytest.raises(FloatingPointError):
+ method(arr)
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 8ff81ea51..fc7c592f0 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -15,7 +15,7 @@ from numpy.testing import (
assert_, assert_equal, assert_raises, assert_raises_regex,
assert_array_equal, assert_almost_equal, assert_array_almost_equal,
assert_array_max_ulp, assert_allclose, assert_no_warnings, suppress_warnings,
- _gen_alignment_data, assert_array_almost_equal_nulp, assert_warns
+ _gen_alignment_data, assert_array_almost_equal_nulp
)
def get_glibc_version():
@@ -28,9 +28,7 @@ def get_glibc_version():
glibcver = get_glibc_version()
-glibc_newerthan_2_17 = pytest.mark.xfail(
- glibcver != '0.0' and glibcver < '2.17',
- reason="Older glibc versions may not raise appropriate FP exceptions")
+glibc_older_than = lambda x: (glibcver != '0.0' and glibcver < x)
def on_powerpc():
""" True if we are running on a Power PC platform."""
@@ -50,14 +48,6 @@ def bad_arcsinh():
# The eps for float128 is 1-e33, so this is way bigger
return abs((v1 / v2) - 1.0) > 1e-23
-if platform.machine() == 'aarch64' and bad_arcsinh():
- skip_longcomplex_msg = ('Trig functions of np.longcomplex values known to be '
- 'inaccurate on aarch64 for some compilation '
- 'configurations, should be fixed by building on a '
- 'platform using glibc>2.17')
-else:
- skip_longcomplex_msg = ''
-
class _FilterInvalids:
def setup(self):
@@ -1022,9 +1012,11 @@ class TestSpecialFloats:
yf = np.array(y, dtype=dt)
assert_equal(np.exp(yf), xf)
- # Older version of glibc may not raise the correct FP exceptions
# See: https://github.com/numpy/numpy/issues/19192
- @glibc_newerthan_2_17
+ @pytest.mark.xfail(
+ glibc_older_than("2.17"),
+ reason="Older glibc versions may not raise appropriate FP exceptions"
+ )
def test_exp_exceptions(self):
with np.errstate(over='raise'):
assert_raises(FloatingPointError, np.exp, np.float32(100.))
@@ -1405,8 +1397,10 @@ class TestAVXFloat32Transcendental:
M = np.int_(N/20)
index = np.random.randint(low=0, high=N, size=M)
x_f32 = np.float32(np.random.uniform(low=-100.,high=100.,size=N))
- # test coverage for elements > 117435.992f for which glibc is used
- x_f32[index] = np.float32(10E+10*np.random.rand(M))
+ if not glibc_older_than("2.17"):
+ # test coverage for elements > 117435.992f for which glibc is used
+ # this is known to be problematic on old glibc, so skip it there
+ x_f32[index] = np.float32(10E+10*np.random.rand(M))
x_f64 = np.float64(x_f32)
assert_array_max_ulp(np.sin(x_f32), np.float32(np.sin(x_f64)), maxulp=2)
assert_array_max_ulp(np.cos(x_f32), np.float32(np.cos(x_f64)), maxulp=2)
@@ -3439,13 +3433,14 @@ class TestComplexFunctions:
x_series = np.logspace(-20, -3.001, 200)
x_basic = np.logspace(-2.999, 0, 10, endpoint=False)
- if dtype is np.longcomplex:
+ if glibc_older_than("2.19") and dtype is np.longcomplex:
+ if (platform.machine() == 'aarch64' and bad_arcsinh()):
+ pytest.skip("Trig functions of np.longcomplex values known "
+ "to be inaccurate on aarch64 for some compilation "
+ "configurations.")
# It's not guaranteed that the system-provided arc functions
# are accurate down to a few epsilons. (Eg. on Linux 64-bit)
# So, give more leeway for long complex tests here:
- # Can use 2.1 for > Ubuntu LTS Trusty (2014), glibc = 2.19.
- if skip_longcomplex_msg:
- pytest.skip(skip_longcomplex_msg)
check(x_series, 50.0*eps)
else:
check(x_series, 2.1*eps)
@@ -3886,3 +3881,11 @@ def test_bad_legacy_ufunc_silent_errors():
with pytest.raises(RuntimeError, match=r"How unexpected :\)!"):
ncu_tests.always_error.at(arr, [0, 1, 2], arr)
+
+
+@pytest.mark.parametrize('x1', [np.arange(3.0), [0.0, 1.0, 2.0]])
+def test_bad_legacy_gufunc_silent_errors(x1):
+ # Verify that an exception raised in a gufunc loop propagates correctly.
+ # The signature of always_error_gufunc is '(i),()->()'.
+ with pytest.raises(RuntimeError, match=r"How unexpected :\)!"):
+ ncu_tests.always_error_gufunc(x1, 0.0)
diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py
index a703c697a..32e2dca66 100644
--- a/numpy/core/tests/test_umath_accuracy.py
+++ b/numpy/core/tests/test_umath_accuracy.py
@@ -1,5 +1,4 @@
import numpy as np
-import platform
import os
from os import path
import sys