53 files changed, 1244 insertions, 325 deletions
diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml
index 239a18602..86fb094c6 100644
--- a/.github/workflows/build_test.yml
+++ b/.github/workflows/build_test.yml
@@ -277,3 +277,35 @@ jobs:
         docker run --rm --interactive -v $(pwd):/numpy the_build /bin/bash -c "
         cd /numpy && python3 runtests.py -n -v -- -k test_simd
         "
+
+  sde_simd_avx512_test:
+    # Intel Software Development Emulator (SDE) is used to run a given program
+    # on a specific instruction set architecture and capture various performance details.
+    # see https://www.intel.com/content/www/us/en/developer/articles/tool/software-development-emulator.html
+    needs: [smoke_test]
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+      with:
+        submodules: recursive
+        fetch-depth: 0
+    - uses: actions/setup-python@v2
+      with:
+        python-version: ${{ env.PYTHON_VERSION }}
+    - name: Install Intel SDE
+      run: |
+        curl -o /tmp/sde.tar.bz2 https://www.intel.com/content/dam/develop/external/us/en/documents/downloads/sde-external-8.69.1-2021-07-18-lin.tar.bz2
+        mkdir /tmp/sde && tar -xvf /tmp/sde.tar.bz2 -C /tmp/sde/
+        sudo mv /tmp/sde/* /opt/sde && sudo ln -s /opt/sde/sde64 /usr/bin/sde
+    - name: Install dependencies
+      run: python -m pip install -r test_requirements.txt
+    - name: Build
+      run: python setup.py build
+           --simd-test="\$werror AVX512F AVX512_KNL AVX512_KNM AVX512_SKX AVX512_CLX AVX512_CNL AVX512_ICL"
+           install
+    # KNM implies KNL
+    - name: Run SIMD tests (Xeon PHI)
+      run: sde -knm -- python runtests.py -n -v -- -k test_simd
+    # ICL implies SKX, CLX and CNL
+    - name: Run SIMD tests (Ice Lake)
+      run: sde -icl -- python runtests.py -n -v -- -k test_simd
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index f919debaa..3c382f8b3 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -40,7 +40,7 @@ jobs:
     name: Build wheel for cp${{ matrix.python }}-${{ matrix.platform }}
     needs: get_commit_message
     if: >-
-      contains(needs.get_commit_message.outputs.message, '[cd build]') ||
+      contains(needs.get_commit_message.outputs.message, '[wheel build]') ||
       github.event_name == 'schedule' ||
       github.event_name == 'workflow_dispatch'
     runs-on: ${{ matrix.os }}
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 7c21087e1..9d2973b59 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -109,7 +109,7 @@ stages:
       # the docs even though i.e., numba uses another in their
       # azure config for mac os -- Microsoft has indicated
      # they will patch this issue
-      vmImage: 'macOS-10.14'
+      vmImage: 'macOS-10.15'
     strategy:
       maxParallel: 3
       matrix:
diff --git a/doc/neps/nep-0047-array-api-standard.rst b/doc/neps/nep-0047-array-api-standard.rst
index 3e63602cc..53b8e35b0 100644
--- a/doc/neps/nep-0047-array-api-standard.rst
+++ b/doc/neps/nep-0047-array-api-standard.rst
@@ -338,9 +338,10 @@ the options already present in NumPy are:
 
 Adding support for DLPack to NumPy entails:
 
-- Adding a ``ndarray.__dlpack__`` method.
-- Adding a ``from_dlpack`` function, which takes as input an object
-  supporting ``__dlpack__``, and returns an ``ndarray``.
+- Adding a ``ndarray.__dlpack__()`` method which returns a ``dlpack`` C
+  structure wrapped in a ``PyCapsule``.
+- Adding a ``np._from_dlpack(obj)`` function, where ``obj`` supports
+  ``__dlpack__()``, and returns an ``ndarray``.
 
 DLPack is currently a ~200 LoC header, and is meant to be included directly, so
 no external dependency is needed. Implementation should be straightforward.
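As orientation for reviewers, here is a minimal sketch of the interchange flow the NEP text above describes, using the names this PR introduces (`ndarray.__dlpack__`, `ndarray.__dlpack_device__`, `np._from_dlpack`); the shared-buffer assertion reflects the view semantics implemented in `dlpack.c` further down in this diff:

    import numpy as np

    x = np.arange(5)

    # __dlpack__() wraps a DLManagedTensor C struct in a PyCapsule named "dltensor".
    capsule = x.__dlpack__()

    # __dlpack_device__() reports (DLDeviceType, device_id); CPU arrays give (1, 0).
    assert x.__dlpack_device__() == (1, 0)

    # _from_dlpack() accepts any object implementing __dlpack__() and returns an
    # ndarray viewing the exporter's memory rather than copying it.
    y = np._from_dlpack(x)
    y[0] = 42
    assert x[0] == 42  # x and y share one data buffer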
diff --git a/doc/neps/nep-0049.rst b/doc/neps/nep-0049.rst
index 4758edb35..3bd1d102c 100644
--- a/doc/neps/nep-0049.rst
+++ b/doc/neps/nep-0049.rst
@@ -109,6 +109,15 @@
 The name of the handler will be exposed on the python level via a
 ``numpy.core.multiarray.get_handler_name()`` it will return the name of the
 handler that will be used to allocate data for the next new `ndarray`.
 
+The version of the handler will be exposed on the python level via a
+``numpy.core.multiarray.get_handler_version(arr)`` function. If called as
+``numpy.core.multiarray.get_handler_version()`` it will return the version of
+the handler that will be used to allocate data for the next new `ndarray`.
+
+The version, currently 1, allows for future enhancements to the
+``PyDataMemAllocator``. If fields are added, they must be added to the end.
+
+
 NumPy C-API functions
 =====================
 
@@ -119,7 +128,8 @@ NumPy C-API functions
     .. code-block:: c
 
         typedef struct {
-            char name[128];  /* multiple of 64 to keep the struct aligned */
+            char name[127];  /* multiple of 64 to keep the struct aligned */
+            uint8_t version; /* currently 1 */
             PyDataMemAllocator allocator;
         } PyDataMem_Handler;
 
@@ -279,6 +289,7 @@ the ``sz`` argument is correct.
 
         static PyDataMem_Handler new_handler = {
             "secret_data_allocator",
+            1,
             {
                 &new_handler_ctx,
                 shift_alloc,      /* malloc */
diff --git a/doc/release/upcoming_changes/19083.new_feature.rst b/doc/release/upcoming_changes/19083.new_feature.rst
new file mode 100644
index 000000000..92f00c0d6
--- /dev/null
+++ b/doc/release/upcoming_changes/19083.new_feature.rst
@@ -0,0 +1,6 @@
+Add NEP 47-compatible dlpack support
+------------------------------------
+
+Add a ``ndarray.__dlpack__()`` method which returns a ``dlpack`` C structure
+wrapped in a ``PyCapsule``. Also add a ``np._from_dlpack(obj)`` function, where
+``obj`` supports ``__dlpack__()``, and returns an ``ndarray``.
diff --git a/doc/source/reference/c-api/data_memory.rst b/doc/source/reference/c-api/data_memory.rst
index 11a37adc4..b779026b4 100644
--- a/doc/source/reference/c-api/data_memory.rst
+++ b/doc/source/reference/c-api/data_memory.rst
@@ -62,7 +62,8 @@ reallocate or free the data memory of the instance.
 .. code-block:: c
 
    typedef struct {
-       char name[128];  /* multiple of 64 to keep the struct aligned */
+       char name[127];  /* multiple of 64 to keep the struct aligned */
+       uint8_t version; /* currently 1 */
        PyDataMemAllocator allocator;
    } PyDataMem_Handler;
 
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
index 637f8578a..b2e9eec77 100644
--- a/numpy/__init__.pyi
+++ b/numpy/__init__.pyi
@@ -1413,6 +1413,7 @@ _SupportsBuffer = Union[
 
 _T = TypeVar("_T")
 _T_co = TypeVar("_T_co", covariant=True)
+_T_contra = TypeVar("_T_contra", contravariant=True)
 _2Tuple = Tuple[_T, _T]
 _CastingKind = L["no", "equiv", "safe", "same_kind", "unsafe"]
 
@@ -1432,6 +1433,10 @@ _ArrayTD64_co = NDArray[Union[bool_, integer[Any], timedelta64]]
 # Introduce an alias for `dtype` to avoid naming conflicts.
 _dtype = dtype
 
+# `builtins.PyCapsule` unfortunately lacks annotations as of the moment;
+# use `Any` as a stopgap measure
+_PyCapsule = Any
+
 class _SupportsItem(Protocol[_T_co]):
     def item(self, args: Any, /) -> _T_co: ...
 
@@ -2439,6 +2444,12 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
     def __ior__(self: NDArray[signedinteger[_NBit1]], other: _ArrayLikeInt_co) -> NDArray[signedinteger[_NBit1]]: ...
     @overload
     def __ior__(self: NDArray[object_], other: Any) -> NDArray[object_]: ...
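A sketch of how the Python-level version query described in the NEP text above is expected to behave, mirroring the existing `get_handler_name()`; the values shown in comments are illustrative and assume the default allocator is active:

    import numpy as np
    from numpy.core.multiarray import get_handler_name, get_handler_version

    a = np.empty(10)

    # Without an argument: inspect the handler that will serve the *next*
    # allocation in the current context.
    print(get_handler_name())      # 'default_allocator'
    print(get_handler_version())   # 1

    # With an ndarray: inspect the handler that allocated its data. This
    # returns None when `a` does not own its memory (then check a.base).
    print(get_handler_version(a))  # 1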
+    @overload
+    def __ior__(self: NDArray[_ScalarType], other: _RecursiveSequence) -> NDArray[_ScalarType]: ...
+    @overload
+    def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ...
+    @overload
+    def __dlpack_device__(self) -> Tuple[int, L[0]]: ...
 
     # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype`
     @property
@@ -4320,3 +4331,9 @@ class chararray(ndarray[_ShapeType, _CharDType]):
 
 # NOTE: Deprecated
 # class MachAr: ...
+
+class _SupportsDLPack(Protocol[_T_contra]):
+    def __dlpack__(self, *, stream: None | _T_contra = ...) -> _PyCapsule: ...
+
+def _from_dlpack(__obj: _SupportsDLPack[None]) -> NDArray[Any]: ...
+
diff --git a/numpy/array_api/__init__.py b/numpy/array_api/__init__.py
index d8b29057e..36e3f3ed5 100644
--- a/numpy/array_api/__init__.py
+++ b/numpy/array_api/__init__.py
@@ -169,6 +169,7 @@ __all__ += [
 ]
 
 from ._data_type_functions import (
+    astype,
     broadcast_arrays,
     broadcast_to,
     can_cast,
@@ -178,6 +179,7 @@
 )
 
 __all__ += [
+    "astype",
     "broadcast_arrays",
     "broadcast_to",
     "can_cast",
@@ -358,9 +360,9 @@ from ._searching_functions import argmax, argmin, nonzero, where
 
 __all__ += ["argmax", "argmin", "nonzero", "where"]
 
-from ._set_functions import unique
+from ._set_functions import unique_all, unique_counts, unique_inverse, unique_values
 
-__all__ += ["unique"]
+__all__ += ["unique_all", "unique_counts", "unique_inverse", "unique_values"]
 
 from ._sorting_functions import argsort, sort
 
diff --git a/numpy/array_api/_array_object.py b/numpy/array_api/_array_object.py
index ef66c5efd..dc74bb8c5 100644
--- a/numpy/array_api/_array_object.py
+++ b/numpy/array_api/_array_object.py
@@ -32,7 +32,7 @@ from ._dtypes import (
 from typing import TYPE_CHECKING, Optional, Tuple, Union, Any
 
 if TYPE_CHECKING:
-    from ._typing import PyCapsule, Device, Dtype
+    from ._typing import Any, PyCapsule, Device, Dtype
 
 import numpy as np
 
@@ -99,9 +99,13 @@ class Array:
         """
         Performs the operation __repr__.
         """
-        prefix = "Array("
         suffix = f", dtype={self.dtype.name})"
-        mid = np.array2string(self._array, separator=', ', prefix=prefix, suffix=suffix)
+        if 0 in self.shape:
+            prefix = "empty("
+            mid = str(self.shape)
+        else:
+            prefix = "Array("
+            mid = np.array2string(self._array, separator=', ', prefix=prefix, suffix=suffix)
         return prefix + mid + suffix
 
     # These are various helper functions to make the array behavior match the
@@ -244,6 +248,10 @@ class Array:
         The following cases are allowed by NumPy, but not specified by the array
         API specification:
 
+        - Indices must not include an implicit ellipsis at the end. That is,
+          every axis of an array must be explicitly indexed or an ellipsis
+          included.
+
         - The start and stop of a slice may not be out of bounds. In
           particular, for a slice ``i:j:k`` on an axis of size ``n``, only the
          following are allowed:
@@ -270,6 +278,10 @@
                 return key
             if shape == ():
                 return key
+            if len(shape) > 1:
+                raise IndexError(
+                    "Multidimensional arrays must include an index for every axis or use an ellipsis"
+                )
             size = shape[0]
             # Ensure invalid slice entries are passed through.
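The new `_validate_index` checks above make the `numpy.array_api` namespace stricter than NumPy proper. A short sketch of the user-visible effect; the failing cases match the ones exercised in `test_validate_index` later in this diff:

    import numpy.array_api as xp  # emits a UserWarning; the module is experimental

    a = xp.ones((3, 4))

    a[0, :]    # OK: one index per axis
    a[0, ...]  # OK: trailing axes covered by an explicit ellipsis

    # Plain NumPy accepts these (implicit trailing ellipsis), but the
    # array_api Array now raises IndexError for each of them:
    for bad in (lambda: a[0], lambda: a[:], lambda: a[()]):
        try:
            bad()
        except IndexError:
            pass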
if key.start is not None: @@ -277,7 +289,7 @@ class Array: operator.index(key.start) except TypeError: return key - if not (-size <= key.start <= max(0, size - 1)): + if not (-size <= key.start <= size): raise IndexError( "Slices with out-of-bounds start are not allowed in the array API namespace" ) @@ -322,6 +334,10 @@ class Array: zip(key[:ellipsis_i:-1], shape[:ellipsis_i:-1]) ): Array._validate_index(idx, (size,)) + if n_ellipsis == 0 and len(key) < len(shape): + raise IndexError( + "Multidimensional arrays must include an index for every axis or use an ellipsis" + ) return key elif isinstance(key, bool): return key @@ -339,7 +355,12 @@ class Array: "newaxis indices are not allowed in the array API namespace" ) try: - return operator.index(key) + key = operator.index(key) + if shape is not None and len(shape) > 1: + raise IndexError( + "Multidimensional arrays must include an index for every axis or use an ellipsis" + ) + return key except TypeError: # Note: This also omits boolean arrays that are not already in # Array() form, like a list of booleans. @@ -403,16 +424,14 @@ class Array: """ Performs the operation __dlpack__. """ - res = self._array.__dlpack__(stream=stream) - return self.__class__._new(res) + return self._array.__dlpack__(stream=stream) def __dlpack_device__(self: Array, /) -> Tuple[IntEnum, int]: """ Performs the operation __dlpack_device__. """ # Note: device support is required for this - res = self._array.__dlpack_device__() - return self.__class__._new(res) + return self._array.__dlpack_device__() def __eq__(self: Array, other: Union[int, float, bool, Array], /) -> Array: """ @@ -527,13 +546,6 @@ class Array: res = self._array.__le__(other._array) return self.__class__._new(res) - # Note: __len__ may end up being removed from the array API spec. - def __len__(self, /) -> int: - """ - Performs the operation __len__. - """ - return self._array.__len__() - def __lshift__(self: Array, other: Union[int, Array], /) -> Array: """ Performs the operation __lshift__. 
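The slice-bound change above widens the allowed `start` range from `-size <= start <= max(0, size - 1)` to `-size <= start <= size`. A tiny sketch of the boundary case this fixes, consistent with the updated `test_validate_index` expectations later in the diff (the shape here is illustrative):

    import numpy.array_api as xp

    a = xp.ones((4,))

    a[4:]      # now valid: start == size yields an empty slice, as in plain NumPy
    try:
        a[5:]  # still rejected: start is out of bounds for the array API
    except IndexError:
        pass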
@@ -995,7 +1007,9 @@ class Array: res = self._array.__rxor__(other._array) return self.__class__._new(res) - def to_device(self: Array, device: Device, /) -> Array: + def to_device(self: Array, device: Device, /, stream: None = None) -> Array: + if stream is not None: + raise ValueError("The stream argument to to_device() is not supported") if device == 'cpu': return self raise ValueError(f"Unsupported device {device!r}") diff --git a/numpy/array_api/_creation_functions.py b/numpy/array_api/_creation_functions.py index e36807468..23beec444 100644 --- a/numpy/array_api/_creation_functions.py +++ b/numpy/array_api/_creation_functions.py @@ -9,7 +9,6 @@ if TYPE_CHECKING: Device, Dtype, NestedSequence, - SupportsDLPack, SupportsBufferProtocol, ) from collections.abc import Sequence @@ -36,7 +35,6 @@ def asarray( int, float, NestedSequence[bool | int | float], - SupportsDLPack, SupportsBufferProtocol, ], /, @@ -60,7 +58,9 @@ def asarray( if copy is False: # Note: copy=False is not yet implemented in np.asarray raise NotImplementedError("copy=False is not yet implemented") - if isinstance(obj, Array) and (dtype is None or obj.dtype == dtype): + if isinstance(obj, Array): + if dtype is not None and obj.dtype != dtype: + copy = True if copy is True: return Array._new(np.array(obj._array, copy=True, dtype=dtype)) return obj @@ -152,8 +152,9 @@ def eye( def from_dlpack(x: object, /) -> Array: - # Note: dlpack support is not yet implemented on Array - raise NotImplementedError("DLPack support is not yet implemented") + from ._array_object import Array + + return Array._new(np._from_dlpack(x)) def full( @@ -240,6 +241,12 @@ def meshgrid(*arrays: Array, indexing: str = "xy") -> List[Array]: """ from ._array_object import Array + # Note: unlike np.meshgrid, only inputs with all the same dtype are + # allowed + + if len({a.dtype for a in arrays}) > 1: + raise ValueError("meshgrid inputs must all have the same dtype") + return [ Array._new(array) for array in np.meshgrid(*[a._array for a in arrays], indexing=indexing) diff --git a/numpy/array_api/_data_type_functions.py b/numpy/array_api/_data_type_functions.py index 7ccbe9469..e4d6db61b 100644 --- a/numpy/array_api/_data_type_functions.py +++ b/numpy/array_api/_data_type_functions.py @@ -13,6 +13,13 @@ if TYPE_CHECKING: import numpy as np +# Note: astype is a function, not an array method as in NumPy. +def astype(x: Array, dtype: Dtype, /, *, copy: bool = True) -> Array: + if not copy and dtype == x.dtype: + return x + return Array._new(x._array.astype(dtype=dtype, copy=copy)) + + def broadcast_arrays(*arrays: Array) -> List[Array]: """ Array API compatible wrapper for :py:func:`np.broadcast_arrays <numpy.broadcast_arrays>`. 
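Because the array API spells casting as a standalone function rather than an `ndarray` method, here is a brief sketch of the `astype` semantics added in the `_data_type_functions.py` hunk above:

    import numpy.array_api as xp

    a = xp.asarray([1, 2, 3], dtype=xp.int32)

    b = xp.astype(a, xp.float64)             # copies by default
    c = xp.astype(a, xp.int32, copy=False)   # same dtype and copy=False: no copy
    assert c is a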
diff --git a/numpy/array_api/_searching_functions.py b/numpy/array_api/_searching_functions.py index 3dcef61c3..40f5a4d2e 100644 --- a/numpy/array_api/_searching_functions.py +++ b/numpy/array_api/_searching_functions.py @@ -43,4 +43,5 @@ def where(condition: Array, x1: Array, x2: Array, /) -> Array: """ # Call result type here just to raise on disallowed type combinations _result_type(x1.dtype, x2.dtype) + x1, x2 = Array._normalize_two_args(x1, x2) return Array._new(np.where(condition._array, x1._array, x2._array)) diff --git a/numpy/array_api/_set_functions.py b/numpy/array_api/_set_functions.py index 357f238f5..05ee7e555 100644 --- a/numpy/array_api/_set_functions.py +++ b/numpy/array_api/_set_functions.py @@ -2,19 +2,82 @@ from __future__ import annotations from ._array_object import Array -from typing import Tuple, Union +from typing import NamedTuple import numpy as np +# Note: np.unique() is split into four functions in the array API: +# unique_all, unique_counts, unique_inverse, and unique_values (this is done +# to remove polymorphic return types). -def unique( - x: Array, - /, - *, - return_counts: bool = False, - return_index: bool = False, - return_inverse: bool = False, -) -> Union[Array, Tuple[Array, ...]]: +# Note: The various unique() functions are supposed to return multiple NaNs. +# This does not match the NumPy behavior, however, this is currently left as a +# TODO in this implementation as this behavior may be reverted in np.unique(). +# See https://github.com/numpy/numpy/issues/20326. + +# Note: The functions here return a namedtuple (np.unique() returns a normal +# tuple). + +class UniqueAllResult(NamedTuple): + values: Array + indices: Array + inverse_indices: Array + counts: Array + + +class UniqueCountsResult(NamedTuple): + values: Array + counts: Array + + +class UniqueInverseResult(NamedTuple): + values: Array + inverse_indices: Array + + +def unique_all(x: Array, /) -> UniqueAllResult: + """ + Array API compatible wrapper for :py:func:`np.unique <numpy.unique>`. + + See its docstring for more information. + """ + res = np.unique( + x._array, + return_counts=True, + return_index=True, + return_inverse=True, + ) + + return UniqueAllResult(*[Array._new(i) for i in res]) + + +def unique_counts(x: Array, /) -> UniqueCountsResult: + res = np.unique( + x._array, + return_counts=True, + return_index=False, + return_inverse=False, + ) + + return UniqueCountsResult(*[Array._new(i) for i in res]) + + +def unique_inverse(x: Array, /) -> UniqueInverseResult: + """ + Array API compatible wrapper for :py:func:`np.unique <numpy.unique>`. + + See its docstring for more information. + """ + res = np.unique( + x._array, + return_counts=False, + return_index=False, + return_inverse=True, + ) + return UniqueInverseResult(*[Array._new(i) for i in res]) + + +def unique_values(x: Array, /) -> Array: """ Array API compatible wrapper for :py:func:`np.unique <numpy.unique>`. 
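To make the removal of polymorphic return types concrete, a hedged sketch of the four `unique_*` functions defined in the `_set_functions.py` hunk above (the results shown in comments are illustrative):

    import numpy.array_api as xp

    x = xp.asarray([1, 2, 2, 3, 3, 3])

    xp.unique_values(x)         # Array([1, 2, 3])

    res = xp.unique_counts(x)   # a namedtuple, unlike np.unique's plain tuple
    res.values                  # Array([1, 2, 3])
    res.counts                  # Array([1, 2, 3])

    full = xp.unique_all(x)     # fields: values, indices, inverse_indices, counts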
@@ -22,10 +85,8 @@ def unique( """ res = np.unique( x._array, - return_counts=return_counts, - return_index=return_index, - return_inverse=return_inverse, + return_counts=False, + return_index=False, + return_inverse=False, ) - if isinstance(res, tuple): - return tuple(Array._new(i) for i in res) return Array._new(res) diff --git a/numpy/array_api/tests/test_array_object.py b/numpy/array_api/tests/test_array_object.py index fb42cf621..12479d765 100644 --- a/numpy/array_api/tests/test_array_object.py +++ b/numpy/array_api/tests/test_array_object.py @@ -3,7 +3,7 @@ import operator from numpy.testing import assert_raises import numpy as np -from .. import ones, asarray, result_type +from .. import ones, asarray, result_type, all, equal from .._dtypes import ( _all_dtypes, _boolean_dtypes, @@ -39,18 +39,18 @@ def test_validate_index(): assert_raises(IndexError, lambda: a[:-4]) assert_raises(IndexError, lambda: a[:3:-1]) assert_raises(IndexError, lambda: a[:-5:-1]) - assert_raises(IndexError, lambda: a[3:]) + assert_raises(IndexError, lambda: a[4:]) assert_raises(IndexError, lambda: a[-4:]) - assert_raises(IndexError, lambda: a[3::-1]) + assert_raises(IndexError, lambda: a[4::-1]) assert_raises(IndexError, lambda: a[-4::-1]) assert_raises(IndexError, lambda: a[...,:5]) assert_raises(IndexError, lambda: a[...,:-5]) - assert_raises(IndexError, lambda: a[...,:4:-1]) + assert_raises(IndexError, lambda: a[...,:5:-1]) assert_raises(IndexError, lambda: a[...,:-6:-1]) - assert_raises(IndexError, lambda: a[...,4:]) + assert_raises(IndexError, lambda: a[...,5:]) assert_raises(IndexError, lambda: a[...,-5:]) - assert_raises(IndexError, lambda: a[...,4::-1]) + assert_raises(IndexError, lambda: a[...,5::-1]) assert_raises(IndexError, lambda: a[...,-5::-1]) # Boolean indices cannot be part of a larger tuple index @@ -74,6 +74,11 @@ def test_validate_index(): assert_raises(IndexError, lambda: a[None, ...]) assert_raises(IndexError, lambda: a[..., None]) + # Multiaxis indices must contain exactly as many indices as dimensions + assert_raises(IndexError, lambda: a[()]) + assert_raises(IndexError, lambda: a[0,]) + assert_raises(IndexError, lambda: a[0]) + assert_raises(IndexError, lambda: a[:]) def test_operators(): # For every operator, we test that it works for the required type @@ -291,8 +296,8 @@ def test_device_property(): a = ones((3, 4)) assert a.device == 'cpu' - assert np.array_equal(a.to_device('cpu'), a) + assert all(equal(a.to_device('cpu'), a)) assert_raises(ValueError, lambda: a.to_device('gpu')) - assert np.array_equal(asarray(a, device='cpu'), a) + assert all(equal(asarray(a, device='cpu'), a)) assert_raises(ValueError, lambda: asarray(a, device='gpu')) diff --git a/numpy/array_api/tests/test_creation_functions.py b/numpy/array_api/tests/test_creation_functions.py index 7b633eaf1..ebbb6aab3 100644 --- a/numpy/array_api/tests/test_creation_functions.py +++ b/numpy/array_api/tests/test_creation_functions.py @@ -11,11 +11,13 @@ from .._creation_functions import ( full, full_like, linspace, + meshgrid, ones, ones_like, zeros, zeros_like, ) +from .._dtypes import float32, float64 from .._array_object import Array @@ -124,3 +126,11 @@ def test_zeros_like_errors(): assert_raises(ValueError, lambda: zeros_like(asarray(1), device="gpu")) assert_raises(ValueError, lambda: zeros_like(asarray(1), dtype=int)) assert_raises(ValueError, lambda: zeros_like(asarray(1), dtype="i")) + +def test_meshgrid_dtype_errors(): + # Doesn't raise + meshgrid() + meshgrid(asarray([1.], dtype=float32)) + meshgrid(asarray([1.], 
dtype=float32), asarray([1.], dtype=float32)) + + assert_raises(ValueError, lambda: meshgrid(asarray([1.], dtype=float32), asarray([1.], dtype=float64))) diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index c8a24db0c..078c58976 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1573,6 +1573,19 @@ add_newdoc('numpy.core.multiarray', 'frombuffer', array_function_like_doc, )) +add_newdoc('numpy.core.multiarray', '_from_dlpack', + """ + _from_dlpack(x, /) + + Create a NumPy array from an object implementing the ``__dlpack__`` + protocol. + + See Also + -------- + `Array API documentation + <https://data-apis.org/array-api/latest/design_topics/data_interchange.html#syntax-for-data-interchange-with-dlpack>`_ + """) + add_newdoc('numpy.core', 'fastCopyAndTranspose', """_fastCopyAndTranspose(a)""") @@ -2263,6 +2276,15 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_priority__', add_newdoc('numpy.core.multiarray', 'ndarray', ('__array_struct__', """Array protocol: C-struct side.""")) +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack__', + """a.__dlpack__(*, stream=None) + + DLPack Protocol: Part of the Array API.""")) + +add_newdoc('numpy.core.multiarray', 'ndarray', ('__dlpack_device__', + """a.__dlpack_device__() + + DLPack Protocol: Part of the Array API.""")) add_newdoc('numpy.core.multiarray', 'ndarray', ('base', """ @@ -4737,6 +4759,16 @@ add_newdoc('numpy.core.multiarray', 'get_handler_name', memory, in which case you can traverse ``a.base`` for a memory handler. """) +add_newdoc('numpy.core.multiarray', 'get_handler_version', + """ + get_handler_version(a: ndarray) -> int,None + + Return the version of the memory handler used by `a`. If not provided, + return the version of the memory handler that will be used to allocate data + for the next `ndarray` in this context. May return None if `a` does not own + its memory, in which case you can traverse ``a.base`` for a memory handler. 
+ """) + add_newdoc('numpy.core.multiarray', '_set_madvise_hugepage', """ _set_madvise_hugepage(enabled: bool) -> bool diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py index c2458c2b5..b401ee6a5 100644 --- a/numpy/core/code_generators/genapi.py +++ b/numpy/core/code_generators/genapi.py @@ -41,6 +41,7 @@ API_FILES = [join('multiarray', 'alloc.c'), join('multiarray', 'datetime_busdaycal.c'), join('multiarray', 'datetime_strings.c'), join('multiarray', 'descriptor.c'), + join('multiarray', 'dlpack.c'), join('multiarray', 'dtypemeta.c'), join('multiarray', 'einsum.c.src'), join('multiarray', 'flagsobject.c'), diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 3a27a34cd..292d9e0d3 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -827,7 +827,7 @@ defdict = { docstrings.get('numpy.core.umath.ceil'), None, TD('e', f='ceil', astype={'e': 'f'}), - TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]), + TD(inexactvec, dispatch=[('loops_unary_fp', 'fd')]), TD('fdg', f='ceil'), TD(O, f='npy_ObjectCeil'), ), diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 80177e2bb..2607fb732 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -674,10 +674,15 @@ typedef struct { void* (*calloc) (void *ctx, size_t nelem, size_t elsize); void* (*realloc) (void *ctx, void *ptr, size_t new_size); void (*free) (void *ctx, void *ptr, size_t size); + /* + * This is the end of the version=1 struct. Only add new fields after + * this line + */ } PyDataMemAllocator; typedef struct { - char name[128]; /* multiple of 64 to keep the struct aligned */ + char name[127]; /* multiple of 64 to keep the struct aligned */ + uint8_t version; /* currently 1 */ PyDataMemAllocator allocator; } PyDataMem_Handler; diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 3f184bd45..1d7050bbe 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -173,11 +173,8 @@ typedef struct _tagPyUFuncObject { * but this was never implemented. (This is also why the above * selector is called the "legacy" selector.) */ - #if PY_VERSION_HEX >= 0x03080000 vectorcallfunc vectorcall; - #else - void *reserved2; - #endif + /* Was previously the `PyUFunc_MaskedInnerLoopSelectionFunc` */ void *_always_null_previously_masked_innerloop_selector; diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 351cd3a1b..f88d75978 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -14,8 +14,9 @@ from ._multiarray_umath import * # noqa: F403 # do not change them. 
issue gh-15518 # _get_ndarray_c_version is semi-public, on purpose not added to __all__ from ._multiarray_umath import ( - _fastCopyAndTranspose, _flagdict, _insert, _reconstruct, _vec_string, - _ARRAY_API, _monotonicity, _get_ndarray_c_version, _set_madvise_hugepage, + _fastCopyAndTranspose, _flagdict, _from_dlpack, _insert, _reconstruct, + _vec_string, _ARRAY_API, _monotonicity, _get_ndarray_c_version, + _set_madvise_hugepage, ) __all__ = [ @@ -23,29 +24,30 @@ __all__ = [ 'ITEM_HASOBJECT', 'ITEM_IS_POINTER', 'LIST_PICKLE', 'MAXDIMS', 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'NEEDS_INIT', 'NEEDS_PYAPI', 'RAISE', 'USE_GETITEM', 'USE_SETITEM', 'WRAP', '_fastCopyAndTranspose', - '_flagdict', '_insert', '_reconstruct', '_vec_string', '_monotonicity', - 'add_docstring', 'arange', 'array', 'asarray', 'asanyarray', - 'ascontiguousarray', 'asfortranarray', 'bincount', 'broadcast', - 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', + '_flagdict', '_from_dlpack', '_insert', '_reconstruct', '_vec_string', + '_monotonicity', 'add_docstring', 'arange', 'array', 'asarray', + 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'bincount', + 'broadcast', 'busday_count', 'busday_offset', 'busdaycalendar', 'can_cast', 'compare_chararrays', 'concatenate', 'copyto', 'correlate', 'correlate2', 'count_nonzero', 'c_einsum', 'datetime_as_string', 'datetime_data', 'dot', 'dragon4_positional', 'dragon4_scientific', 'dtype', 'empty', 'empty_like', 'error', 'flagsobj', 'flatiter', 'format_longfloat', - 'frombuffer', 'fromfile', 'fromiter', 'fromstring', 'get_handler_name', - 'inner', 'interp', 'interp_complex', 'is_busday', 'lexsort', - 'matmul', 'may_share_memory', 'min_scalar_type', 'ndarray', 'nditer', - 'nested_iters', 'normalize_axis_index', 'packbits', - 'promote_types', 'putmask', 'ravel_multi_index', 'result_type', 'scalar', - 'set_datetimeparse_function', 'set_legacy_print_mode', 'set_numeric_ops', - 'set_string_function', 'set_typeDict', 'shares_memory', - 'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot', - 'where', 'zeros'] + 'frombuffer', 'fromfile', 'fromiter', 'fromstring', + 'get_handler_name', 'get_handler_version', 'inner', 'interp', + 'interp_complex', 'is_busday', 'lexsort', 'matmul', 'may_share_memory', + 'min_scalar_type', 'ndarray', 'nditer', 'nested_iters', + 'normalize_axis_index', 'packbits', 'promote_types', 'putmask', + 'ravel_multi_index', 'result_type', 'scalar', 'set_datetimeparse_function', + 'set_legacy_print_mode', 'set_numeric_ops', 'set_string_function', + 'set_typeDict', 'shares_memory', 'tracemalloc_domain', 'typeinfo', + 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros'] # For backward compatibility, make sure pickle imports these functions from here _reconstruct.__module__ = 'numpy.core.multiarray' scalar.__module__ = 'numpy.core.multiarray' +_from_dlpack.__module__ = 'numpy' arange.__module__ = 'numpy' array.__module__ = 'numpy' asarray.__module__ = 'numpy' diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 1654e8364..344d40d93 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -13,8 +13,8 @@ from .multiarray import ( WRAP, arange, array, asarray, asanyarray, ascontiguousarray, asfortranarray, broadcast, can_cast, compare_chararrays, concatenate, copyto, dot, dtype, empty, - empty_like, flatiter, frombuffer, fromfile, fromiter, fromstring, - inner, lexsort, matmul, may_share_memory, + empty_like, flatiter, frombuffer, _from_dlpack, fromfile, fromiter, + fromstring, inner, lexsort, matmul, may_share_memory, 
min_scalar_type, ndarray, nditer, nested_iters, promote_types, putmask, result_type, set_numeric_ops, shares_memory, vdot, where, zeros, normalize_axis_index) @@ -41,7 +41,7 @@ __all__ = [ 'newaxis', 'ndarray', 'flatiter', 'nditer', 'nested_iters', 'ufunc', 'arange', 'array', 'asarray', 'asanyarray', 'ascontiguousarray', 'asfortranarray', 'zeros', 'count_nonzero', 'empty', 'broadcast', 'dtype', - 'fromstring', 'fromfile', 'frombuffer', 'where', + 'fromstring', 'fromfile', 'frombuffer', '_from_dlpack', 'where', 'argwhere', 'copyto', 'concatenate', 'fastCopyAndTranspose', 'lexsort', 'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type', 'result_type', 'isfortran', 'empty_like', 'zeros_like', 'ones_like', diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 3e1ed4c9b..2c99060ec 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -740,6 +740,7 @@ def configuration(parent_package='',top_path=None): ####################################################################### common_deps = [ + join('src', 'common', 'dlpack', 'dlpack.h'), join('src', 'common', 'array_assign.h'), join('src', 'common', 'binop_override.h'), join('src', 'common', 'cblasfuncs.h'), @@ -749,6 +750,7 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'npy_cblas.h'), join('src', 'common', 'npy_config.h'), join('src', 'common', 'npy_ctypes.h'), + join('src', 'common', 'npy_dlpack.h'), join('src', 'common', 'npy_extint128.h'), join('src', 'common', 'npy_import.h'), join('src', 'common', 'npy_hashtable.h'), @@ -881,6 +883,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'datetime_busday.c'), join('src', 'multiarray', 'datetime_busdaycal.c'), join('src', 'multiarray', 'descriptor.c'), + join('src', 'multiarray', 'dlpack.c'), join('src', 'multiarray', 'dtypemeta.c'), join('src', 'multiarray', 'dragon4.c'), join('src', 'multiarray', 'dtype_transfer.c'), diff --git a/numpy/core/src/common/dlpack/dlpack.h b/numpy/core/src/common/dlpack/dlpack.h new file mode 100644 index 000000000..29209aee1 --- /dev/null +++ b/numpy/core/src/common/dlpack/dlpack.h @@ -0,0 +1,201 @@ +// Taken from: +// https://github.com/dmlc/dlpack/blob/9b6176fdecb55e9bf39b16f08b96913ed3f275b4/include/dlpack/dlpack.h +/*! + * Copyright (c) 2017 by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current version of dlpack */ +#define DLPACK_VERSION 050 + +/*! \brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include <stdint.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif +/*! + * \brief The device type in DLDevice. + */ +typedef enum { + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! 
+ * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, +} DLDeviceType; + +/*! + * \brief A Device for Tensor and operator. + */ +typedef struct { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0. + */ + int device_id; +} DLDevice; + +/*! + * \brief The type code options DLDataType. + */ +typedef enum { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to be well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! + * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, +} DLDataTypeCode; + +/*! + * \brief The data type the tensor can hold. + * + * Examples + * - float: type_code = 2, bits = 32, lanes=1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4 + * - int8: type_code = 0, bits = 8, lanes=1 + * - std::complex<float>: type_code = 5, bits = 64, lanes = 1 + */ +typedef struct { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; +} DLDataType; + +/*! + * \brief Plain C Tensor object, does not manage memory. + */ +typedef struct { + /*! + * \brief The opaque data pointer points to the allocated data. This will be + * CUDA device pointer or cl_mem handle in OpenCL. This pointer is always + * aligned to 256 bytes as in CUDA. + * + * For given DLTensor, the size of memory required to store the contents of + * data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + */ + void* data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! \brief The shape of the tensor */ + int64_t* shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes) + * can be NULL, indicating tensor is compact and row-majored. + */ + int64_t* strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; +} DLTensor; + +/*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. It is + * not meant to transfer the tensor. When the borrowing framework doesn't need + * the tensor, it should call the deleter to notify the host that the resource + * is no longer needed. + */ +typedef struct DLManagedTensor { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! 
\brief the context of the original host framework of DLManagedTensor in + * which DLManagedTensor is used in the framework. It can also be NULL. + */ + void * manager_ctx; + /*! \brief Destructor signature void (*)(void*) - this should be called + * to destruct manager_ctx which holds the DLManagedTensor. It can be NULL + * if there is no way for the caller to provide a reasonable destructor. + * The destructors deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor * self); +} DLManagedTensor; +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ diff --git a/numpy/core/src/common/npy_dlpack.h b/numpy/core/src/common/npy_dlpack.h new file mode 100644 index 000000000..14ca352c0 --- /dev/null +++ b/numpy/core/src/common/npy_dlpack.h @@ -0,0 +1,28 @@ +#include "Python.h" +#include "dlpack/dlpack.h" + +#ifndef NPY_DLPACK_H +#define NPY_DLPACK_H + +// Part of the Array API specification. +#define NPY_DLPACK_CAPSULE_NAME "dltensor" +#define NPY_DLPACK_USED_CAPSULE_NAME "used_dltensor" + +// Used internally by NumPy to store a base object +// as it has to release a reference to the original +// capsule. +#define NPY_DLPACK_INTERNAL_CAPSULE_NAME "numpy_dltensor" + +PyObject * +array_dlpack(PyArrayObject *self, PyObject *const *args, Py_ssize_t len_args, + PyObject *kwnames); + + +PyObject * +array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args)); + + +NPY_NO_EXPORT PyObject * +_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj); + +#endif diff --git a/numpy/core/src/multiarray/alloc.c b/numpy/core/src/multiarray/alloc.c index e4756264d..d1173410d 100644 --- a/numpy/core/src/multiarray/alloc.c +++ b/numpy/core/src/multiarray/alloc.c @@ -370,6 +370,7 @@ default_free(void *NPY_UNUSED(ctx), void *ptr, size_t size) /* Memory handler global default */ PyDataMem_Handler default_handler = { "default_allocator", + 1, { NULL, /* ctx */ default_malloc, /* malloc */ @@ -395,7 +396,6 @@ PyDataMem_UserNEW(size_t size, PyObject *mem_handler) if (handler == NULL) { return NULL; } - assert(size != 0); result = handler->allocator.malloc(handler->allocator.ctx, size); if (_PyDataMem_eventhook != NULL) { @@ -639,3 +639,40 @@ get_handler_name(PyObject *NPY_UNUSED(self), PyObject *args) Py_DECREF(mem_handler); return name; } + +NPY_NO_EXPORT PyObject * +get_handler_version(PyObject *NPY_UNUSED(self), PyObject *args) +{ + PyObject *arr=NULL; + if (!PyArg_ParseTuple(args, "|O:get_handler_version", &arr)) { + return NULL; + } + if (arr != NULL && !PyArray_Check(arr)) { + PyErr_SetString(PyExc_ValueError, "if supplied, argument must be an ndarray"); + return NULL; + } + PyObject *mem_handler; + PyDataMem_Handler *handler; + PyObject *version; + if (arr != NULL) { + mem_handler = PyArray_HANDLER((PyArrayObject *) arr); + if (mem_handler == NULL) { + Py_RETURN_NONE; + } + Py_INCREF(mem_handler); + } + else { + mem_handler = PyDataMem_GetHandler(); + if (mem_handler == NULL) { + return NULL; + } + } + handler = (PyDataMem_Handler *) PyCapsule_GetPointer(mem_handler, "mem_handler"); + if (handler == NULL) { + Py_DECREF(mem_handler); + return NULL; + } + version = PyLong_FromLong(handler->version); + Py_DECREF(mem_handler); + return version; +} diff --git a/numpy/core/src/multiarray/alloc.h b/numpy/core/src/multiarray/alloc.h index 4f7df1f84..f1ccf0bcd 100644 --- a/numpy/core/src/multiarray/alloc.h +++ b/numpy/core/src/multiarray/alloc.h @@ -47,5 +47,7 @@ extern PyDataMem_Handler default_handler; NPY_NO_EXPORT PyObject * get_handler_name(PyObject 
*NPY_UNUSED(self), PyObject *obj); +NPY_NO_EXPORT PyObject * +get_handler_version(PyObject *NPY_UNUSED(self), PyObject *obj); #endif /* NUMPY_CORE_SRC_MULTIARRAY_ALLOC_H_ */ diff --git a/numpy/core/src/multiarray/array_coercion.c b/numpy/core/src/multiarray/array_coercion.c index 847bdafc3..3c44d312c 100644 --- a/numpy/core/src/multiarray/array_coercion.c +++ b/numpy/core/src/multiarray/array_coercion.c @@ -555,6 +555,7 @@ npy_new_coercion_cache( cache = PyMem_Malloc(sizeof(coercion_cache_obj)); } if (cache == NULL) { + Py_DECREF(arr_or_sequence); PyErr_NoMemory(); return -1; } diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 9910fffe6..5853e068b 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1393,7 +1393,7 @@ arr_add_docstring(PyObject *NPY_UNUSED(dummy), PyObject *args) { PyObject *obj; PyObject *str; - #if PY_VERSION_HEX >= 0x030700A2 && (!defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300) + #if !defined(PYPY_VERSION_NUM) || PYPY_VERSION_NUM > 0x07030300 const char *docstr; #else char *docstr; diff --git a/numpy/core/src/multiarray/dlpack.c b/numpy/core/src/multiarray/dlpack.c new file mode 100644 index 000000000..291e60a22 --- /dev/null +++ b/numpy/core/src/multiarray/dlpack.c @@ -0,0 +1,408 @@ +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#define _MULTIARRAYMODULE + +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <dlpack/dlpack.h> + +#include "numpy/arrayobject.h" +#include "common/npy_argparse.h" + +#include "common/dlpack/dlpack.h" +#include "common/npy_dlpack.h" + +static void +array_dlpack_deleter(DLManagedTensor *self) +{ + PyArrayObject *array = (PyArrayObject *)self->manager_ctx; + // This will also free the strides as it's one allocation. + PyMem_Free(self->dl_tensor.shape); + PyMem_Free(self); + Py_XDECREF(array); +} + +/* This is exactly as mandated by dlpack */ +static void dlpack_capsule_deleter(PyObject *self) { + if (PyCapsule_IsValid(self, NPY_DLPACK_USED_CAPSULE_NAME)) { + return; + } + + /* an exception may be in-flight, we must save it in case we create another one */ + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_CAPSULE_NAME); + if (managed == NULL) { + PyErr_WriteUnraisable(self); + goto done; + } + /* + * the spec says the deleter can be NULL if there is no way for the caller + * to provide a reasonable destructor. + */ + if (managed->deleter) { + managed->deleter(managed); + /* TODO: is the deleter allowed to set a python exception? */ + assert(!PyErr_Occurred()); + } + +done: + PyErr_Restore(type, value, traceback); +} + +/* used internally, almost identical to dlpack_capsule_deleter() */ +static void array_dlpack_internal_capsule_deleter(PyObject *self) +{ + /* an exception may be in-flight, we must save it in case we create another one */ + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(self, NPY_DLPACK_INTERNAL_CAPSULE_NAME); + if (managed == NULL) { + PyErr_WriteUnraisable(self); + goto done; + } + /* + * the spec says the deleter can be NULL if there is no way for the caller + * to provide a reasonable destructor. + */ + if (managed->deleter) { + managed->deleter(managed); + /* TODO: is the deleter allowed to set a python exception? 
*/ + assert(!PyErr_Occurred()); + } + +done: + PyErr_Restore(type, value, traceback); +} + + +// This function cannot return NULL, but it can fail, +// So call PyErr_Occurred to check if it failed after +// calling it. +static DLDevice +array_get_dl_device(PyArrayObject *self) { + DLDevice ret; + ret.device_type = kDLCPU; + ret.device_id = 0; + PyObject *base = PyArray_BASE(self); + // The outer if is due to the fact that NumPy arrays are on the CPU + // by default (if not created from DLPack). + if (PyCapsule_IsValid(base, NPY_DLPACK_INTERNAL_CAPSULE_NAME)) { + DLManagedTensor *managed = PyCapsule_GetPointer( + base, NPY_DLPACK_INTERNAL_CAPSULE_NAME); + if (managed == NULL) { + return ret; + } + return managed->dl_tensor.device; + } + return ret; +} + + +PyObject * +array_dlpack(PyArrayObject *self, + PyObject *const *args, Py_ssize_t len_args, PyObject *kwnames) +{ + PyObject *stream = Py_None; + NPY_PREPARE_ARGPARSER; + if (npy_parse_arguments("__dlpack__", args, len_args, kwnames, + "$stream", NULL, &stream, NULL, NULL, NULL)) { + return NULL; + } + + if (stream != Py_None) { + PyErr_SetString(PyExc_RuntimeError, "NumPy only supports " + "stream=None."); + return NULL; + } + + if ( !(PyArray_FLAGS(self) & NPY_ARRAY_WRITEABLE)) { + PyErr_SetString(PyExc_TypeError, "NumPy currently only supports " + "dlpack for writeable arrays"); + return NULL; + } + + npy_intp itemsize = PyArray_ITEMSIZE(self); + int ndim = PyArray_NDIM(self); + npy_intp *strides = PyArray_STRIDES(self); + npy_intp *shape = PyArray_SHAPE(self); + + if (!PyArray_IS_C_CONTIGUOUS(self) && PyArray_SIZE(self) != 1) { + for (int i = 0; i < ndim; ++i) { + if (strides[i] % itemsize != 0) { + PyErr_SetString(PyExc_RuntimeError, + "DLPack only supports strides which are a multiple of " + "itemsize."); + return NULL; + } + } + } + + DLDataType managed_dtype; + PyArray_Descr *dtype = PyArray_DESCR(self); + + if (PyDataType_ISBYTESWAPPED(dtype)) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports native " + "byte swapping."); + return NULL; + } + + managed_dtype.bits = 8 * itemsize; + managed_dtype.lanes = 1; + + if (PyDataType_ISSIGNED(dtype)) { + managed_dtype.code = kDLInt; + } + else if (PyDataType_ISUNSIGNED(dtype)) { + managed_dtype.code = kDLUInt; + } + else if (PyDataType_ISFLOAT(dtype)) { + // We can't be sure that the dtype is + // IEEE or padded. + if (itemsize > 8) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE " + "floating point types without padding."); + return NULL; + } + managed_dtype.code = kDLFloat; + } + else if (PyDataType_ISCOMPLEX(dtype)) { + // We can't be sure that the dtype is + // IEEE or padded. + if (itemsize > 16) { + PyErr_SetString(PyExc_TypeError, "DLPack only supports IEEE " + "complex point types without padding."); + return NULL; + } + managed_dtype.code = kDLComplex; + } + else { + PyErr_SetString(PyExc_TypeError, + "DLPack only supports signed/unsigned integers, float " + "and complex dtypes."); + return NULL; + } + + DLDevice device = array_get_dl_device(self); + if (PyErr_Occurred()) { + return NULL; + } + + DLManagedTensor *managed = PyMem_Malloc(sizeof(DLManagedTensor)); + if (managed == NULL) { + PyErr_NoMemory(); + return NULL; + } + + /* + * Note: the `dlpack.h` header suggests/standardizes that `data` must be + * 256-byte aligned. 
We ignore this intentionally, because `__dlpack__` + * standardizes that `byte_offset` must be 0 (for now) to not break pytorch: + * https://github.com/data-apis/array-api/issues/293#issuecomment-964111413 + * + * We further assume that exporting fully unaligned data is OK even without + * `byte_offset` since the standard does not reject it. + * Presumably, pytorch will support importing `byte_offset != 0` and NumPy + * can choose to use it starting about 2023. At that point, it may be + * that NumPy MUST use `byte_offset` to adhere to the standard (as + * specified in the header)! + */ + managed->dl_tensor.data = PyArray_DATA(self); + managed->dl_tensor.byte_offset = 0; + managed->dl_tensor.device = device; + managed->dl_tensor.dtype = managed_dtype; + + int64_t *managed_shape_strides = PyMem_Malloc(sizeof(int64_t) * ndim * 2); + if (managed_shape_strides == NULL) { + PyErr_NoMemory(); + PyMem_Free(managed); + return NULL; + } + + int64_t *managed_shape = managed_shape_strides; + int64_t *managed_strides = managed_shape_strides + ndim; + for (int i = 0; i < ndim; ++i) { + managed_shape[i] = shape[i]; + // Strides in DLPack are items; in NumPy are bytes. + managed_strides[i] = strides[i] / itemsize; + } + + managed->dl_tensor.ndim = ndim; + managed->dl_tensor.shape = managed_shape; + managed->dl_tensor.strides = NULL; + if (PyArray_SIZE(self) != 1 && !PyArray_IS_C_CONTIGUOUS(self)) { + managed->dl_tensor.strides = managed_strides; + } + managed->dl_tensor.byte_offset = 0; + managed->manager_ctx = self; + managed->deleter = array_dlpack_deleter; + + PyObject *capsule = PyCapsule_New(managed, NPY_DLPACK_CAPSULE_NAME, + dlpack_capsule_deleter); + if (capsule == NULL) { + PyMem_Free(managed); + PyMem_Free(managed_shape_strides); + return NULL; + } + + // the capsule holds a reference + Py_INCREF(self); + return capsule; +} + +PyObject * +array_dlpack_device(PyArrayObject *self, PyObject *NPY_UNUSED(args)) +{ + DLDevice device = array_get_dl_device(self); + if (PyErr_Occurred()) { + return NULL; + } + return Py_BuildValue("ii", device.device_type, device.device_id); +} + +NPY_NO_EXPORT PyObject * +_from_dlpack(PyObject *NPY_UNUSED(self), PyObject *obj) { + PyObject *capsule = PyObject_CallMethod((PyObject *)obj->ob_type, + "__dlpack__", "O", obj); + if (capsule == NULL) { + return NULL; + } + + DLManagedTensor *managed = + (DLManagedTensor *)PyCapsule_GetPointer(capsule, + NPY_DLPACK_CAPSULE_NAME); + + if (managed == NULL) { + Py_DECREF(capsule); + return NULL; + } + + const int ndim = managed->dl_tensor.ndim; + if (ndim > NPY_MAXDIMS) { + PyErr_SetString(PyExc_RuntimeError, + "maxdims of DLPack tensor is higher than the supported " + "maxdims."); + Py_DECREF(capsule); + return NULL; + } + + DLDeviceType device_type = managed->dl_tensor.device.device_type; + if (device_type != kDLCPU && + device_type != kDLCUDAHost && + device_type != kDLROCMHost && + device_type != kDLCUDAManaged) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported device in DLTensor."); + Py_DECREF(capsule); + return NULL; + } + + if (managed->dl_tensor.dtype.lanes != 1) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported lanes in DLTensor dtype."); + Py_DECREF(capsule); + return NULL; + } + + int typenum = -1; + const uint8_t bits = managed->dl_tensor.dtype.bits; + const npy_intp itemsize = bits / 8; + switch (managed->dl_tensor.dtype.code) { + case kDLInt: + switch (bits) + { + case 8: typenum = NPY_INT8; break; + case 16: typenum = NPY_INT16; break; + case 32: typenum = NPY_INT32; break; + case 64: typenum = 
NPY_INT64; break; + } + break; + case kDLUInt: + switch (bits) + { + case 8: typenum = NPY_UINT8; break; + case 16: typenum = NPY_UINT16; break; + case 32: typenum = NPY_UINT32; break; + case 64: typenum = NPY_UINT64; break; + } + break; + case kDLFloat: + switch (bits) + { + case 16: typenum = NPY_FLOAT16; break; + case 32: typenum = NPY_FLOAT32; break; + case 64: typenum = NPY_FLOAT64; break; + } + break; + case kDLComplex: + switch (bits) + { + case 64: typenum = NPY_COMPLEX64; break; + case 128: typenum = NPY_COMPLEX128; break; + } + break; + } + + if (typenum == -1) { + PyErr_SetString(PyExc_RuntimeError, + "Unsupported dtype in DLTensor."); + Py_DECREF(capsule); + return NULL; + } + + npy_intp shape[NPY_MAXDIMS]; + npy_intp strides[NPY_MAXDIMS]; + + for (int i = 0; i < ndim; ++i) { + shape[i] = managed->dl_tensor.shape[i]; + // DLPack has elements as stride units, NumPy has bytes. + if (managed->dl_tensor.strides != NULL) { + strides[i] = managed->dl_tensor.strides[i] * itemsize; + } + } + + char *data = (char *)managed->dl_tensor.data + + managed->dl_tensor.byte_offset; + + PyArray_Descr *descr = PyArray_DescrFromType(typenum); + if (descr == NULL) { + Py_DECREF(capsule); + return NULL; + } + + PyObject *ret = PyArray_NewFromDescr(&PyArray_Type, descr, ndim, shape, + managed->dl_tensor.strides != NULL ? strides : NULL, data, 0, NULL); + if (ret == NULL) { + Py_DECREF(capsule); + return NULL; + } + + PyObject *new_capsule = PyCapsule_New(managed, + NPY_DLPACK_INTERNAL_CAPSULE_NAME, + array_dlpack_internal_capsule_deleter); + if (new_capsule == NULL) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + if (PyArray_SetBaseObject((PyArrayObject *)ret, new_capsule) < 0) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + if (PyCapsule_SetName(capsule, NPY_DLPACK_USED_CAPSULE_NAME) < 0) { + Py_DECREF(capsule); + Py_DECREF(ret); + return NULL; + } + + Py_DECREF(capsule); + return ret; +} + + diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 2d66c77dc..2ca8d9288 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -26,6 +26,7 @@ #include "shape.h" #include "strfuncs.h" #include "array_assign.h" +#include "npy_dlpack.h" #include "methods.h" #include "alloc.h" @@ -1821,22 +1822,8 @@ array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol) descr = PyArray_DESCR(self); - /* if the python version is below 3.8, the pickle module does not provide - * built-in support for protocol 5. 
We try importing the pickle5 - * backport instead */ -#if PY_VERSION_HEX >= 0x03080000 /* we expect protocol 5 to be available in Python 3.8 */ pickle_module = PyImport_ImportModule("pickle"); -#else - pickle_module = PyImport_ImportModule("pickle5"); - if (pickle_module == NULL) { - /* for protocol 5, raise a clear ImportError if pickle5 is not found - */ - PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 " - "requires the pickle5 module for Python >=3.6 and <3.8"); - return NULL; - } -#endif if (pickle_module == NULL){ return NULL; } @@ -2989,5 +2976,13 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = { {"view", (PyCFunction)array_view, METH_FASTCALL | METH_KEYWORDS, NULL}, + // For data interchange between libraries + {"__dlpack__", + (PyCFunction)array_dlpack, + METH_FASTCALL | METH_KEYWORDS, NULL}, + + {"__dlpack_device__", + (PyCFunction)array_dlpack_device, + METH_NOARGS, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index dea828ed9..a854bcb3b 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -70,6 +70,8 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "get_attr_string.h" #include "experimental_public_dtype_api.h" /* _get_experimental_dtype_api */ +#include "npy_dlpack.h" + /* ***************************************************************************** ** INCLUDE GENERATED CODE ** @@ -4231,7 +4233,6 @@ _reload_guard(PyObject *NPY_UNUSED(self)) { Py_RETURN_NONE; } - static struct PyMethodDef array_module_methods[] = { {"_get_implementing_args", (PyCFunction)array__get_implementing_args, @@ -4436,6 +4437,9 @@ static struct PyMethodDef array_module_methods[] = { {"get_handler_name", (PyCFunction) get_handler_name, METH_VARARGS, NULL}, + {"get_handler_version", + (PyCFunction) get_handler_version, + METH_VARARGS, NULL}, {"_add_newdoc_ufunc", (PyCFunction)add_newdoc_ufunc, METH_VARARGS, NULL}, {"_get_sfloat_dtype", @@ -4445,6 +4449,8 @@ static struct PyMethodDef array_module_methods[] = { {"_reload_guard", (PyCFunction)_reload_guard, METH_NOARGS, "Give a warning on reload and big warning in sub-interpreters."}, + {"_from_dlpack", (PyCFunction)_from_dlpack, + METH_O, NULL}, {NULL, NULL, 0, NULL} /* sentinel */ }; @@ -4675,14 +4681,14 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { PyObject *m, *d, *s; PyObject *c_api; - /* Initialize CPU features */ - if (npy_cpu_init() < 0) { - goto err; - } - /* Create the module and add the functions */ m = PyModule_Create(&moduledef); if (!m) { + return NULL; + } + + /* Initialize CPU features */ + if (npy_cpu_init() < 0) { goto err; } @@ -4934,5 +4940,6 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { PyErr_SetString(PyExc_RuntimeError, "cannot load multiarray module."); } + Py_DECREF(m); return NULL; } diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index e409e9874..564352f1f 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -233,8 +233,12 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr, PyArray_VectorUnaryFunc* castfunc; descr = PyArray_DescrFromScalar(scalar); + if (descr == NULL) { + return -1; + } castfunc = PyArray_GetCastFunc(descr, outcode->type_num); if (castfunc == NULL) { + Py_DECREF(descr); return -1; } if (PyTypeNum_ISEXTENDED(descr->type_num) || @@ -254,6 +258,7 @@ PyArray_CastScalarToCtype(PyObject *scalar, void *ctypeptr, NPY_ARRAY_CARRAY, NULL); if 
(aout == NULL) { Py_DECREF(ain); + Py_DECREF(descr); return -1; } castfunc(PyArray_DATA(ain), PyArray_DATA(aout), 1, ain, aout); diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 7c0710819..aaa694f34 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1532,8 +1532,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const * */ /**begin repeat - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ /**begin repeat1 @@ -1568,8 +1568,8 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void */ /**begin repeat2 - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ NPY_NO_EXPORT NPY_GCC_OPT_3 void diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index 0938cd050..081ca9957 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -187,7 +187,7 @@ NPY_NO_EXPORT void * #TYPE = FLOAT, DOUBLE# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# + * #kind = ceil, sqrt, absolute, square, reciprocal# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))) @@ -228,7 +228,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, /**end repeat**/ /**begin repeat - * #func = sin, cos# + * #func = sin, cos# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@, @@ -275,7 +275,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, ( /**end repeat**/ /**begin repeat - * #func = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# */ /**begin repeat1 diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src index 2d5917282..789733fb6 100644 --- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src +++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src @@ -1,6 +1,8 @@ /*@targets ** $maxopt baseline - ** sse2 vsx2 neon + ** sse2 sse41 + ** vsx2 + ** neon asimd **/ /** * Force use SSE only on x86, even if AVX2 or AVX512F are enabled @@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a) #define c_sqrt_f64 npy_sqrt #endif +#define c_ceil_f32 npy_ceilf +#define c_ceil_f64 npy_ceil + /******************************************************************************** ** Defining the SIMD kernels ********************************************************************************/ @@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a) */ #if @VCHK@ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #repl_0w1 = 0, 0, 0, 1# - * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #repl_0w1 = 0, 0, 0, 0, 1# + * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# */ /**begin repeat2 * #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG# @@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@ * #VCHK = NPY_SIMD, NPY_SIMD_F64# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #clear = 0, 1, 0, 0# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #clear = 
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 0938cd050..081ca9957 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -187,7 +187,7 @@ NPY_NO_EXPORT void
  * #TYPE = FLOAT, DOUBLE#
  */
 /**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
  */
 NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
     (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)))
@@ -228,7 +228,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@,
 /**end repeat**/
 
 /**begin repeat
- * #func = sin, cos#
+ * #func = sin, cos#
  */
 NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@,
@@ -275,7 +275,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
 /**end repeat**/
 
 /**begin repeat
- * #func = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
  */
 
 /**begin repeat1
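`ceil` drops out of the generated generic loops and reappears here among the CPU-dispatched declarations; the implementation follows in `loops_unary_fp.dispatch.c.src` below. The move has to stay bit-for-bit compatible with `npy_ceil`/`npy_ceilf`, including signed zeros and non-finite values. A consistency check one could run against such a build (a sketch, not part of the patch):

    import math
    import numpy as np

    # IEEE 754 edge cases a vectorized ceil must preserve:
    x = np.array([-0.5, -0.0, 1.5, np.inf, -np.inf, np.nan], dtype=np.float32)
    y = np.ceil(x)

    assert math.copysign(1, y[0]) == -1.0   # ceil(-0.5) is -0.0, not +0.0
    assert math.copysign(1, y[1]) == -1.0   # ceil(-0.0) keeps the sign
    assert y[2] == 2.0
    assert np.isinf(y[3]) and np.isinf(y[4])
    assert np.isnan(y[5])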
diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
index 2d5917282..789733fb6 100644
--- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
@@ -1,6 +1,8 @@
 /*@targets
 ** $maxopt baseline
- ** sse2 vsx2 neon
+ ** sse2 sse41
+ ** vsx2
+ ** neon asimd
 **/
 /**
  * Force use SSE only on x86, even if AVX2 or AVX512F are enabled
@@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a)
 #define c_sqrt_f64 npy_sqrt
 #endif
 
+#define c_ceil_f32 npy_ceilf
+#define c_ceil_f64 npy_ceil
+
 /********************************************************************************
 ** Defining the SIMD kernels
 ********************************************************************************/
@@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a)
  */
 #if @VCHK@
 /**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #repl_0w1 = 0, 0, 0, 1#
- * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #repl_0w1 = 0, 0, 0, 0, 1#
+ * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
  */
 /**begin repeat2
  * #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG#
@@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@
  * #VCHK = NPY_SIMD, NPY_SIMD_F64#
  */
 /**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #clear = 0, 1, 0, 0#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #clear = 0, 0, 1, 0, 0#
  */
 NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
 (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index d47be9a30..0e2c1ab8b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -169,7 +169,7 @@ run_@func@_avx512_skx_@TYPE@(char **args, npy_intp const *dimensions, npy_intp c
  */
 
 /**begin repeat2
- * #func = rint, floor, ceil, trunc#
+ * #func = rint, floor, trunc#
  */
 
 #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -850,12 +850,6 @@ fma_floor_@vsub@(@vtype@ x)
 }
 
 NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
-fma_ceil_@vsub@(@vtype@ x)
-{
-    return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
 fma_trunc_@vsub@(@vtype@ x)
 {
     return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO);
@@ -988,12 +982,6 @@ avx512_floor_@vsub@(@vtype@ x)
 }
 
 NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
-avx512_ceil_@vsub@(@vtype@ x)
-{
-    return _mm512_roundscale_@vsub@(x, 0x0A);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
 avx512_trunc_@vsub@(@vtype@ x)
 {
     return _mm512_roundscale_@vsub@(x, 0x0B);
@@ -1327,8 +1315,8 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s
  */
 
 /**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
  */
 
 #if defined @CHK@
@@ -1398,8 +1386,8 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
  */
 
 /**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
  */
 
 #if defined @CHK@
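The widened `@targets` lists above make the build emit several variants of each kernel (for example SSE2 and SSE41 on x86, NEON and ASIMD on ARM) and select one when the module is imported. Which variants were built and which the host CPU can actually run is visible from Python; a hedged sketch using the introspection attributes that `numpy.core._multiarray_umath` exposes in this generation of NumPy:

    from numpy.core._multiarray_umath import (
        __cpu_baseline__,   # features every code path may assume
        __cpu_dispatch__,   # features compiled as runtime-selected variants
        __cpu_features__,   # feature name -> supported on this CPU
    )

    print("baseline:", __cpu_baseline__)
    print("usable:", [f for f in __cpu_dispatch__ if __cpu_features__[f]])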
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 237af81b2..186f18a62 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -4926,65 +4926,6 @@ fail:
 
 /*
- * TODO: The implementation below can be replaced with PyVectorcall_Call
- * when available (should be Python 3.8+).
- */
-static PyObject *
-ufunc_generic_call(
-        PyUFuncObject *ufunc, PyObject *args, PyObject *kwds)
-{
-    Py_ssize_t len_args = PyTuple_GET_SIZE(args);
-    /*
-     * Wrapper for tp_call to tp_fastcall, to support both on older versions
-     * of Python. (and generally simplifying support of both versions in the
-     * same codebase.
-     */
-    if (kwds == NULL) {
-        return ufunc_generic_fastcall(ufunc,
-                PySequence_Fast_ITEMS(args), len_args, NULL, NPY_FALSE);
-    }
-
-    PyObject *new_args[NPY_MAXARGS];
-    Py_ssize_t len_kwds = PyDict_Size(kwds);
-
-    if (NPY_UNLIKELY(len_args + len_kwds > NPY_MAXARGS)) {
-        /*
-         * We do not have enough scratch-space, so we have to abort;
-         * In practice this error should not be seen by users.
-         */
-        PyErr_Format(PyExc_ValueError,
-                "%s() takes from %d to %d positional arguments but "
-                "%zd were given",
-                ufunc_get_name_cstr(ufunc) , ufunc->nin, ufunc->nargs, len_args);
-        return NULL;
-    }
-
-    /* Copy args into the scratch space */
-    for (Py_ssize_t i = 0; i < len_args; i++) {
-        new_args[i] = PyTuple_GET_ITEM(args, i);
-    }
-
-    PyObject *kwnames = PyTuple_New(len_kwds);
-
-    PyObject *key, *value;
-    Py_ssize_t pos = 0;
-    Py_ssize_t i = 0;
-    while (PyDict_Next(kwds, &pos, &key, &value)) {
-        Py_INCREF(key);
-        PyTuple_SET_ITEM(kwnames, i, key);
-        new_args[i + len_args] = value;
-        i++;
-    }
-
-    PyObject *res = ufunc_generic_fastcall(ufunc,
-            new_args, len_args, kwnames, NPY_FALSE);
-    Py_DECREF(kwnames);
-    return res;
-}
-
-
-#if PY_VERSION_HEX >= 0x03080000
-/*
  * Implement vectorcallfunc which should be defined with Python 3.8+.
  * In principle this could be backported, but the speed gain seems moderate
  * since ufunc calls often do not have keyword arguments and always have
@@ -5001,7 +4942,6 @@ ufunc_generic_vectorcall(PyObject *ufunc,
     return ufunc_generic_fastcall((PyUFuncObject *)ufunc,
             args, PyVectorcall_NARGS(len_args), kwnames, NPY_FALSE);
 }
-#endif /* PY_VERSION_HEX >= 0x03080000 */
 
 
 NPY_NO_EXPORT PyObject *
@@ -5178,11 +5118,7 @@ PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, voi
     ufunc->core_dim_flags = NULL;
     ufunc->userloops = NULL;
     ufunc->ptr = NULL;
-#if PY_VERSION_HEX >= 0x03080000
     ufunc->vectorcall = &ufunc_generic_vectorcall;
-#else
-    ufunc->reserved2 = NULL;
-#endif
     ufunc->reserved1 = 0;
     ufunc->iter_flags = 0;
@@ -6437,19 +6373,15 @@ NPY_NO_EXPORT PyTypeObject PyUFunc_Type = {
     .tp_basicsize = sizeof(PyUFuncObject),
     .tp_dealloc = (destructor)ufunc_dealloc,
     .tp_repr = (reprfunc)ufunc_repr,
-    .tp_call = (ternaryfunc)ufunc_generic_call,
+    .tp_call = &PyVectorcall_Call,
     .tp_str = (reprfunc)ufunc_repr,
     .tp_flags = Py_TPFLAGS_DEFAULT |
-#if PY_VERSION_HEX >= 0x03080000
                 _Py_TPFLAGS_HAVE_VECTORCALL |
-#endif
                 Py_TPFLAGS_HAVE_GC,
     .tp_traverse = (traverseproc)ufunc_traverse,
     .tp_methods = ufunc_methods,
     .tp_getset = ufunc_getset,
-#if PY_VERSION_HEX >= 0x03080000
     .tp_vectorcall_offset = offsetof(PyUFuncObject, vectorcall),
-#endif
 };
 
 /* End of code for ufunc objects */
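With `ufunc_generic_call` deleted, `tp_call` becomes CPython's stock `PyVectorcall_Call`, which forwards a classic `obj(*args, **kwargs)` invocation to the function stored at `tp_vectorcall_offset`. The removed scratch-space copy (and its NPY_MAXARGS ceiling) existed only to translate tuple-and-dict calls into the fastcall convention, and CPython now performs that translation itself. From Python nothing changes except overhead; both call shapes below reach the same fastcall entry point:

    import numpy as np

    a = np.arange(10.0)
    out = np.empty_like(a)

    np.add(a, a)           # positional-only call
    np.add(a, a, out=out)  # keyword call, no dict repacking anymore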
diff --git a/numpy/core/tests/test_dlpack.py b/numpy/core/tests/test_dlpack.py
new file mode 100644
index 000000000..f848b2008
--- /dev/null
+++ b/numpy/core/tests/test_dlpack.py
@@ -0,0 +1,109 @@
+import sys
+import pytest
+
+import numpy as np
+from numpy.testing import assert_array_equal, IS_PYPY
+
+
+class TestDLPack:
+    @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.")
+    def test_dunder_dlpack_refcount(self):
+        x = np.arange(5)
+        y = x.__dlpack__()
+        assert sys.getrefcount(x) == 3
+        del y
+        assert sys.getrefcount(x) == 2
+
+    def test_dunder_dlpack_stream(self):
+        x = np.arange(5)
+        x.__dlpack__(stream=None)
+
+        with pytest.raises(RuntimeError):
+            x.__dlpack__(stream=1)
+
+    def test_strides_not_multiple_of_itemsize(self):
+        dt = np.dtype([('int', np.int32), ('char', np.int8)])
+        y = np.zeros((5,), dtype=dt)
+        z = y['int']
+
+        with pytest.raises(RuntimeError):
+            np._from_dlpack(z)
+
+    @pytest.mark.skipif(IS_PYPY, reason="PyPy can't get refcounts.")
+    def test_from_dlpack_refcount(self):
+        x = np.arange(5)
+        y = np._from_dlpack(x)
+        assert sys.getrefcount(x) == 3
+        del y
+        assert sys.getrefcount(x) == 2
+
+    @pytest.mark.parametrize("dtype", [
+        np.int8, np.int16, np.int32, np.int64,
+        np.uint8, np.uint16, np.uint32, np.uint64,
+        np.float16, np.float32, np.float64,
+        np.complex64, np.complex128
+    ])
+    def test_dtype_passthrough(self, dtype):
+        x = np.arange(5, dtype=dtype)
+        y = np._from_dlpack(x)
+
+        assert y.dtype == x.dtype
+        assert_array_equal(x, y)
+
+    def test_invalid_dtype(self):
+        x = np.asarray(np.datetime64('2021-05-27'))
+
+        with pytest.raises(TypeError):
+            np._from_dlpack(x)
+
+    def test_invalid_byte_swapping(self):
+        dt = np.dtype('=i8').newbyteorder()
+        x = np.arange(5, dtype=dt)
+
+        with pytest.raises(TypeError):
+            np._from_dlpack(x)
+
+    def test_non_contiguous(self):
+        x = np.arange(25).reshape((5, 5))
+
+        y1 = x[0]
+        assert_array_equal(y1, np._from_dlpack(y1))
+
+        y2 = x[:, 0]
+        assert_array_equal(y2, np._from_dlpack(y2))
+
+        y3 = x[1, :]
+        assert_array_equal(y3, np._from_dlpack(y3))
+
+        y4 = x[1]
+        assert_array_equal(y4, np._from_dlpack(y4))
+
+        y5 = np.diagonal(x).copy()
+        assert_array_equal(y5, np._from_dlpack(y5))
+
+    @pytest.mark.parametrize("ndim", range(33))
+    def test_higher_dims(self, ndim):
+        shape = (1,) * ndim
+        x = np.zeros(shape, dtype=np.float64)
+
+        assert shape == np._from_dlpack(x).shape
+
+    def test_dlpack_device(self):
+        x = np.arange(5)
+        assert x.__dlpack_device__() == (1, 0)
+        assert np._from_dlpack(x).__dlpack_device__() == (1, 0)
+
+    def dlpack_deleter_exception(self):
+        x = np.arange(5)
+        _ = x.__dlpack__()
+        raise RuntimeError
+
+    def test_dlpack_destructor_exception(self):
+        with pytest.raises(RuntimeError):
+            self.dlpack_deleter_exception()
+
+    def test_readonly(self):
+        x = np.arange(5)
+        x.flags.writeable = False
+        with pytest.raises(TypeError):
+            x.__dlpack__()
diff --git a/numpy/core/tests/test_mem_policy.py b/numpy/core/tests/test_mem_policy.py
index 7fec8897f..abf340062 100644
--- a/numpy/core/tests/test_mem_policy.py
+++ b/numpy/core/tests/test_mem_policy.py
@@ -179,6 +179,7 @@ def get_module(tmp_path):
                  };
                  static PyDataMem_Handler secret_data_handler = {
                      "secret_data_allocator",
+                     1,
                      {
                          &secret_data_handler_ctx,  /* ctx */
                          shift_alloc,               /* malloc */
@@ -212,17 +213,22 @@ def get_module(tmp_path):
 
 def test_set_policy(get_module):
 
     get_handler_name = np.core.multiarray.get_handler_name
+    get_handler_version = np.core.multiarray.get_handler_version
     orig_policy_name = get_handler_name()
 
     a = np.arange(10).reshape((2, 5))  # a doesn't own its own data
     assert get_handler_name(a) is None
+    assert get_handler_version(a) is None
     assert get_handler_name(a.base) == orig_policy_name
+    assert get_handler_version(a.base) == 1
 
     orig_policy = get_module.set_secret_data_policy()
 
     b = np.arange(10).reshape((2, 5))  # b doesn't own its own data
     assert get_handler_name(b) is None
+    assert get_handler_version(b) is None
     assert get_handler_name(b.base) == 'secret_data_allocator'
+    assert get_handler_version(b.base) == 1
 
     if orig_policy_name == 'default_allocator':
         get_module.set_old_policy(None)  # tests PyDataMem_SetHandler(NULL)
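The new integer at the top of the handler struct (the `1,`) is a version field: it lets `PyDataMem_Handler` grow new members later without breaking compiled consumers, and `get_handler_version` exposes it next to `get_handler_name`. A sketch of the Python-level pairing, assuming a stock build where the default allocator is active:

    import numpy as np

    get_name = np.core.multiarray.get_handler_name
    get_version = np.core.multiarray.get_handler_version

    a = np.arange(10)         # owns its data
    print(get_name(a))        # 'default_allocator' on a stock build
    print(get_version(a))     # 1, the current struct version

    v = a[1:]                 # a view owns nothing
    assert get_name(v) is None and get_version(v) is None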
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index d7df386fe..85bf5d0d1 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -196,7 +196,6 @@ class _Config:
         native = '-march=native',
         opt = '-O3',
         werror = '-Werror',
-        cxx = '-std=c++11',
     ),
     clang = dict(
         native = '-march=native',
@@ -207,25 +206,21 @@ class _Config:
         # "unused arguments" warnings.
         # see https://github.com/numpy/numpy/issues/19624
         werror = '-Werror=switch -Werror',
-        cxx = '-std=c++11',
     ),
     icc = dict(
         native = '-xHost',
         opt = '-O3',
         werror = '-Werror',
-        cxx = '-std=c++11',
     ),
     iccw = dict(
         native = '/QxHost',
         opt = '/O3',
         werror = '/Werror',
-        cxx = '-std=c++11',
     ),
     msvc = dict(
         native = None,
         opt = '/O2',
         werror = '/WX',
-        cxx = '-std=c++11',
     )
 )
 conf_min_features = dict(
diff --git a/numpy/distutils/mingw32ccompiler.py b/numpy/distutils/mingw32ccompiler.py
index 82d296434..fbe3655c9 100644
--- a/numpy/distutils/mingw32ccompiler.py
+++ b/numpy/distutils/mingw32ccompiler.py
@@ -24,7 +24,6 @@ from numpy.distutils import log
 # 3. Force windows to use g77
 
 import distutils.cygwinccompiler
-from distutils.version import StrictVersion
 from distutils.unixccompiler import UnixCCompiler
 from distutils.msvccompiler import get_build_version as get_build_msvc_version
 from distutils.errors import UnknownFileError
@@ -62,35 +61,6 @@ class Mingw32CCompiler(distutils.cygwinccompiler.CygwinCCompiler):
         distutils.cygwinccompiler.CygwinCCompiler.__init__ (self, verbose,
                                                             dry_run, force)
 
-        # we need to support 3.2 which doesn't match the standard
-        # get_versions methods regex
-        if self.gcc_version is None:
-            try:
-                out_string = subprocess.check_output(['gcc', '-dumpversion'])
-            except (OSError, CalledProcessError):
-                out_string = ""  # ignore failures to match old behavior
-            result = re.search(r'(\d+\.\d+)', out_string)
-            if result:
-                self.gcc_version = StrictVersion(result.group(1))
-
-        # A real mingw32 doesn't need to specify a different entry point,
-        # but cygwin 2.91.57 in no-cygwin-mode needs it.
-        if self.gcc_version <= "2.91.57":
-            entry_point = '--entry _DllMain@12'
-        else:
-            entry_point = ''
-
-        if self.linker_dll == 'dllwrap':
-            # Commented out '--driver-name g++' part that fixes weird
-            # g++.exe: g++: No such file or directory
-            # error (mingw 1.0 in Enthon24 tree, gcc-3.4.5).
-            # If the --driver-name part is required for some environment
-            # then make the inclusion of this part specific to that
-            # environment.
-            self.linker = 'dllwrap'  # --driver-name g++'
-        elif self.linker_dll == 'gcc':
-            self.linker = 'g++'
-
         # **changes: eric jones 4/11/01
         # 1. Check for import library on Windows.  Build if it doesn't exist.
 
@@ -113,42 +83,18 @@ class Mingw32CCompiler(distutils.cygwinccompiler.CygwinCCompiler):
         # kind of bad consequences, like using Py_ModuleInit4 instead of
         # Py_ModuleInit4_64, etc... So we add it here
         if get_build_architecture() == 'AMD64':
-            if self.gcc_version < "4.0":
-                self.set_executables(
-                    compiler='gcc -g -DDEBUG -DMS_WIN64 -mno-cygwin -O0 -Wall',
-                    compiler_so='gcc -g -DDEBUG -DMS_WIN64 -mno-cygwin -O0'
-                                ' -Wall -Wstrict-prototypes',
-                    linker_exe='gcc -g -mno-cygwin',
-                    linker_so='gcc -g -mno-cygwin -shared')
-            else:
-                # gcc-4 series releases do not support -mno-cygwin option
-                self.set_executables(
-                    compiler='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall',
-                    compiler_so='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall -Wstrict-prototypes',
-                    linker_exe='gcc -g',
-                    linker_so='gcc -g -shared')
+            self.set_executables(
+                compiler='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall',
+                compiler_so='gcc -g -DDEBUG -DMS_WIN64 -O0 -Wall '
+                            '-Wstrict-prototypes',
+                linker_exe='gcc -g',
+                linker_so='gcc -g -shared')
         else:
-            if self.gcc_version <= "3.0.0":
-                self.set_executables(
-                    compiler='gcc -mno-cygwin -O2 -w',
-                    compiler_so='gcc -mno-cygwin -mdll -O2 -w'
-                                ' -Wstrict-prototypes',
-                    linker_exe='g++ -mno-cygwin',
-                    linker_so='%s -mno-cygwin -mdll -static %s' %
-                              (self.linker, entry_point))
-            elif self.gcc_version < "4.0":
-                self.set_executables(
-                    compiler='gcc -mno-cygwin -O2 -Wall',
-                    compiler_so='gcc -mno-cygwin -O2 -Wall'
-                                ' -Wstrict-prototypes',
-                    linker_exe='g++ -mno-cygwin',
-                    linker_so='g++ -mno-cygwin -shared')
-            else:
-                # gcc-4 series releases do not support -mno-cygwin option
-                self.set_executables(compiler='gcc -O2 -Wall',
-                                     compiler_so='gcc -O2 -Wall -Wstrict-prototypes',
-                                     linker_exe='g++ ',
-                                     linker_so='g++ -shared')
+            self.set_executables(
+                compiler='gcc -O2 -Wall',
+                compiler_so='gcc -O2 -Wall -Wstrict-prototypes',
+                linker_exe='g++ ',
+                linker_so='g++ -shared')
         # added for python2.3 support
         # we can't pass it through set_executables because pre 2.2 would fail
         self.compiler_cxx = ['g++']
@@ -198,10 +144,7 @@ class Mingw32CCompiler(distutils.cygwinccompiler.CygwinCCompiler):
                 extra_postargs, build_temp, target_lang)
-        if self.gcc_version < "3.0.0":
-            func = distutils.cygwinccompiler.CygwinCCompiler.link
-        else:
-            func = UnixCCompiler.link
+        func = UnixCCompiler.link
         func(*args[:func.__code__.co_argcount])
         return
diff --git a/numpy/distutils/unixccompiler.py b/numpy/distutils/unixccompiler.py
index 733a9fc50..4884960fd 100644
--- a/numpy/distutils/unixccompiler.py
+++ b/numpy/distutils/unixccompiler.py
@@ -5,6 +5,7 @@ unixccompiler - can handle very long argument lists for ar.
 import os
 import sys
 import subprocess
+import shlex
 
 from distutils.errors import CompileError, DistutilsExecError, LibError
 from distutils.unixccompiler import UnixCCompiler
@@ -30,15 +31,15 @@ def UnixCCompiler__compile(self, obj, src, ext, cc_args, extra_postargs, pp_opts
     if 'OPT' in os.environ:
         # XXX who uses this?
         from sysconfig import get_config_vars
-        opt = " ".join(os.environ['OPT'].split())
-        gcv_opt = " ".join(get_config_vars('OPT')[0].split())
-        ccomp_s = " ".join(self.compiler_so)
+        opt = shlex.join(shlex.split(os.environ['OPT']))
+        gcv_opt = shlex.join(shlex.split(get_config_vars('OPT')[0]))
+        ccomp_s = shlex.join(self.compiler_so)
         if opt not in ccomp_s:
             ccomp_s = ccomp_s.replace(gcv_opt, opt)
-            self.compiler_so = ccomp_s.split()
-        llink_s = " ".join(self.linker_so)
+            self.compiler_so = shlex.split(ccomp_s)
+        llink_s = shlex.join(self.linker_so)
         if opt not in llink_s:
-            self.linker_so = llink_s.split() + opt.split()
+            self.linker_so = self.linker_so + shlex.split(opt)
 
     display = '%s: %s' % (os.path.basename(self.compiler_so[0]),
                           src)
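`shlex.join` is new in Python 3.8, which NumPy can now require, and together with `shlex.split` it round-trips arguments that contain spaces or quotes; the old `" ".join(...)`/`.split()` pair silently broke such flags apart. A small illustration:

    import shlex

    flags = ['-I/opt/my include dir', '-O2']

    # Plain join/split loses the grouping of the first flag:
    assert " ".join(flags).split() == ['-I/opt/my', 'include', 'dir', '-O2']

    # shlex quotes on the way out and honours the quotes on the way back:
    assert shlex.split(shlex.join(flags)) == flags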
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py
index 4d57acdb2..3c9983edf 100644
--- a/numpy/lib/function_base.py
+++ b/numpy/lib/function_base.py
@@ -67,7 +67,7 @@ __all__ = [
 # fix_gamma : Callable
 #     A function used for discret methods to force the index to a specific value.
 _QuantileInterpolation = dict(
-    # --- HYNDMAN and FAN methods
+    # --- HYNDMAN AND FAN METHODS
     # Discrete methods
     inverted_cdf=dict(
         get_virtual_index=lambda n, quantiles: _inverted_cdf(n, quantiles),
@@ -102,10 +102,12 @@ _QuantileInterpolation = dict(
         _compute_virtual_index(n, quantiles, 0, 0),
         fix_gamma=lambda gamma, _: gamma,
     ),
-    # Default value
+    # Default method.
+    # To avoid some rounding issues, `(n-1) * quantiles` is preferred to
+    # `_compute_virtual_index(n, quantiles, 1, 1)`.
+    # They are mathematically equivalent.
     linear=dict(
-        get_virtual_index=lambda n, quantiles:
-        _compute_virtual_index(n, quantiles, 1, 1),
+        get_virtual_index=lambda n, quantiles: (n - 1) * quantiles,
         fix_gamma=lambda gamma, _: gamma,
     ),
     median_unbiased=dict(
@@ -118,7 +120,7 @@ _QuantileInterpolation = dict(
         _compute_virtual_index(n, quantiles, 3 / 8.0, 3 / 8.0),
         fix_gamma=lambda gamma, _: gamma,
     ),
-    # --- OTHER METHODS fixme add deprecated ?
+    # --- OTHER METHODS
     lower=dict(
         get_virtual_index=lambda n, quantiles: np.floor(
             (n - 1) * quantiles).astype(np.intp),
@@ -3901,7 +3903,7 @@ def percentile(a,
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive' (default)
+        * (NPY 5): 'linear'
 
         New options:
 
@@ -3911,7 +3913,7 @@ def percentile(a,
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive', aliased with 'linear' (default)
+        * (H&F 7): 'linear' (default)
         * (H&F 8): 'median_unbiased'
        * (H&F 9): 'normal_unbiased'
 
@@ -4007,8 +4009,7 @@ def percentile(a,
         * alpha = 0
         * beta = 0
 
-    inclusive:
-        Default method, aliased with "linear".
+    linear:
         method 7 of H&F [1]_.
         This method give continuous results using:
         * alpha = 1
@@ -4164,7 +4165,7 @@ def quantile(a,
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive' (default)
+        * (NPY 5): 'linear'
 
         New options:
 
@@ -4174,7 +4175,7 @@ def quantile(a,
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive', aliased with 'linear' (default)
+        * (H&F 7): 'linear' (default)
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
 
@@ -4261,8 +4262,7 @@ def quantile(a,
         * alpha = 0
         * beta = 0
 
-    inclusive:
-        Default method, aliased with "linear".
+    linear:
         method 7 of H&F [1]_.
         This method give continuous results using:
         * alpha = 1
diff --git a/numpy/lib/function_base.pyi b/numpy/lib/function_base.pyi
index 4bbd873a3..82c625fed 100644
--- a/numpy/lib/function_base.pyi
+++ b/numpy/lib/function_base.pyi
@@ -514,7 +514,6 @@ _InterpolationKind = L[
     "higher",
     "midpoint",
     "nearest",
-    "inclusive",
 ]
 
 @overload
diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py
index 9fab77f45..7e953be03 100644
--- a/numpy/lib/nanfunctions.py
+++ b/numpy/lib/nanfunctions.py
@@ -1277,7 +1277,7 @@ def nanpercentile(
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive' (default)
+        * (NPY 5): 'linear' (default)
 
         New options:
 
@@ -1287,7 +1287,7 @@ def nanpercentile(
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive', aliased with 'linear' (default)
+        * (H&F 7): 'linear' (default)
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
 
@@ -1418,7 +1418,7 @@ def nanquantile(
         * (NPY 2): 'higher',
         * (NPY 3): 'midpoint'
         * (NPY 4): 'nearest'
-        * (NPY 5): 'linear', aliased with 'inclusive' (default)
+        * (NPY 5): 'linear' (default)
 
         New options:
 
@@ -1428,7 +1428,7 @@ def nanquantile(
         * (H&F 4): 'interpolated_inverted_cdf'
         * (H&F 5): 'hazen'
         * (H&F 6): 'weibull'
-        * (H&F 7): 'inclusive', aliased with 'linear' (default)
+        * (H&F 7): 'linear' (default)
         * (H&F 8): 'median_unbiased'
         * (H&F 9): 'normal_unbiased'
diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py
index d5fa012f1..1c274afae 100644
--- a/numpy/lib/tests/test_function_base.py
+++ b/numpy/lib/tests/test_function_base.py
@@ -3356,6 +3356,14 @@ class TestPercentile:
 class TestQuantile:
     # most of this is already tested by TestPercentile
 
+    def test_max_ulp(self):
+        x = [0.0, 0.2, 0.4]
+        a = np.quantile(x, 0.45)
+        # The default linear method would result in 0 + 0.2 * (0.45/2) = 0.18.
+        # 0.18 is not exactly representable and the formula leads to a 1 ULP
+        # different result. Ensure it is this exact within 1 ULP, see gh-20331.
+        np.testing.assert_array_max_ulp(a, 0.18, maxulp=1)
+
     def test_basic(self):
         x = np.arange(8) * 0.5
         assert_equal(np.quantile(x, 0), 0.)
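With alpha = beta = 1 the general Hyndman and Fan virtual index, q*n + alpha + q*(1 - alpha - beta) - 1, reduces algebraically to q*(n - 1), so the new lambda computes the same index with one fewer floating-point operation; that extra rounding step is what produced the 1 ULP drift pinned down by `test_max_ulp` above. The test case, spelled out:

    import numpy as np

    x = [0.0, 0.2, 0.4]
    q = 0.45
    n = len(x)

    # 'linear' (H&F 7): virtual index = q * (n - 1) = 0.9, i.e. interpolate
    # 90% of the way from x[0] to x[1]: 0.0 + 0.9 * (0.2 - 0.0) = 0.18.
    np.testing.assert_array_max_ulp(np.quantile(x, q), 0.18, maxulp=1)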
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index 5347ea125..7087b6e1d 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3107,7 +3107,7 @@ cdef class Generator:
         `a` > 1.
 
         The Zipf distribution (also known as the zeta distribution) is a
-        continuous probability distribution that satisfies Zipf's law: the
+        discrete probability distribution that satisfies Zipf's law: the
         frequency of an item is inversely proportional to its rank in a
         frequency table.
 
@@ -3135,9 +3135,10 @@ cdef class Generator:
         -----
         The probability density for the Zipf distribution is
 
-        .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+        .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
 
-        where :math:`\\zeta` is the Riemann Zeta function.
+        for integers :math:`k \geq 1`, where :math:`\\zeta` is the Riemann Zeta
+        function.
 
         It is named for the American linguist George Kingsley Zipf, who noted
         that the frequency of any word in a sample of a language is inversely
@@ -3153,22 +3154,29 @@ cdef class Generator:
         --------
         Draw samples from the distribution:
 
-        >>> a = 2. # parameter
-        >>> s = np.random.default_rng().zipf(a, 1000)
+        >>> a = 4.0
+        >>> n = 20000
+        >>> s = np.random.default_rng().zipf(a, size=n)
 
         Display the histogram of the samples, along with
-        the probability density function:
+        the expected histogram based on the probability
+        density function:
 
         >>> import matplotlib.pyplot as plt
-        >>> from scipy import special  # doctest: +SKIP
+        >>> from scipy.special import zeta  # doctest: +SKIP
+
+        `bincount` provides a fast histogram for small integers.
 
-        Truncate s values at 50 so plot is interesting:
+        >>> count = np.bincount(s)
+        >>> k = np.arange(1, s.max() + 1)
 
-        >>> count, bins, ignored = plt.hist(s[s<50],
-        ...         50, density=True)
-        >>> x = np.arange(1., 50.)
-        >>> y = x**(-a) / special.zetac(a)  # doctest: +SKIP
-        >>> plt.plot(x, y/max(y), linewidth=2, color='r')  # doctest: +SKIP
+        >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+        >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+        ...          label='expected count')  # doctest: +SKIP
+        >>> plt.semilogy()
+        >>> plt.grid(alpha=0.4)
+        >>> plt.legend()
+        >>> plt.title(f'Zipf sample, a={a}, size={n}')
         >>> plt.show()
 
         """
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index 81a526ab4..3e13503d0 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -3609,7 +3609,7 @@ cdef class RandomState:
         `a` > 1.
 
         The Zipf distribution (also known as the zeta distribution) is a
-        continuous probability distribution that satisfies Zipf's law: the
+        discrete probability distribution that satisfies Zipf's law: the
         frequency of an item is inversely proportional to its rank in a
         frequency table.
 
@@ -3642,9 +3642,10 @@ cdef class RandomState:
         -----
         The probability density for the Zipf distribution is
 
-        .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+        .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
 
-        where :math:`\\zeta` is the Riemann Zeta function.
+        for integers :math:`k \geq 1`, where :math:`\\zeta` is the Riemann Zeta
+        function.
 
         It is named for the American linguist George Kingsley Zipf, who noted
         that the frequency of any word in a sample of a language is inversely
@@ -3660,21 +3661,29 @@ cdef class RandomState:
         --------
         Draw samples from the distribution:
 
-        >>> a = 2. # parameter
-        >>> s = np.random.zipf(a, 1000)
+        >>> a = 4.0
+        >>> n = 20000
+        >>> s = np.random.zipf(a, n)
 
         Display the histogram of the samples, along with
-        the probability density function:
+        the expected histogram based on the probability
+        density function:
 
         >>> import matplotlib.pyplot as plt
-        >>> from scipy import special  # doctest: +SKIP
+        >>> from scipy.special import zeta  # doctest: +SKIP
+
+        `bincount` provides a fast histogram for small integers.
 
-        Truncate s values at 50 so plot is interesting:
+        >>> count = np.bincount(s)
+        >>> k = np.arange(1, s.max() + 1)
 
-        >>> count, bins, ignored = plt.hist(s[s<50], 50, density=True)
-        >>> x = np.arange(1., 50.)
-        >>> y = x**(-a) / special.zetac(a)  # doctest: +SKIP
-        >>> plt.plot(x, y/max(y), linewidth=2, color='r')  # doctest: +SKIP
+        >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+        >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+        ...          label='expected count')  # doctest: +SKIP
+        >>> plt.semilogy()
+        >>> plt.grid(alpha=0.4)
+        >>> plt.legend()
+        >>> plt.title(f'Zipf sample, a={a}, size={n}')
         >>> plt.show()
 
         """
diff --git a/numpy/typing/_generic_alias.py b/numpy/typing/_generic_alias.py
index 932f12dd0..1eb2c8c05 100644
--- a/numpy/typing/_generic_alias.py
+++ b/numpy/typing/_generic_alias.py
@@ -185,6 +185,8 @@ class _GenericAlias:
         "__mro_entries__",
         "__reduce__",
         "__reduce_ex__",
+        "__copy__",
+        "__deepcopy__",
     })
 
     def __getattribute__(self, name: str) -> Any:
diff --git a/numpy/typing/tests/test_generic_alias.py b/numpy/typing/tests/test_generic_alias.py
index 3021d9859..39343420b 100644
--- a/numpy/typing/tests/test_generic_alias.py
+++ b/numpy/typing/tests/test_generic_alias.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import sys
+import copy
 import types
 import pickle
 import weakref
@@ -80,6 +81,21 @@ class TestGenericAlias:
             value_ref = func(NDArray_ref)
             assert value == value_ref
 
+    @pytest.mark.parametrize("name,func", [
+        ("__copy__", lambda n: n == copy.copy(n)),
+        ("__deepcopy__", lambda n: n == copy.deepcopy(n)),
+    ])
+    def test_copy(self, name: str, func: FuncType) -> None:
+        value = func(NDArray)
+
+        # xref bpo-45167
+        GE_398 = (
+            sys.version_info[:2] == (3, 9) and sys.version_info >= (3, 9, 8)
+        )
+        if GE_398 or sys.version_info >= (3, 10, 1):
+            value_ref = func(NDArray_ref)
+            assert value == value_ref
+
     def test_weakref(self) -> None:
         """Test ``__weakref__``."""
         value = weakref.ref(NDArray)()
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -30,8 +30,7 @@ import re
 
 # Python supported version checks. Keep right after stdlib imports to ensure we
 # get a sensible error for older Python versions
-# This needs to be changed to 3.8 for 1.22 release, but 3.7 is needed for LGTM.
-if sys.version_info[:2] < (3, 7):
+if sys.version_info[:2] < (3, 8):
     raise RuntimeError("Python version >= 3.8 required.")
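Adding `__copy__` and `__deepcopy__` to the passthrough set brings the `_GenericAlias` backport in line with `types.GenericAlias` after bpo-45167 (fixed upstream in CPython 3.9.8 and 3.10.1): copying an alias yields an equivalent alias, as `test_copy` above exercises. A quick check, assuming a NumPy build with this patch on a new enough CPython:

    import copy
    import numpy as np
    import numpy.typing as npt

    alias = npt.NDArray[np.float64]

    # Shallow and deep copies preserve the parameterized alias.
    assert copy.copy(alias) == alias
    assert copy.deepcopy(alias) == alias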
