diff options
Diffstat (limited to 'numpy')
104 files changed, 5985 insertions, 488 deletions
diff --git a/numpy/__init__.py b/numpy/__init__.py index e6a24f0d1..550fb1772 100644 --- a/numpy/__init__.py +++ b/numpy/__init__.py @@ -136,6 +136,9 @@ else: __all__ = ['ModuleDeprecationWarning', 'VisibleDeprecationWarning'] + # mapping of {name: (value, deprecation_msg)} + __deprecated_attrs__ = {} + # Allow distributors to run custom init code from . import _distributor_init @@ -156,11 +159,35 @@ else: from . import matrixlib as _mat from .matrixlib import * - # Make these accessible from numpy name-space - # but not imported in from numpy import * - # TODO[gh-6103]: Deprecate these - from builtins import bool, int, float, complex, object, str - from .compat import long, unicode + # Deprecations introduced in NumPy 1.20.0, 2020-06-06 + import builtins as _builtins + __deprecated_attrs__.update({ + n: ( + getattr(_builtins, n), + "`np.{n}` is a deprecated alias for the builtin `{n}`. " + "Use `{n}` by itself, which is identical in behavior, to silence " + "this warning. " + "If you specifically wanted the numpy scalar type, use `np.{n}_` " + "here." + .format(n=n) + ) + for n in ["bool", "int", "float", "complex", "object", "str"] + }) + __deprecated_attrs__.update({ + n: ( + getattr(compat, n), + "`np.{n}` is a deprecated alias for `np.compat.{n}`. " + "Use `np.compat.{n}` by itself, which is identical in behavior, " + "to silence this warning. " + "In the likely event your code does not need to work on Python 2 " + "you can use the builtin ``{n2}`` for which ``np.compat.{n}`` is " + "itself an alias. " + "If you specifically wanted the numpy scalar type, use `np.{n2}_` " + "here." 
+ .format(n=n, n2=n2) + ) + for n, n2 in [("long", "int"), ("unicode", "str")] + }) from .core import round, abs, max, min # now that numpy modules are imported, can initialize limits @@ -172,8 +199,10 @@ else: __all__.extend(lib.__all__) __all__.extend(['linalg', 'fft', 'random', 'ctypeslib', 'ma']) - # These are added by `from .core import *` and `core.__all__`, but we - # overwrite them above with builtins we do _not_ want to export. + # These are exported by np.core, but are replaced by the builtins below + # remove them to ensure that we don't end up with `np.long == np.int_`, + # which would be a breaking change. + del long, unicode __all__.remove('long') __all__.remove('unicode') @@ -196,25 +225,33 @@ else: numarray = 'removed' if sys.version_info[:2] >= (3, 7): - # Importing Tester requires importing all of UnitTest which is not a - # cheap import Since it is mainly used in test suits, we lazy import it - # here to save on the order of 10 ms of import time for most users - # - # The previous way Tester was imported also had a side effect of adding - # the full `numpy.testing` namespace - # # module level getattr is only supported in 3.7 onwards # https://www.python.org/dev/peps/pep-0562/ def __getattr__(attr): + # Emit warnings for deprecated attributes + try: + val, msg = __deprecated_attrs__[attr] + except KeyError: + pass + else: + warnings.warn(msg, DeprecationWarning, stacklevel=2) + return val + + # Importing Tester requires importing all of UnitTest which is not a + # cheap import Since it is mainly used in test suits, we lazy import it + # here to save on the order of 10 ms of import time for most users + # + # The previous way Tester was imported also had a side effect of adding + # the full `numpy.testing` namespace if attr == 'testing': import numpy.testing as testing return testing elif attr == 'Tester': from .testing import Tester return Tester - else: - raise AttributeError("module {!r} has no attribute " - "{!r}".format(__name__, attr)) + + 
raise AttributeError("module {!r} has no attribute " + "{!r}".format(__name__, attr)) def __dir__(): return list(globals().keys() | {'Tester', 'testing'}) @@ -224,6 +261,13 @@ else: # no-one else in the world is using it (though I hope not) from .testing import Tester + # We weren't able to emit a warning about these, so keep them around + globals().update({ + k: v + for k, (v, msg) in __deprecated_attrs__.items() + }) + + # Pytest testing from numpy._pytesttester import PytestTester test = PytestTester(__name__) @@ -279,12 +323,11 @@ else: error_message = "{}: {}".format(w[-1].category.__name__, str(w[-1].message)) msg = ( "Polyfit sanity test emitted a warning, most likely due " - "to using a buggy Accelerate backend. " - "If you compiled yourself, " - "see site.cfg.example for information. " + "to using a buggy Accelerate backend. If you compiled " + "yourself, more information is available at " + "https://numpy.org/doc/stable/user/building.html#accelerated-blas-lapack-libraries " "Otherwise report this to the vendor " - "that provided NumPy.\n{}\n".format( - error_message)) + "that provided NumPy.\n{}\n".format(error_message)) raise RuntimeError(msg) del _mac_os_check diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi index 5031893ed..f9218391e 100644 --- a/numpy/__init__.pyi +++ b/numpy/__init__.pyi @@ -51,7 +51,12 @@ _NdArraySubClass = TypeVar("_NdArraySubClass", bound=ndarray) class dtype: names: Optional[Tuple[str, ...]] - def __init__(self, obj: DtypeLike, align: bool = ..., copy: bool = ...) -> None: ... + def __init__( + self, + dtype: DtypeLike, + align: bool = ..., + copy: bool = ..., + ) -> None: ... def __eq__(self, other: DtypeLike) -> bool: ... def __ne__(self, other: DtypeLike) -> bool: ... def __gt__(self, other: DtypeLike) -> bool: ... @@ -382,18 +387,18 @@ class _real_generic(generic): # type: ignore class number(generic): ... # type: ignore class bool_(_real_generic): - def __init__(self, value: object = ...) -> None: ... 
+ def __init__(self, __value: object = ...) -> None: ... class object_(generic): - def __init__(self, value: object = ...) -> None: ... + def __init__(self, __value: object = ...) -> None: ... class datetime64: @overload def __init__( - self, _data: Union[datetime64, str, dt.datetime] = ..., _format: str = ... + self, __value: Union[datetime64, str, dt.datetime] = ..., __format: str = ... ) -> None: ... @overload - def __init__(self, _data: int, _format: str) -> None: ... + def __init__(self, __value: int, __format: str) -> None: ... def __add__(self, other: Union[timedelta64, int]) -> datetime64: ... def __sub__(self, other: Union[timedelta64, datetime64, int]) -> timedelta64: ... @@ -401,19 +406,19 @@ class integer(number, _real_generic): ... # type: ignore class signedinteger(integer): ... # type: ignore class int8(signedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class int16(signedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class int32(signedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class int64(signedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class timedelta64(signedinteger): - def __init__(self, _data: Any = ..., _format: str = ...) -> None: ... + def __init__(self, __value: Any = ..., __format: str = ...) -> None: ... @overload def __add__(self, other: Union[timedelta64, int]) -> timedelta64: ... @overload @@ -433,34 +438,34 @@ class timedelta64(signedinteger): class unsignedinteger(integer): ... # type: ignore class uint8(unsignedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... 
class uint16(unsignedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class uint32(unsignedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class uint64(unsignedinteger): - def __init__(self, value: SupportsInt = ...) -> None: ... + def __init__(self, __value: SupportsInt = ...) -> None: ... class inexact(number): ... # type: ignore class floating(inexact, _real_generic): ... # type: ignore class float16(floating): - def __init__(self, value: SupportsFloat = ...) -> None: ... + def __init__(self, __value: SupportsFloat = ...) -> None: ... class float32(floating): - def __init__(self, value: SupportsFloat = ...) -> None: ... + def __init__(self, __value: SupportsFloat = ...) -> None: ... class float64(floating): - def __init__(self, value: SupportsFloat = ...) -> None: ... + def __init__(self, __value: SupportsFloat = ...) -> None: ... class complexfloating(inexact): ... # type: ignore class complex64(complexfloating): def __init__( - self, value: Union[SupportsInt, SupportsFloat, SupportsComplex] = ... + self, __value: Union[SupportsInt, SupportsFloat, SupportsComplex] = ... ) -> None: ... @property def real(self) -> float32: ... @@ -469,7 +474,7 @@ class complex64(complexfloating): class complex128(complexfloating): def __init__( - self, value: Union[SupportsInt, SupportsFloat, SupportsComplex] = ... + self, __value: Union[SupportsInt, SupportsFloat, SupportsComplex] = ... ) -> None: ... @property def real(self) -> float64: ... @@ -479,24 +484,24 @@ class complex128(complexfloating): class flexible(_real_generic): ... # type: ignore class void(flexible): - def __init__(self, value: Union[int, integer, bool_, bytes, bytes_]): ... + def __init__(self, __value: Union[int, integer, bool_, bytes, bytes_]): ... class character(_real_generic): ... 
# type: ignore class bytes_(character): @overload - def __init__(self, value: object = ...) -> None: ... + def __init__(self, __value: object = ...) -> None: ... @overload def __init__( - self, value: object, encoding: str = ..., errors: str = ... + self, __value: Union[str, str_], encoding: str = ..., errors: str = ... ) -> None: ... class str_(character): @overload - def __init__(self, value: object = ...) -> None: ... + def __init__(self, __value: object = ...) -> None: ... @overload def __init__( - self, value: object, encoding: str = ..., errors: str = ... + self, __value: Union[bytes, bytes_], encoding: str = ..., errors: str = ... ) -> None: ... # TODO(alan): Platform dependent types @@ -936,21 +941,18 @@ def reshape(a: ArrayLike, newshape: _ShapeLike, order: _Order = ...) -> ndarray: @overload def choose( a: _ScalarIntOrBool, - choices: Union[Sequence[ArrayLike], ndarray], + choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ..., ) -> _ScalarIntOrBool: ... @overload def choose( - a: _IntOrBool, - choices: Union[Sequence[ArrayLike], ndarray], - out: Optional[ndarray] = ..., - mode: _Mode = ..., + a: _IntOrBool, choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ... ) -> Union[integer, bool_]: ... @overload def choose( a: _ArrayLikeIntOrBool, - choices: Union[Sequence[ArrayLike], ndarray], + choices: ArrayLike, out: Optional[ndarray] = ..., mode: _Mode = ..., ) -> ndarray: ... @@ -960,9 +962,7 @@ def repeat( def put( a: ndarray, ind: _ArrayLikeIntOrBool, v: ArrayLike, mode: _Mode = ... ) -> None: ... -def swapaxes( - a: Union[Sequence[ArrayLike], ndarray], axis1: int, axis2: int -) -> ndarray: ... +def swapaxes(a: ArrayLike, axis1: int, axis2: int) -> ndarray: ... def transpose( a: ArrayLike, axes: Union[None, Sequence[int], ndarray] = ... ) -> ndarray: ... @@ -998,54 +998,42 @@ def argpartition( order: Union[None, str, Sequence[str]] = ..., ) -> ndarray: ... 
def sort( - a: Union[Sequence[ArrayLike], ndarray], + a: ArrayLike, axis: Optional[int] = ..., kind: Optional[_SortKind] = ..., order: Union[None, str, Sequence[str]] = ..., ) -> ndarray: ... def argsort( - a: Union[Sequence[ArrayLike], ndarray], + a: ArrayLike, axis: Optional[int] = ..., kind: Optional[_SortKind] = ..., order: Union[None, str, Sequence[str]] = ..., ) -> ndarray: ... @overload -def argmax( - a: Union[Sequence[ArrayLike], ndarray], - axis: None = ..., - out: Optional[ndarray] = ..., -) -> integer: ... +def argmax(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... @overload def argmax( - a: Union[Sequence[ArrayLike], ndarray], - axis: int = ..., - out: Optional[ndarray] = ..., + a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... ) -> Union[integer, ndarray]: ... @overload -def argmin( - a: Union[Sequence[ArrayLike], ndarray], - axis: None = ..., - out: Optional[ndarray] = ..., -) -> integer: ... +def argmin(a: ArrayLike, axis: None = ..., out: Optional[ndarray] = ...) -> integer: ... @overload def argmin( - a: Union[Sequence[ArrayLike], ndarray], - axis: int = ..., - out: Optional[ndarray] = ..., + a: ArrayLike, axis: int = ..., out: Optional[ndarray] = ... ) -> Union[integer, ndarray]: ... @overload def searchsorted( - a: Union[Sequence[ArrayLike], ndarray], + a: ArrayLike, v: _Scalar, side: _Side = ..., - sorter: Union[None, Sequence[_IntOrBool], ndarray] = ..., # 1D int array + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array ) -> integer: ... @overload def searchsorted( - a: Union[Sequence[ArrayLike], ndarray], + a: ArrayLike, v: ArrayLike, side: _Side = ..., - sorter: Union[None, Sequence[_IntOrBool], ndarray] = ..., # 1D int array + sorter: Optional[_ArrayLikeIntOrBool] = ..., # 1D int array ) -> ndarray: ... def resize(a: ArrayLike, new_shape: _ShapeLike) -> ndarray: ... @overload @@ -1053,13 +1041,10 @@ def squeeze(a: _ScalarGeneric, axis: Optional[_ShapeLike] = ...) 
-> _ScalarGener @overload def squeeze(a: ArrayLike, axis: Optional[_ShapeLike] = ...) -> ndarray: ... def diagonal( - a: Union[Sequence[Sequence[ArrayLike]], ndarray], # >= 2D array - offset: int = ..., - axis1: int = ..., - axis2: int = ..., + a: ArrayLike, offset: int = ..., axis1: int = ..., axis2: int = ... # >= 2D array ) -> ndarray: ... def trace( - a: Union[Sequence[Sequence[ArrayLike]], ndarray], # >= 2D array + a: ArrayLike, # >= 2D array offset: int = ..., axis1: int = ..., axis2: int = ..., @@ -1070,7 +1055,7 @@ def ravel(a: ArrayLike, order: _Order = ...) -> ndarray: ... def nonzero(a: ArrayLike) -> Tuple[ndarray, ...]: ... def shape(a: ArrayLike) -> _Shape: ... def compress( - condition: Union[Sequence[_Bool], ndarray], # 1D bool array + condition: ArrayLike, # 1D bool array a: ArrayLike, axis: Optional[int] = ..., out: Optional[ndarray] = ..., diff --git a/numpy/_pytesttester.py b/numpy/_pytesttester.py index ca86aeb22..1c32367f3 100644 --- a/numpy/_pytesttester.py +++ b/numpy/_pytesttester.py @@ -35,12 +35,27 @@ __all__ = ['PytestTester'] def _show_numpy_info(): + from numpy.core._multiarray_umath import ( + __cpu_features__, __cpu_baseline__, __cpu_dispatch__ + ) import numpy as np print("NumPy version %s" % np.__version__) relaxed_strides = np.ones((10, 1), order="C").flags.f_contiguous print("NumPy relaxed strides checking option:", relaxed_strides) + if len(__cpu_baseline__) == 0 and len(__cpu_dispatch__) == 0: + enabled_features = "nothing enabled" + else: + enabled_features = ' '.join(__cpu_baseline__) + for feature in __cpu_dispatch__: + if __cpu_features__[feature]: + enabled_features += " %s*" % feature + else: + enabled_features += " %s?" 
% feature + print("NumPy CPU features:", enabled_features) + + class PytestTester: """ diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index 688238af3..33c1f08b1 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1307,14 +1307,14 @@ add_newdoc('numpy.core.multiarray', 'arange', Parameters ---------- - start : number, optional + start : integer or real, optional Start of interval. The interval includes this value. The default start value is 0. - stop : number + stop : integer or real End of interval. The interval does not include this value, except in some cases where `step` is not an integer and floating point round-off affects the length of `out`. - step : number, optional + step : integer or real, optional Spacing between values. For any output `out`, this is the distance between two adjacent values, ``out[i+1] - out[i]``. The default step size is 1. If `step` is specified as a position argument, @@ -1525,7 +1525,7 @@ add_newdoc('numpy.core.multiarray', 'c_einsum', Controls the memory layout of the output. 'C' means it should be C contiguous. 'F' means it should be Fortran contiguous, 'A' means it should be 'F' if the inputs are all 'F', 'C' otherwise. - 'K' means it should be as close to the layout as the inputs as + 'K' means it should be as close to the layout of the inputs as is possible, including arbitrarily permuted axes. Default is 'K'. casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional @@ -3936,18 +3936,17 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('tobytes', """ Construct Python bytes containing the raw data bytes in the array. Constructs Python bytes showing a copy of the raw contents of - data memory. The bytes object can be produced in either 'C' or 'Fortran', - or 'Any' order (the default is 'C'-order). 'Any' order means C-order - unless the F_CONTIGUOUS flag in the array is set, in which case it - means 'Fortran' order. + data memory. 
The bytes object is produced in C-order by default. + This behavior is controlled by the ``order`` parameter. .. versionadded:: 1.9.0 Parameters ---------- - order : {'C', 'F', None}, optional - Order of the data for multidimensional arrays: - C, Fortran, or the same as for the original array. + order : {'C', 'F', 'A'}, optional + Controls the memory layout of the bytes object. 'C' means C-order, + 'F' means F-order, 'A' (short for *Any*) means 'F' if `a` is + Fortran contiguous, 'C' otherwise. Default is 'C'. Returns ------- @@ -5142,7 +5141,7 @@ add_newdoc('numpy.core', 'ufunc', ('at', add_newdoc('numpy.core.multiarray', 'dtype', """ - dtype(obj, align=False, copy=False) + dtype(dtype, align=False, copy=False) Create a data type object. @@ -5152,7 +5151,7 @@ add_newdoc('numpy.core.multiarray', 'dtype', Parameters ---------- - obj + dtype Object to be converted to a data type object. align : bool, optional Add padding to the fields to match what a C compiler would output diff --git a/numpy/core/_type_aliases.py b/numpy/core/_type_aliases.py index c26431443..de90fd818 100644 --- a/numpy/core/_type_aliases.py +++ b/numpy/core/_type_aliases.py @@ -11,40 +11,19 @@ and sometimes other mappings too. .. data:: sctypeDict Similar to `allTypes`, but maps a broader set of aliases to their types. -.. data:: sctypeNA - NumArray-compatible names for the scalar types. Contains not only - ``name: type`` mappings, but ``char: name`` mappings too. - - .. deprecated:: 1.16 - .. data:: sctypes A dictionary keyed by a "type group" string, providing a list of types under that group. 
""" -import warnings from numpy.compat import unicode -from numpy._globals import VisibleDeprecationWarning -from numpy.core._string_helpers import english_lower, english_capitalize +from numpy.core._string_helpers import english_lower from numpy.core.multiarray import typeinfo, dtype from numpy.core._dtype import _kind_name sctypeDict = {} # Contains all leaf-node scalar types with aliases -class TypeNADict(dict): - def __getitem__(self, key): - # 2018-06-24, 1.16 - warnings.warn('sctypeNA and typeNA will be removed in v1.18 ' - 'of numpy', VisibleDeprecationWarning, stacklevel=2) - return dict.__getitem__(self, key) - def get(self, key, default=None): - # 2018-06-24, 1.16 - warnings.warn('sctypeNA and typeNA will be removed in v1.18 ' - 'of numpy', VisibleDeprecationWarning, stacklevel=2) - return dict.get(self, key, default) - -sctypeNA = TypeNADict() # Contails all leaf-node types -> numarray type equivalences allTypes = {} # Collect the types we will add to the module @@ -127,27 +106,24 @@ def _add_aliases(): if name in ('longdouble', 'clongdouble') and myname in allTypes: continue - base_capitalize = english_capitalize(base) - if base == 'complex': - na_name = '%s%d' % (base_capitalize, bit//2) - elif base == 'bool': - na_name = base_capitalize - else: - na_name = "%s%d" % (base_capitalize, bit) - allTypes[myname] = info.type # add mapping for both the bit name and the numarray name sctypeDict[myname] = info.type - sctypeDict[na_name] = info.type # add forward, reverse, and string mapping to numarray - sctypeNA[na_name] = info.type - sctypeNA[info.type] = na_name - sctypeNA[info.char] = na_name - sctypeDict[char] = info.type - sctypeNA[char] = na_name + + # Add deprecated numeric-style type aliases manually, at some point + # we may want to deprecate the lower case "bytes0" version as well. 
+ for name in ["Bytes0", "Datetime64", "Str0", "Uint32", "Uint64"]: + if english_lower(name) not in allTypes: + # Only one of Uint32 or Uint64, aliases of `np.uintp`, was (and is) defined, note that this + # is not UInt32/UInt64 (capital i), which is removed. + continue + allTypes[name] = allTypes[english_lower(name)] + sctypeDict[name] = sctypeDict[english_lower(name)] + _add_aliases() def _add_integer_aliases(): @@ -157,20 +133,15 @@ def _add_integer_aliases(): u_info = _concrete_typeinfo[u_ctype] bits = i_info.bits # same for both - for info, charname, intname, Intname in [ - (i_info,'i%d' % (bits//8,), 'int%d' % bits, 'Int%d' % bits), - (u_info,'u%d' % (bits//8,), 'uint%d' % bits, 'UInt%d' % bits)]: + for info, charname, intname in [ + (i_info,'i%d' % (bits//8,), 'int%d' % bits), + (u_info,'u%d' % (bits//8,), 'uint%d' % bits)]: if bits not in seen_bits: # sometimes two different types have the same number of bits # if so, the one iterated over first takes precedence allTypes[intname] = info.type sctypeDict[intname] = info.type - sctypeDict[Intname] = info.type sctypeDict[charname] = info.type - sctypeNA[Intname] = info.type - sctypeNA[charname] = info.type - sctypeNA[info.type] = Intname - sctypeNA[info.char] = Intname seen_bits.add(bits) diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 2b88ccedf..412d9fe6a 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -649,6 +649,10 @@ def transpose(a, axes=None): >>> np.transpose(x, (1, 0, 2)).shape (2, 1, 3) + >>> x = np.ones((2, 3, 4, 5)) + >>> np.transpose(x).shape + (5, 4, 3, 2) + """ return _wrapfunc(a, 'transpose', axes) diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py index 9e46f0ea5..f57e95742 100644 --- a/numpy/core/function_base.py +++ b/numpy/core/function_base.py @@ -52,8 +52,10 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, If True, return (`samples`, `step`), where `step` is the spacing between samples. 
dtype : dtype, optional - The type of the output array. If `dtype` is not given, infer the data - type from the other input arguments. + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred dtype will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. .. versionadded:: 1.9.0 @@ -202,8 +204,10 @@ def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, ``ln(samples) / ln(base)`` (or ``log_base(samples)``) is uniform. Default is 10.0. dtype : dtype - The type of the output array. If `dtype` is not given, infer the data - type from the other input arguments. + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred type will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. axis : int, optional The axis in the result to store the samples. Relevant only if start or stop are array-like. By default (0), the samples will be along a @@ -297,8 +301,10 @@ def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0): If true, `stop` is the last sample. Otherwise, it is not included. Default is True. dtype : dtype - The type of the output array. If `dtype` is not given, infer the data - type from the other input arguments. + The type of the output array. If `dtype` is not given, the data type + is inferred from `start` and `stop`. The inferred dtype will never be + an integer; `float` is chosen even if the arguments would produce an + array of integers. axis : int, optional The axis in the result to store the samples. Relevant only if start or stop are array-like. 
By default (0), the samples will be along a @@ -408,8 +414,18 @@ def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0): log_start = _nx.log10(start) log_stop = _nx.log10(stop) - result = out_sign * logspace(log_start, log_stop, num=num, - endpoint=endpoint, base=10.0, dtype=dtype) + result = logspace(log_start, log_stop, num=num, + endpoint=endpoint, base=10.0, dtype=dtype) + + # Make sure the endpoints match the start and stop arguments. This is + # necessary because np.exp(np.log(x)) is not necessarily equal to x. + if num > 0: + result[0] = start + if num > 1 and endpoint: + result[-1] = stop + + result = out_sign * result + if axis != 0: result = _nx.moveaxis(result, 0, axis) diff --git a/numpy/core/include/numpy/ndarraytypes.h b/numpy/core/include/numpy/ndarraytypes.h index 1b61899fa..275bb336b 100644 --- a/numpy/core/include/numpy/ndarraytypes.h +++ b/numpy/core/include/numpy/ndarraytypes.h @@ -341,9 +341,6 @@ struct NpyAuxData_tag { #define NPY_ERR(str) fprintf(stderr, #str); fflush(stderr); #define NPY_ERR2(str) fprintf(stderr, str); fflush(stderr); -#define NPY_STRINGIFY(x) #x -#define NPY_TOSTRING(x) NPY_STRINGIFY(x) - /* * Macros to define how array, and dimension/strides data is * allocated. 
diff --git a/numpy/core/include/numpy/utils.h b/numpy/core/include/numpy/utils.h index 32218b8c7..e251a5201 100644 --- a/numpy/core/include/numpy/utils.h +++ b/numpy/core/include/numpy/utils.h @@ -2,20 +2,36 @@ #define __NUMPY_UTILS_HEADER__ #ifndef __COMP_NPY_UNUSED - #if defined(__GNUC__) - #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) - # elif defined(__ICC) - #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) - # elif defined(__clang__) - #define __COMP_NPY_UNUSED __attribute__ ((unused)) - #else - #define __COMP_NPY_UNUSED - #endif + #if defined(__GNUC__) + #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) + #elif defined(__ICC) + #define __COMP_NPY_UNUSED __attribute__ ((__unused__)) + #elif defined(__clang__) + #define __COMP_NPY_UNUSED __attribute__ ((unused)) + #else + #define __COMP_NPY_UNUSED + #endif +#endif + +#if defined(__GNUC__) || defined(__ICC) || defined(__clang__) + #define NPY_DECL_ALIGNED(x) __attribute__ ((aligned (x))) +#elif defined(_MSC_VER) + #define NPY_DECL_ALIGNED(x) __declspec(align(x)) +#else + #define NPY_DECL_ALIGNED(x) #endif /* Use this to tag a variable as not used. 
It will remove unused variable * warning on support platforms (see __COM_NPY_UNUSED) and mangle the variable * to avoid accidental use */ #define NPY_UNUSED(x) (__NPY_UNUSED_TAGGED ## x) __COMP_NPY_UNUSED +#define NPY_EXPAND(x) x + +#define NPY_STRINGIFY(x) #x +#define NPY_TOSTRING(x) NPY_STRINGIFY(x) + +#define NPY_CAT__(a, b) a ## b +#define NPY_CAT_(a, b) NPY_CAT__(a, b) +#define NPY_CAT(a, b) NPY_CAT_(a, b) #endif diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index aac741612..2a015f48f 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -91,7 +91,7 @@ from numpy.core.multiarray import ( from numpy.core.overrides import set_module # we add more at the bottom -__all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes', +__all__ = ['sctypeDict', 'typeDict', 'sctypes', 'ScalarType', 'obj2sctype', 'cast', 'nbytes', 'sctype2char', 'maximum_sctype', 'issctype', 'typecodes', 'find_common_type', 'issubdtype', 'datetime_data', 'datetime_as_string', @@ -106,7 +106,6 @@ from ._string_helpers import ( from ._type_aliases import ( sctypeDict, - sctypeNA, allTypes, bitname, sctypes, @@ -512,7 +511,6 @@ typecodes = {'Character':'c', # backwards compatibility --- deprecated name typeDict = sctypeDict -typeNA = sctypeNA # b -> boolean # u -> unsigned integer diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 5351b30bf..549860179 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -738,6 +738,7 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'ufunc_override.h'), join('src', 'common', 'umathmodule.h'), join('src', 'common', 'numpyos.h'), + join('src', 'common', 'npy_cpu_dispatch.h'), ] common_src = [ @@ -939,8 +940,11 @@ def configuration(parent_package='',top_path=None): # umath_tests module # ####################################################################### - config.add_extension('_umath_tests', - sources=[join('src', 'umath', '_umath_tests.c.src')]) + 
config.add_extension('_umath_tests', sources=[ + join('src', 'umath', '_umath_tests.c.src'), + join('src', 'umath', '_umath_tests.dispatch.c'), + join('src', 'common', 'npy_cpu_features.c.src'), + ]) ####################################################################### # custom rational dtype module # diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h index aebe241a5..4493409bb 100644 --- a/numpy/core/src/common/npy_config.h +++ b/numpy/core/src/common/npy_config.h @@ -3,6 +3,7 @@ #include "config.h" #include "npy_cpu_features.h" +#include "npy_cpu_dispatch.h" #include "numpy/numpyconfig.h" #include "numpy/npy_cpu.h" #include "numpy/npy_os.h" diff --git a/numpy/core/src/common/npy_cpu_dispatch.h b/numpy/core/src/common/npy_cpu_dispatch.h new file mode 100644 index 000000000..846d1ebb9 --- /dev/null +++ b/numpy/core/src/common/npy_cpu_dispatch.h @@ -0,0 +1,260 @@ +#ifndef NPY_CPU_DISPATCH_H_ +#define NPY_CPU_DISPATCH_H_ +/** + * This file is part of the NumPy CPU dispatcher. Please have a look at doc/reference/simd-optimizations.html + * to get a better understanding of the mechanism behind it. + */ +#include "npy_cpu_features.h" // NPY_CPU_HAVE +#include "numpy/utils.h" // NPY_EXPAND, NPY_CAT +/** + * Bringing the main configuration header '_cpu_dispatch.h'. + * + * This header is generated by the distutils module 'ccompiler_opt', + * and contains all the #definitions and headers of instruction-sets, + * that had been configured through command arguments '--cpu-baseline' and '--cpu-dispatch'. + * + * It also contains extra C #definitions and macros that are used for implementing + * NumPy module's attributes `__cpu_baseline__` and `__cpu_dispatch__`. + */ +/** + * Note: Always guard the generated headers within 'NPY_DISABLE_OPTIMIZATION', + * due to the nature of command argument '--disable-optimization', + * which is explicitly disabling the module ccompiler_opt.
+ */ +#ifndef NPY_DISABLE_OPTIMIZATION + #if defined(__powerpc64__) && !defined(__cplusplus) && defined(bool) + /** + * "altivec.h" header contains the definitions(bool, vector, pixel), + * usually in c++ we undefine them after including the header. + * It's better anyway to take them off and use built-in types(__vector, __pixel, __bool) instead, + * since c99 supports bool variables which may lead to ambiguous errors. + */ + // backup 'bool' before including '_cpu_dispatch.h', since it may not be defined as a compiler token. + #define NPY__DISPATCH_DEFBOOL + typedef bool npy__dispatch_bkbool; + #endif + #include "_cpu_dispatch.h" + #ifdef NPY_HAVE_VSX + #undef bool + #undef vector + #undef pixel + #ifdef NPY__DISPATCH_DEFBOOL + #define bool npy__dispatch_bkbool + #endif + #endif +#endif // !NPY_DISABLE_OPTIMIZATION +/** + * Macro NPY_CPU_DISPATCH_CURFX(NAME) + * + * Returns @NAME suffixed with "_" + "the current target" during compiling + * the wrapped sources that are generated from the dispatch-able sources according + * to the provided configuration statements. + * + * It also returns @NAME as-is without any suffix when it comes to the baseline or + * in case the optimization is disabled. + * + * The idea behind this Macro is to allow exporting certain symbols and to + * avoid linking duplications due to the nature of the dispatch-able sources.
+ * + * Example: + * @targets baseline avx avx512_skx vsx3 asimdhp // configuration statements + * + * void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst) + * { + * // the kernel + * } + * + * By assuming the required optimizations are enabled via '--cpu-dispatch' and + * the compiler supported them too, then the generated symbols will be named as follows: + * + * - x86: + * dispatch_me(const int*, int*) // baseline + * dispatch_me_AVX(const int*, int*) + * dispatch_me_AVX512_SKX(const int*, int*) + * + * - ppc64: + * dispatch_me(const int*, int*) + * dispatch_me_VSX3(const int*, int*) + * + * - ARM: + * dispatch_me(const int*, int*) + * dispatch_me_ASIMDHP(const int*, int*) + * + * - unsupported arch or when optimization is disabled: + * dispatch_me(const int*, int*) + * + * For forward declarations, see 'NPY_CPU_DISPATCH_DECLARE'. + */ +#ifdef NPY__CPU_TARGET_CURRENT + // 'NPY__CPU_TARGET_CURRENT': only defined by the dispatch-able sources + #define NPY_CPU_DISPATCH_CURFX(NAME) NPY_CAT(NPY_CAT(NAME, _), NPY__CPU_TARGET_CURRENT) +#else + #define NPY_CPU_DISPATCH_CURFX(NAME) NPY_EXPAND(NAME) +#endif +/** + * Defining the default behavior for the configurable macros of dispatch-able sources, + * 'NPY__CPU_DISPATCH_CALL(...)' and 'NPY__CPU_DISPATCH_BASELINE_CALL(...)' + * + * These macros are defined inside the generated config files that have been derived from + * the configuration statements of the dispatch-able sources. + * + * The generated config file takes the same name of the dispatch-able source with replacing + * the extension to '.h' instead of '.c', and it should be treated as a header template. + * + * For more clarification, please have a look at doc/reference/simd-optimizations.html. + */ +#ifndef NPY_DISABLE_OPTIMIZATION + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + &&"Expected config header of the dispatch-able source"; + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...)
\ + &&"Expected config header of the dispatch-able source"; +#else + /** + * We assume by default that all configuration statements contain the 'baseline' option; however, + * if the dispatch-able source doesn't require it, then the dispatch-able source and following macros + * need to be guarded with '#ifndef NPY_DISABLE_OPTIMIZATION' + */ + #define NPY__CPU_DISPATCH_BASELINE_CALL(CB, ...) \ + NPY_EXPAND(CB(__VA_ARGS__)) + #define NPY__CPU_DISPATCH_CALL(CHK, CB, ...) +#endif // !NPY_DISABLE_OPTIMIZATION +/** + * Macro NPY_CPU_DISPATCH_DECLARE(LEFT, ...) is used to provide forward + * declarations for the exported variables and functions that are defined inside + * the dispatch-able sources. + * + * The first argument should end with the exported function or variable name, + * while the macro pastes the extra arguments. + * + * Examples: + * #ifndef NPY_DISABLE_OPTIMIZATION + * #include "dispatchable_source_name.dispatch.h" + * #endif + * + * NPY_CPU_DISPATCH_DECLARE(void dispatch_me, (const int*, int*)) + * NPY_CPU_DISPATCH_DECLARE(extern cb_type callback_tab, [TAB_SIZE]) + * + * By assuming the provided config header is derived from a dispatch-able source + * that is configured with "@targets baseline sse41 vsx3 asimdhp", + * and that they are supported by the compiler and enabled via '--cpu-dispatch', + * then the prototype declarations in the above example will be equivalent to the following: + * + * - x86: + * void dispatch_me(const int*, int*); // baseline + * void dispatch_me_SSE41(const int*, int*); + * + * extern cb_type callback_tab[TAB_SIZE]; + * extern cb_type callback_tab_SSE41[TAB_SIZE]; + * + * - ppc64: + * void dispatch_me(const int*, int*); + * void dispatch_me_VSX3(const int*, int*); + * + * extern cb_type callback_tab[TAB_SIZE]; + * extern cb_type callback_tab_VSX3[TAB_SIZE]; + * + * - ARM: + * void dispatch_me(const int*, int*); + * void dispatch_me_ASIMDHP(const int*, int*); + * + * extern cb_type callback_tab[TAB_SIZE]; + * extern cb_type callback_tab_ASIMDHP[TAB_SIZE]; + * 
+ * - unsupported arch or when optimization is disabled: + * void dispatch_me(const int*, int*); + * extern cb_type callback_tab[TAB_SIZE]; + * + * For runtime dispatching, see 'NPY_CPU_DISPATCH_CALL' + */ +#define NPY_CPU_DISPATCH_DECLARE(...) \ + NPY__CPU_DISPATCH_CALL(NPY_CPU_DISPATCH_DECLARE_CHK_, NPY_CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) \ + NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_DECLARE_BASE_CB_, __VA_ARGS__) +// Preprocessor callbacks +#define NPY_CPU_DISPATCH_DECLARE_CB_(DUMMY, TARGET_NAME, LEFT, ...) \ + NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__; +#define NPY_CPU_DISPATCH_DECLARE_BASE_CB_(LEFT, ...) \ + LEFT __VA_ARGS__; +// Dummy CPU runtime checking +#define NPY_CPU_DISPATCH_DECLARE_CHK_(FEATURE) +/** + * Macro NPY_CPU_DISPATCH_DECLARE_XB(LEFT, ...) + * + * Same as `NPY_CPU_DISPATCH_DECLARE` but excludes the baseline declaration even + * if it was provided within the configuration statements. + */ +#define NPY_CPU_DISPATCH_DECLARE_XB(...) \ + NPY__CPU_DISPATCH_CALL(NPY_CPU_DISPATCH_DECLARE_CHK_, NPY_CPU_DISPATCH_DECLARE_CB_, __VA_ARGS__) +/** + * Macro NPY_CPU_DISPATCH_CALL(LEFT, ...) is used for runtime dispatching + * of the exported functions and variables within the dispatch-able sources + * according to the highest interested CPU features that are supported by the + * running machine depending on the required optimizations. + * + * The first argument should end with the exported function or variable name, + * while the macro pastes the extra arguments. 
+ * + * Example: + * Assume we have a dispatch-able source exporting the following function: + * + * @targets baseline avx2 avx512_skx // configuration statements + * + * void NPY_CPU_DISPATCH_CURFX(dispatch_me)(const int *src, int *dst) + * { + * // the kernel + * } + * + * In order to call or to assign the pointer of it from outside the dispatch-able source, + * you have to use this Macro as follows: + * + * // bring the generated config header of the dispatch-able source + * #ifndef NPY_DISABLE_OPTIMIZATION + * #include "dispatchable_source_name.dispatch.h" + * #endif + * // forward declaration + * NPY_CPU_DISPATCH_DECLARE(void dispatch_me, (const int *src, int *dst)) + * + * typedef void(*func_type)(const int*, int*); + * func_type the_callee(const int *src, int *dst, func_type *cb) + * { + * // direct call + * NPY_CPU_DISPATCH_CALL(dispatch_me, (src, dst)) + * // assign the pointer + * NPY_CPU_DISPATCH_CALL(*cb = dispatch_me) + * // return the pointer + * NPY_CPU_DISPATCH_CALL(return dispatch_me) + * } + */ +#define NPY_CPU_DISPATCH_CALL(...) \ + if (0) {/*DUMMY*/} \ + NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_CB_, __VA_ARGS__) \ + NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_BASE_CB_, __VA_ARGS__) +// Preprocessor callbacks +#define NPY_CPU_DISPATCH_CALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \ + else if (TESTED_FEATURES) { NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__; } +#define NPY_CPU_DISPATCH_CALL_BASE_CB_(LEFT, ...) \ + else { LEFT __VA_ARGS__; } +/** + * Macro NPY_CPU_DISPATCH_CALL_XB(LEFT, ...) + * + * Same as `NPY_CPU_DISPATCH_CALL` but excludes the baseline call even + * if it was provided within the configuration statements. + */ +#define NPY_CPU_DISPATCH_CALL_XB(...) \ + if (0) {/*DUMMY*/} \ + NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_CB_, __VA_ARGS__) +/** + * Macro NPY_CPU_DISPATCH_CALL_ALL(LEFT, ...) 
+ * + * Same as `NPY_CPU_DISPATCH_CALL` but dispatching all the required optimizations for + * the exported functions and variables instead of highest interested one. + */ +#define NPY_CPU_DISPATCH_CALL_ALL(...) \ + NPY__CPU_DISPATCH_CALL(NPY_CPU_HAVE, NPY_CPU_DISPATCH_CALL_ALL_CB_, __VA_ARGS__) \ + NPY__CPU_DISPATCH_BASELINE_CALL(NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_, __VA_ARGS__) +// Preprocessor callbacks +#define NPY_CPU_DISPATCH_CALL_ALL_CB_(TESTED_FEATURES, TARGET_NAME, LEFT, ...) \ + if (TESTED_FEATURES) { NPY_CAT(NPY_CAT(LEFT, _), TARGET_NAME) __VA_ARGS__; } +#define NPY_CPU_DISPATCH_CALL_ALL_BASE_CB_(LEFT, ...) \ + { LEFT __VA_ARGS__; } + +#endif // NPY_CPU_DISPATCH_H_ diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src index d35199760..facd27f3c 100644 --- a/numpy/core/src/common/npy_cpu_features.c.src +++ b/numpy/core/src/common/npy_cpu_features.c.src @@ -1,6 +1,7 @@ #include "npy_cpu_features.h" +#include "npy_cpu_dispatch.h" // To guarantee the CPU baseline definitions are in scope. #include "numpy/npy_common.h" // for NPY_INLINE -#include "numpy/npy_cpu.h" // To guarantee of having CPU definitions in scope. +#include "numpy/npy_cpu.h" // To guarantee the CPU definitions are in scope. 
/******************** Private Definitions *********************/ @@ -55,6 +56,44 @@ npy_cpu_features_dict(void) return dict; } +#define NPY__CPU_PYLIST_APPEND_CB(FEATURE, LIST) \ + item = PyUnicode_FromString(NPY_TOSTRING(FEATURE)); \ + if (item == NULL) { \ + Py_DECREF(LIST); \ + return NULL; \ + } \ + PyList_SET_ITEM(LIST, index++, item); + +NPY_VISIBILITY_HIDDEN PyObject * +npy_cpu_baseline_list(void) +{ +#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_BASELINE_N > 0 + PyObject *list = PyList_New(NPY_WITH_CPU_BASELINE_N), *item; + int index = 0; + if (list != NULL) { + NPY_WITH_CPU_BASELINE_CALL(NPY__CPU_PYLIST_APPEND_CB, list) + } + return list; +#else + return PyList_New(0); +#endif +} + +NPY_VISIBILITY_HIDDEN PyObject * +npy_cpu_dispatch_list(void) +{ +#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_DISPATCH_N > 0 + PyObject *list = PyList_New(NPY_WITH_CPU_DISPATCH_N), *item; + int index = 0; + if (list != NULL) { + NPY_WITH_CPU_DISPATCH_CALL(NPY__CPU_PYLIST_APPEND_CB, list) + } + return list; +#else + return PyList_New(0); +#endif +} + /**************************************************************** * This section is reserved to defining @npy__cpu_init_features * for each CPU architecture, please try to keep it clean. 
Ty @@ -366,7 +405,7 @@ npy__cpu_init_features(void) return; #endif // We have nothing else todo -#if defined(NPY_HAVE_NEON_ARM8) || defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8) +#if defined(NPY_HAVE_ASIMD) || defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8) #if defined(NPY_HAVE_FPHP) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) npy__cpu_have[NPY_CPU_FEATURE_FPHP] = 1; #endif diff --git a/numpy/core/src/common/npy_cpu_features.h b/numpy/core/src/common/npy_cpu_features.h index 0e8901328..fffdef38e 100644 --- a/numpy/core/src/common/npy_cpu_features.h +++ b/numpy/core/src/common/npy_cpu_features.h @@ -109,6 +109,48 @@ npy_cpu_have(NPY_CPU_FEATURE_##FEATURE_NAME) */ NPY_VISIBILITY_HIDDEN PyObject * npy_cpu_features_dict(void); +/* + * Return a new Python list containing the minimal set of required optimizations + * that are supported by the compiler and platform according to the specified + * values to command argument '--cpu-baseline'. + * + * This function is mainly used to implement umath's attribute '__cpu_baseline__', + * and the items are sorted from the lowest to highest interest. + * + * For example, according to the default build configuration and by assuming the compiler + * supports all the involved optimizations then the returned list should be equivalent to: + * + * On x86: ['SSE', 'SSE2'] + * On x64: ['SSE', 'SSE2', 'SSE3'] + * On armhf: [] + * On aarch64: ['NEON', 'NEON_FP16', 'NEON_VFPV4', 'ASIMD'] + * On ppc64: [] + * On ppc64le: ['VSX', 'VSX2'] + * On any other arch or if the optimization is disabled: [] + */ +NPY_VISIBILITY_HIDDEN PyObject * +npy_cpu_baseline_list(void); +/* + * Return a new Python list containing the dispatched set of additional optimizations + * that are supported by the compiler and platform according to the specified + * values to command argument '--cpu-dispatch'. 
+ * + * This function is mainly used to implement umath's attribute '__cpu_dispatch__', + * and the items are sorted from the lowest to highest interest. + * + * For example, according to the default build configuration and by assuming the compiler + * supports all the involved optimizations then the returned list should be equivalent to: + * + * On x86: ['SSE3', 'SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...] + * On x64: ['SSSE3', 'SSE41', 'POPCNT', 'SSE42', 'AVX', 'F16C', 'FMA3', 'AVX2', 'AVX512F', ...] + * On armhf: ['NEON', 'NEON_FP16', 'NEON_VFPV4', 'ASIMD', 'ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] + * On aarch64: ['ASIMDHP', 'ASIMDDP', 'ASIMDFHM'] + * On ppc64: ['VSX', 'VSX2', 'VSX3'] + * On ppc64le: ['VSX3'] + * On any other arch or if the optimization is disabled: [] + */ +NPY_VISIBILITY_HIDDEN PyObject * +npy_cpu_dispatch_list(void); #ifdef __cplusplus } diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c index 232176011..8b482dc03 100644 --- a/numpy/core/src/multiarray/buffer.c +++ b/numpy/core/src/multiarray/buffer.c @@ -64,7 +64,7 @@ _append_char(_tmp_string_t *s, char c) char *p; size_t to_alloc = (s->allocated == 0) ? INIT_SIZE : (2 * s->allocated); - p = realloc(s->s, to_alloc); + p = PyObject_Realloc(s->s, to_alloc); if (p == NULL) { PyErr_SetString(PyExc_MemoryError, "memory allocation failed"); return -1; @@ -135,12 +135,25 @@ fail: * AND, the descr element size is a multiple of the alignment, * AND, the array data is positioned to alignment granularity. */ -static int +static NPY_INLINE int _is_natively_aligned_at(PyArray_Descr *descr, PyArrayObject *arr, Py_ssize_t offset) { int k; + if (NPY_LIKELY(descr == PyArray_DESCR(arr))) { + /* + * If the descriptor is the array's descriptor we can assume the + * array's alignment is correct. 
+ */ + assert(offset == 0); + if (PyArray_ISALIGNED(arr)) { + assert(descr->elsize % descr->alignment == 0); + return 1; + } + return 0; + } + if ((Py_ssize_t)(PyArray_DATA(arr)) % descr->alignment != 0) { return 0; } @@ -297,8 +310,6 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, descr->type_num == NPY_ULONGLONG); } - *offset += descr->elsize; - if (PyArray_IsScalar(obj, Generic)) { /* scalars are always natively aligned */ is_natively_aligned = 1; @@ -308,6 +319,8 @@ _buffer_format_string(PyArray_Descr *descr, _tmp_string_t *str, (PyArrayObject*)obj, *offset); } + *offset += descr->elsize; + if (descr->byteorder == '=' && is_natively_aligned) { /* Prefer native types, to cater for Cython */ is_standard_size = 0; @@ -445,49 +458,22 @@ static PyObject *_buffer_info_cache = NULL; static _buffer_info_t* _buffer_info_new(PyObject *obj) { + /* + * Note that the buffer info is cached as PyLongObjects making them appear + * like unreachable lost memory to valgrind. + */ _buffer_info_t *info; _tmp_string_t fmt = {NULL, 0, 0}; int k; PyArray_Descr *descr = NULL; int err = 0; - /* - * Note that the buffer info is cached as pyints making them appear like - * unreachable lost memory to valgrind. - */ - info = malloc(sizeof(_buffer_info_t)); - if (info == NULL) { - PyErr_NoMemory(); - goto fail; - } - - if (PyArray_IsScalar(obj, Datetime) || PyArray_IsScalar(obj, Timedelta)) { - /* - * Special case datetime64 scalars to remain backward compatible. - * This will change in a future version. - * Note arrays of datetime64 and structured arrays with datetime64 - * fields will not hit this code path and are currently unsupported - * in _buffer_format_string. 
- */ - if (_append_char(&fmt, 'B') < 0) { - goto fail; - } - if (_append_char(&fmt, '\0') < 0) { - goto fail; - } - info->ndim = 1; - info->shape = malloc(sizeof(Py_ssize_t) * 2); - if (info->shape == NULL) { + if (PyArray_IsScalar(obj, Void)) { + info = PyObject_Malloc(sizeof(_buffer_info_t)); + if (info == NULL) { PyErr_NoMemory(); goto fail; } - info->strides = info->shape + info->ndim; - info->shape[0] = 8; - info->strides[0] = 1; - info->format = fmt.s; - return info; - } - else if (PyArray_IsScalar(obj, Generic)) { descr = PyArray_DescrFromScalar(obj); if (descr == NULL) { goto fail; @@ -497,8 +483,16 @@ _buffer_info_new(PyObject *obj) info->strides = NULL; } else { + assert(PyArray_Check(obj)); PyArrayObject * arr = (PyArrayObject *)obj; descr = PyArray_DESCR(arr); + + info = PyObject_Malloc(sizeof(_buffer_info_t) + + sizeof(Py_ssize_t) * PyArray_NDIM(arr) * 2); + if (info == NULL) { + PyErr_NoMemory(); + goto fail; + } /* Fill in shape and strides */ info->ndim = PyArray_NDIM(arr); @@ -507,11 +501,8 @@ _buffer_info_new(PyObject *obj) info->strides = NULL; } else { - info->shape = malloc(sizeof(Py_ssize_t) * PyArray_NDIM(arr) * 2 + 1); - if (info->shape == NULL) { - PyErr_NoMemory(); - goto fail; - } + info->shape = (npy_intp *)((char *)info + sizeof(_buffer_info_t)); + assert((size_t)info->shape % sizeof(npy_intp) == 0); info->strides = info->shape + PyArray_NDIM(arr); for (k = 0; k < PyArray_NDIM(arr); ++k) { info->shape[k] = PyArray_DIMS(arr)[k]; @@ -525,11 +516,9 @@ _buffer_info_new(PyObject *obj) err = _buffer_format_string(descr, &fmt, obj, NULL, NULL); Py_DECREF(descr); if (err != 0) { - free(info->shape); goto fail; } if (_append_char(&fmt, '\0') < 0) { - free(info->shape); goto fail; } info->format = fmt.s; @@ -537,8 +526,8 @@ _buffer_info_new(PyObject *obj) return info; fail: - free(fmt.s); - free(info); + PyObject_Free(fmt.s); + PyObject_Free(info); return NULL; } @@ -569,12 +558,9 @@ static void _buffer_info_free(_buffer_info_t *info) { if 
(info->format) { - free(info->format); - } - if (info->shape) { - free(info->shape); + PyObject_Free(info->format); } - free(info); + PyObject_Free(info); } /* Get buffer info from the global dictionary */ diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c index 0390c92fc..7bd088677 100644 --- a/numpy/core/src/multiarray/convert_datatype.c +++ b/numpy/core/src/multiarray/convert_datatype.c @@ -1419,8 +1419,7 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2) } break; case NPY_TIMEDELTA: - if (PyTypeNum_ISINTEGER(type_num1) || - PyTypeNum_ISFLOAT(type_num1)) { + if (PyTypeNum_ISSIGNED(type_num1)) { return ensure_dtype_nbo(type2); } break; diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 2c2c457ac..ccebe9da6 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -3580,7 +3580,7 @@ array_from_text(PyArray_Descr *dtype, npy_intp num, char const *sep, size_t *nre npy_intp i; char *dptr, *clean_sep, *tmp; int err = 0; - int stop_reading_flag; /* -1 indicates end reached; -2 a parsing error */ + int stop_reading_flag = 0; /* -1 means end reached; -2 a parsing error */ npy_intp thisbuf = 0; npy_intp size; npy_intp bytes, totalbytes; diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index 5b4a94aa4..4e37b9628 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -1678,14 +1678,14 @@ _convert_from_str(PyObject *obj, int align) } /* Check for a deprecated Numeric-style typecode */ - char *dep_tps[] = {"Bool", "Complex", "Float", "Int", - "Object0", "String0", "Timedelta64", - "Unicode0", "UInt", "Void0"}; + /* `Uint` has deliberately weird uppercasing */ + char *dep_tps[] = {"Bytes", "Datetime64", "Str", "Uint"}; int ndep_tps = sizeof(dep_tps) / sizeof(dep_tps[0]); for (int i = 0; i < ndep_tps; ++i) { char *dep_tp = dep_tps[i]; if (strncmp(type, 
dep_tp, strlen(dep_tp)) == 0) { + /* Deprecated 2020-06-09, NumPy 1.20 */ if (DEPRECATE("Numeric-style type codes are " "deprecated and will result in " "an error in the future.") < 0) { diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index b914e5bb3..2538e05c6 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -31,9 +31,6 @@ #define EINSUM_USE_SSE1 0 #endif -/* - * TODO: Only some SSE2 for float64 is implemented. - */ #ifdef NPY_HAVE_SSE2_INTRINSICS #define EINSUM_USE_SSE2 1 #else @@ -276,6 +273,8 @@ static void #if EINSUM_USE_SSE1 && @float32@ __m128 a, b; +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, b; #endif NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_two (%d)\n", @@ -319,6 +318,29 @@ finish_after_unrolled_loop: /* Finish off the loop */ goto finish_after_unrolled_loop; } +#elif EINSUM_USE_SSE2 && @float64@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data1) && + EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(_mm_load_pd(data0+@i@), _mm_load_pd(data1+@i@)); + b = _mm_add_pd(a, _mm_load_pd(data_out+@i@)); + _mm_store_pd(data_out+@i@, b); +/**end repeat2**/ + data0 += 8; + data1 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } #endif /* Unroll the loop by 8 */ @@ -333,6 +355,14 @@ finish_after_unrolled_loop: b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@)); _mm_storeu_ps(data_out+@i@, b); /**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), _mm_loadu_pd(data1+@i@)); + b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@)); + _mm_storeu_pd(data_out+@i@, b); +/**end repeat2**/ #else /**begin repeat2 * #i = 0, 1, 2, 3, 4, 5, 6, 7# @@ -491,6 +521,8 @@ static void #if 
EINSUM_USE_SSE1 && @float32@ __m128 a, b, value1_sse; +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, b, value1_sse; #endif NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outcontig_two (%d)\n", @@ -534,6 +566,29 @@ finish_after_unrolled_loop: /* Finish off the loop */ goto finish_after_unrolled_loop; } +#elif EINSUM_USE_SSE2 && @float64@ + value1_sse = _mm_set1_pd(value1); + + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0) && EINSUM_IS_SSE_ALIGNED(data_out)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(_mm_load_pd(data0+@i@), value1_sse); + b = _mm_add_pd(a, _mm_load_pd(data_out+@i@)); + _mm_store_pd(data_out+@i@, b); +/**end repeat2**/ + data0 += 8; + data_out += 8; + } + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } #endif /* Unroll the loop by 8 */ @@ -548,6 +603,14 @@ finish_after_unrolled_loop: b = _mm_add_ps(a, _mm_loadu_ps(data_out+@i@)); _mm_storeu_ps(data_out+@i@, b); /**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + a = _mm_mul_pd(_mm_loadu_pd(data0+@i@), value1_sse); + b = _mm_add_pd(a, _mm_loadu_pd(data_out+@i@)); + _mm_storeu_pd(data_out+@i@, b); +/**end repeat2**/ #else /**begin repeat2 * #i = 0, 1, 2, 3, 4, 5, 6, 7# @@ -735,6 +798,8 @@ static void #if EINSUM_USE_SSE1 && @float32@ __m128 a, accum_sse = _mm_setzero_ps(); +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, accum_sse = _mm_setzero_pd(); #endif NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_stride0_contig_outstride0_two (%d)\n", @@ -772,15 +837,38 @@ finish_after_unrolled_loop: /**end repeat2**/ data1 += 8; } - -#if EINSUM_USE_SSE1 && @float32@ /* Add the four SSE values and put in accum */ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); accum_sse = _mm_add_ps(a, accum_sse); a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); accum_sse = _mm_add_ps(a, accum_sse); 
_mm_store_ss(&accum, accum_sse); -#endif + + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#elif EINSUM_USE_SSE2 && @float64@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data1)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; + +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data1+@i@)); +/**end repeat2**/ + data1 += 8; + } + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); /* Finish off the loop */ goto finish_after_unrolled_loop; @@ -801,6 +889,16 @@ finish_after_unrolled_loop: */ accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data1+@i@)); /**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data1+@i@)); +/**end repeat2**/ #else /**begin repeat2 * #i = 0, 1, 2, 3, 4, 5, 6, 7# @@ -818,6 +916,11 @@ finish_after_unrolled_loop: a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); accum_sse = _mm_add_ps(a, accum_sse); _mm_store_ss(&accum, accum_sse); +#elif EINSUM_USE_SSE2 && @float64@ + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); #endif /* Finish off the loop */ @@ -834,6 +937,8 @@ static void #if EINSUM_USE_SSE1 && @float32@ __m128 a, accum_sse = _mm_setzero_ps(); +#elif EINSUM_USE_SSE2 && @float64@ + __m128d a, accum_sse = _mm_setzero_pd(); #endif NPY_EINSUM_DBG_PRINT1("@name@_sum_of_products_contig_stride0_outstride0_two (%d)\n", @@ -871,16 +976,37 @@ finish_after_unrolled_loop: /**end repeat2**/ data0 += 8; } - -#if EINSUM_USE_SSE1 && @float32@ /* Add the four SSE values and put in accum */ a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(2,3,0,1)); accum_sse = _mm_add_ps(a, accum_sse); a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); accum_sse = _mm_add_ps(a, accum_sse); _mm_store_ss(&accum, accum_sse); -#endif + /* Finish off the loop */ + goto finish_after_unrolled_loop; + } +#elif EINSUM_USE_SSE2 && @float64@ + /* Use aligned instructions if possible */ + if (EINSUM_IS_SSE_ALIGNED(data0)) { + /* Unroll the loop by 8 */ + while (count >= 8) { + count -= 8; +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. 
+ */ + accum_sse = _mm_add_pd(accum_sse, _mm_load_pd(data0+@i@)); +/**end repeat2**/ + data0 += 8; + } + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); /* Finish off the loop */ goto finish_after_unrolled_loop; } @@ -900,6 +1026,16 @@ finish_after_unrolled_loop: */ accum_sse = _mm_add_ps(accum_sse, _mm_loadu_ps(data0+@i@)); /**end repeat2**/ +#elif EINSUM_USE_SSE2 && @float64@ +/**begin repeat2 + * #i = 0, 2, 4, 6# + */ + /* + * NOTE: This accumulation changes the order, so will likely + * produce slightly different results. + */ + accum_sse = _mm_add_pd(accum_sse, _mm_loadu_pd(data0+@i@)); +/**end repeat2**/ #else /**begin repeat2 * #i = 0, 1, 2, 3, 4, 5, 6, 7# @@ -917,6 +1053,11 @@ finish_after_unrolled_loop: a = _mm_shuffle_ps(accum_sse, accum_sse, _MM_SHUFFLE(1,0,3,2)); accum_sse = _mm_add_ps(a, accum_sse); _mm_store_ss(&accum, accum_sse); +#elif EINSUM_USE_SSE2 && @float64@ + /* Add the two SSE2 values and put in accum */ + a = _mm_shuffle_pd(accum_sse, accum_sse, _MM_SHUFFLE2(0,1)); + accum_sse = _mm_add_pd(a, accum_sse); + _mm_store_sd(&accum, accum_sse); #endif /* Finish off the loop */ diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c index 7aefbfc38..f73cb48d9 100644 --- a/numpy/core/src/multiarray/mapping.c +++ b/numpy/core/src/multiarray/mapping.c @@ -2480,8 +2480,6 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) int i; NPY_BEGIN_THREADS_DEF; - intp_type = PyArray_DescrFromType(NPY_INTP); - if (NpyIter_GetIterSize(mit->outer) == 0) { /* * When the outer iteration is empty, the indices broadcast to an @@ -2493,6 +2491,8 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit) return 0; } + intp_type = PyArray_DescrFromType(NPY_INTP); + NPY_BEGIN_THREADS; for (i=0; i < mit->numiter; i++) { diff --git a/numpy/core/src/multiarray/multiarraymodule.c 
b/numpy/core/src/multiarray/multiarraymodule.c index 84c22ba65..4190c53bd 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -4542,6 +4542,26 @@ PyMODINIT_FUNC PyInit__multiarray_umath(void) { } Py_DECREF(s); + s = npy_cpu_baseline_list(); + if (s == NULL) { + goto err; + } + if (PyDict_SetItemString(d, "__cpu_baseline__", s) < 0) { + Py_DECREF(s); + goto err; + } + Py_DECREF(s); + + s = npy_cpu_dispatch_list(); + if (s == NULL) { + goto err; + } + if (PyDict_SetItemString(d, "__cpu_dispatch__", s) < 0) { + Py_DECREF(s); + goto err; + } + Py_DECREF(s); + s = NpyCapsule_FromVoidPtr((void *)_datetime_strings, NULL); if (s == NULL) { goto err; diff --git a/numpy/core/src/multiarray/scalarapi.c b/numpy/core/src/multiarray/scalarapi.c index f3c440dc6..154f4d637 100644 --- a/numpy/core/src/multiarray/scalarapi.c +++ b/numpy/core/src/multiarray/scalarapi.c @@ -286,14 +286,10 @@ PyArray_CastScalarDirect(PyObject *scalar, PyArray_Descr *indescr, NPY_NO_EXPORT PyObject * PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) { - PyArray_Descr *typecode; - PyArrayObject *r; - char *memptr; - PyObject *ret; - /* convert to 0-dim array of scalar typecode */ - typecode = PyArray_DescrFromScalar(scalar); + PyArray_Descr *typecode = PyArray_DescrFromScalar(scalar); if (typecode == NULL) { + Py_XDECREF(outcode); return NULL; } if ((typecode->type_num == NPY_VOID) && @@ -307,49 +303,53 @@ PyArray_FromScalar(PyObject *scalar, PyArray_Descr *outcode) NULL, (PyObject *)scalar); } - /* Need to INCREF typecode because PyArray_NewFromDescr steals a - * reference below and we still need to access typecode afterwards. 
*/ - Py_INCREF(typecode); - r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, + PyArrayObject *r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, typecode, 0, NULL, NULL, NULL, 0, NULL); - if (r==NULL) { - Py_DECREF(typecode); Py_XDECREF(outcode); + if (r == NULL) { + Py_XDECREF(outcode); return NULL; } + /* the dtype used by the array may be different to the one requested */ + typecode = PyArray_DESCR(r); if (PyDataType_FLAGCHK(typecode, NPY_USE_SETITEM)) { if (typecode->f->setitem(scalar, PyArray_DATA(r), r) < 0) { - Py_DECREF(typecode); Py_XDECREF(outcode); Py_DECREF(r); + Py_DECREF(r); + Py_XDECREF(outcode); return NULL; } - goto finish; } + else { + char *memptr = scalar_value(scalar, typecode); - memptr = scalar_value(scalar, typecode); - - memcpy(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r)); - if (PyDataType_FLAGCHK(typecode, NPY_ITEM_HASOBJECT)) { - /* Need to INCREF just the PyObject portion */ - PyArray_Item_INCREF(memptr, typecode); + memcpy(PyArray_DATA(r), memptr, PyArray_ITEMSIZE(r)); + if (PyDataType_FLAGCHK(typecode, NPY_ITEM_HASOBJECT)) { + /* Need to INCREF just the PyObject portion */ + PyArray_Item_INCREF(memptr, typecode); + } } -finish: if (outcode == NULL) { - Py_DECREF(typecode); return (PyObject *)r; } if (PyArray_EquivTypes(outcode, typecode)) { if (!PyTypeNum_ISEXTENDED(typecode->type_num) || (outcode->elsize == typecode->elsize)) { - Py_DECREF(typecode); Py_DECREF(outcode); + /* + * Since the type is equivalent, and we haven't handed the array + * to anyone yet, let's fix the dtype to be what was requested, + * even if it is equivalent to what was passed in. 
+ */ + Py_SETREF(((PyArrayObject_fields *)r)->descr, outcode); + return (PyObject *)r; } } /* cast if necessary to desired output typecode */ - ret = PyArray_CastToType((PyArrayObject *)r, outcode, 0); - Py_DECREF(typecode); Py_DECREF(r); + PyObject *ret = PyArray_CastToType(r, outcode, 0); + Py_DECREF(r); return ret; } diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src index abc8d78c4..d08aabd64 100644 --- a/numpy/core/src/umath/_umath_tests.c.src +++ b/numpy/core/src/umath/_umath_tests.c.src @@ -576,6 +576,51 @@ fail: return NULL; } +// Testing the utilites of the CPU dispatcher +#ifndef NPY_DISABLE_OPTIMIZATION + #include "_umath_tests.dispatch.h" +#endif +NPY_CPU_DISPATCH_DECLARE(extern const char *_umath_tests_dispatch_var) +NPY_CPU_DISPATCH_DECLARE(const char *_umath_tests_dispatch_func, (void)) +NPY_CPU_DISPATCH_DECLARE(void _umath_tests_dispatch_attach, (PyObject *list)) + +static PyObject * +UMath_Tests_test_dispatch(PyObject *NPY_UNUSED(dummy), PyObject *NPY_UNUSED(dummy2)) +{ + const char *highest_func, *highest_var; + NPY_CPU_DISPATCH_CALL(highest_func = _umath_tests_dispatch_func, ()) + NPY_CPU_DISPATCH_CALL(highest_var = _umath_tests_dispatch_var) + const char *highest_func_xb = "nobase", *highest_var_xb = "nobase"; + NPY_CPU_DISPATCH_CALL_XB(highest_func_xb = _umath_tests_dispatch_func, ()) + NPY_CPU_DISPATCH_CALL_XB(highest_var_xb = _umath_tests_dispatch_var) + + PyObject *dict = PyDict_New(), *item; + if (dict == NULL) { + return NULL; + } + /**begin repeat + * #str = func, var, func_xb, var_xb# + */ + item = PyUnicode_FromString(highest_@str@); + if (item == NULL || PyDict_SetItemString(dict, "@str@", item) < 0) { + goto err; + } + /**end repeat**/ + item = PyList_New(0); + if (item == NULL || PyDict_SetItemString(dict, "all", item) < 0) { + goto err; + } + NPY_CPU_DISPATCH_CALL_ALL(_umath_tests_dispatch_attach, (item)) + if (PyErr_Occurred()) { + goto err; + } + return dict; +err: + Py_XDECREF(item); + 
Py_DECREF(dict); + return NULL; +} + static PyMethodDef UMath_TestsMethods[] = { {"test_signature", UMath_Tests_test_signature, METH_VARARGS, "Test signature parsing of ufunc. \n" @@ -583,6 +628,7 @@ static PyMethodDef UMath_TestsMethods[] = { "If fails, it returns NULL. Otherwise it returns a tuple of ufunc " "internals. \n", }, + {"test_dispatch", UMath_Tests_test_dispatch, METH_NOARGS, NULL}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -604,6 +650,11 @@ PyMODINIT_FUNC PyInit__umath_tests(void) { PyObject *d; PyObject *version; + // Initialize CPU features + if (npy_cpu_init() < 0) { + return NULL; + } + m = PyModule_Create(&moduledef); if (m == NULL) { return NULL; @@ -632,6 +683,5 @@ PyMODINIT_FUNC PyInit__umath_tests(void) { "cannot load _umath_tests module."); return NULL; } - return m; } diff --git a/numpy/core/src/umath/_umath_tests.dispatch.c b/numpy/core/src/umath/_umath_tests.dispatch.c new file mode 100644 index 000000000..d86a54411 --- /dev/null +++ b/numpy/core/src/umath/_umath_tests.dispatch.c @@ -0,0 +1,33 @@ +/** + * Testing the utilites of the CPU dispatcher + * + * @targets $werror baseline + * SSE2 SSE41 AVX2 + * VSX VSX2 VSX3 + * NEON ASIMD ASIMDHP + */ +#include <Python.h> +#include "npy_cpu_dispatch.h" + +#ifndef NPY_DISABLE_OPTIMIZATION + #include "_umath_tests.dispatch.h" +#endif + +NPY_CPU_DISPATCH_DECLARE(const char *_umath_tests_dispatch_func, (void)) +NPY_CPU_DISPATCH_DECLARE(extern const char *_umath_tests_dispatch_var) +NPY_CPU_DISPATCH_DECLARE(void _umath_tests_dispatch_attach, (PyObject *list)) + +const char *NPY_CPU_DISPATCH_CURFX(_umath_tests_dispatch_var) = NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(var)); +const char *NPY_CPU_DISPATCH_CURFX(_umath_tests_dispatch_func)(void) +{ + static const char *current = NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(func)); + return current; +} + +void NPY_CPU_DISPATCH_CURFX(_umath_tests_dispatch_attach)(PyObject *list) +{ + PyObject *item = PyUnicode_FromString(NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(func))); + 
if (item) { + PyList_Append(list, item); + } +} diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 48e89915c..e6414e29e 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -2698,17 +2698,17 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void /* process elements using glibc for large elements */ if (my_trig_op == npy_compute_cos) { - for (int ii = 0; iglibc_mask != 0; ii++) { + for (int ii = 0, jj = 0; iglibc_mask != 0; ii++, jj += stride) { if (iglibc_mask & 0x01) { - op[ii] = npy_cosf(ip[ii]); + op[ii] = npy_cosf(ip[jj]); } iglibc_mask = iglibc_mask >> 1; } } else { - for (int ii = 0; iglibc_mask != 0; ii++) { + for (int ii = 0, jj = 0; iglibc_mask != 0; ii++, jj += stride) { if (iglibc_mask & 0x01) { - op[ii] = npy_sinf(ip[ii]); + op[ii] = npy_sinf(ip[jj]); } iglibc_mask = iglibc_mask >> 1; } diff --git a/numpy/core/tests/test_api.py b/numpy/core/tests/test_api.py index 2600d409a..5d079d9d2 100644 --- a/numpy/core/tests/test_api.py +++ b/numpy/core/tests/test_api.py @@ -291,7 +291,7 @@ def test_array_astype_warning(t): @pytest.mark.parametrize(["dtype", "out_dtype"], [(np.bytes_, np.bool_), - (np.unicode, np.bool_), + (np.unicode_, np.bool_), (np.dtype("S10,S9"), np.dtype("?,?"))]) def test_string_to_boolean_cast(dtype, out_dtype): """ @@ -305,7 +305,7 @@ def test_string_to_boolean_cast(dtype, out_dtype): @pytest.mark.parametrize(["dtype", "out_dtype"], [(np.bytes_, np.bool_), - (np.unicode, np.bool_), + (np.unicode_, np.bool_), (np.dtype("S10,S9"), np.dtype("?,?"))]) def test_string_to_boolean_cast_errors(dtype, out_dtype): """ diff --git a/numpy/core/tests/test_array_coercion.py b/numpy/core/tests/test_array_coercion.py new file mode 100644 index 000000000..b8d4b5cdf --- /dev/null +++ b/numpy/core/tests/test_array_coercion.py @@ -0,0 +1,577 @@ +""" +Tests for array coercion, mainly through testing `np.array` results directly. +Note that other such tests exist e.g. 
in `test_api.py` and many corner-cases +are tested (sometimes indirectly) elsewhere. +""" + +import pytest +from pytest import param + +from itertools import product + +import numpy as np +from numpy.core._rational_tests import rational + +from numpy.testing import ( + assert_array_equal, assert_warns, IS_PYPY) + + +def arraylikes(): + """ + Generator for functions converting an array into various array-likes. + If full is True (default) includes array-likes not capable of handling + all dtypes + """ + # base array: + def ndarray(a): + return a + + yield param(ndarray, id="ndarray") + + # subclass: + class MyArr(np.ndarray): + pass + + def subclass(a): + return a.view(MyArr) + + yield subclass + + # Array-interface + class ArrayDunder: + def __init__(self, a): + self.a = a + + def __array__(self, dtype=None): + return self.a + + yield param(ArrayDunder, id="__array__") + + # memory-view + yield param(memoryview, id="memoryview") + + # Array-interface + class ArrayInterface: + def __init__(self, a): + self.a = a # need to hold on to keep interface valid + self.__array_interface__ = a.__array_interface__ + + yield param(ArrayInterface, id="__array_interface__") + + # Array-Struct + class ArrayStruct: + def __init__(self, a): + self.a = a # need to hold on to keep struct valid + self.__array_struct__ = a.__array_struct__ + + yield param(ArrayStruct, id="__array_struct__") + + +def scalar_instances(times=True, extended_precision=True, user_dtype=True): + # Hard-coded list of scalar instances. 
+ # Floats: + yield param(np.sqrt(np.float16(5)), id="float16") + yield param(np.sqrt(np.float32(5)), id="float32") + yield param(np.sqrt(np.float64(5)), id="float64") + if extended_precision: + yield param(np.sqrt(np.longdouble(5)), id="longdouble") + + # Complex: + yield param(np.sqrt(np.complex64(2+3j)), id="complex64") + yield param(np.sqrt(np.complex128(2+3j)), id="complex128") + if extended_precision: + yield param(np.sqrt(np.longcomplex(2+3j)), id="clongdouble") + + # Bool: + # XFAIL: Bool should be added, but has some bad properties when it + # comes to strings, see also gh-9875 + # yield param(np.bool_(0), id="bool") + + # Integers: + yield param(np.int8(2), id="int8") + yield param(np.int16(2), id="int16") + yield param(np.int32(2), id="int32") + yield param(np.int64(2), id="int64") + + yield param(np.uint8(2), id="uint8") + yield param(np.uint16(2), id="uint16") + yield param(np.uint32(2), id="uint32") + yield param(np.uint64(2), id="uint64") + + # Rational: + if user_dtype: + yield param(rational(1, 2), id="rational") + + # Cannot create a structured void scalar directly: + structured = np.array([(1, 3)], "i,i")[0] + assert isinstance(structured, np.void) + assert structured.dtype == np.dtype("i,i") + yield param(structured, id="structured") + + if times: + # Datetimes and timedelta + yield param(np.timedelta64(2), id="timedelta64[generic]") + yield param(np.timedelta64(23, "s"), id="timedelta64[s]") + yield param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)") + + yield param(np.datetime64("NaT"), id="datetime64[generic](NaT)") + yield param(np.datetime64("2020-06-07 12:43", "ms"), id="datetime64[ms]") + + # Strings and unstructured void: + yield param(np.bytes_(b"1234"), id="bytes") + yield param(np.unicode_("2345"), id="unicode") + yield param(np.void(b"4321"), id="unstructured_void") + + +def is_parametric_dtype(dtype): + """Returns True if the dtype is a parametric legacy dtype (itemsize + is 0, or a datetime without units) + """ + if 
dtype.itemsize == 0: + return True + if issubclass(dtype.type, (np.datetime64, np.timedelta64)): + if dtype.name.endswith("64"): + # Generic time units + return True + return False + + +class TestStringDiscovery: + @pytest.mark.parametrize("obj", + [object(), 1.2, 10**43, None, "string"], + ids=["object", "1.2", "10**43", "None", "string"]) + def test_basic_stringlength(self, obj): + if not isinstance(obj, (str, int)): + pytest.xfail( + "The Single object (first assert) uses a different branch " + "and thus gives a different result (either wrong or longer" + "string than normally discovered).") + + length = len(str(obj)) + expected = np.dtype(f"S{length}") + + assert np.array(obj, dtype="S").dtype == expected + assert np.array([obj], dtype="S").dtype == expected + + # A nested array is also discovered correctly + arr = np.array(obj, dtype="O") + assert np.array(arr, dtype="S").dtype == expected + + @pytest.mark.xfail(reason="Only single array unpacking is supported") + @pytest.mark.parametrize("obj", + [object(), 1.2, 10**43, None, "string"], + ids=["object", "1.2", "10**43", "None", "string"]) + def test_nested_arrays_stringlength(self, obj): + length = len(str(obj)) + expected = np.dtype(f"S{length}") + arr = np.array(obj, dtype="O") + assert np.array([arr, arr], dtype="S").dtype == expected + + @pytest.mark.xfail(reason="Only single array unpacking is supported") + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_unpack_first_level(self, arraylike): + # We unpack exactly one level of array likes + obj = np.array([None]) + obj[0] = np.array(1.2) + # the length of the included item, not of the float dtype + length = len(str(obj[0])) + expected = np.dtype(f"S{length}") + + obj = arraylike(obj) + # casting to string usually calls str(obj) + arr = np.array([obj], dtype="S") + assert arr.shape == (1, 1) + assert arr.dtype == expected + + +class TestScalarDiscovery: + def test_void_special_case(self): + # Void dtypes with structures discover tuples as 
elements + arr = np.array((1, 2, 3), dtype="i,i,i") + assert arr.shape == () + arr = np.array([(1, 2, 3)], dtype="i,i,i") + assert arr.shape == (1,) + + def test_char_special_case(self): + arr = np.array("string", dtype="c") + assert arr.shape == (6,) + assert arr.dtype.char == "c" + arr = np.array(["string"], dtype="c") + assert arr.shape == (1, 6) + assert arr.dtype.char == "c" + + def test_char_special_case_deep(self): + # Check that the character special case errors correctly if the + # array is too deep: + nested = ["string"] # 2 dimensions (due to string being sequence) + for i in range(np.MAXDIMS - 2): + nested = [nested] + + arr = np.array(nested, dtype='c') + assert arr.shape == (1,) * (np.MAXDIMS - 1) + (6,) + with pytest.raises(ValueError): + np.array([nested], dtype="c") + + def test_unknown_object(self): + arr = np.array(object()) + assert arr.shape == () + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar(self, scalar): + arr = np.array(scalar) + assert arr.shape == () + assert arr.dtype == scalar.dtype + + if type(scalar) is np.bytes_: + pytest.xfail("Nested bytes use len(str(scalar)) currently.") + + arr = np.array([[scalar, scalar]]) + assert arr.shape == (1, 2) + assert arr.dtype == scalar.dtype + + # Additionally to string this test also runs into a corner case + # with datetime promotion (the difference is the promotion order). 
+ @pytest.mark.xfail(reason="Coercion to string is not symmetric") + def test_scalar_promotion(self): + for sc1, sc2 in product(scalar_instances(), scalar_instances()): + sc1, sc2 = sc1.values[0], sc2.values[0] + # test all combinations: + arr = np.array([sc1, sc2]) + assert arr.shape == (2,) + try: + dt1, dt2 = sc1.dtype, sc2.dtype + expected_dtype = np.promote_types(dt1, dt2) + assert arr.dtype == expected_dtype + except TypeError as e: + # Will currently always go to object dtype + assert arr.dtype == np.dtype("O") + + @pytest.mark.parametrize("scalar", scalar_instances()) + def test_scalar_coercion(self, scalar): + # This tests various scalar coercion paths, mainly for the numerical + # types. It includes some paths not directly related to `np.array` + if isinstance(scalar, np.inexact): + # Ensure we have a full-precision number if available + scalar = type(scalar)((scalar * 2)**0.5) + + if is_parametric_dtype(scalar.dtype) or type(scalar) is rational: + # datetime with unit will be named "datetime64[unit]" + # Rational generally fails due to a missing cast. In the future + # object casts should automatically be defined based on `setitem`. 
+ pytest.xfail("0-D object array to a unit-less datetime cast fails") + + # Use casting from object: + arr = np.array(scalar, dtype=object).astype(scalar.dtype) + + # Test various ways to create an array containing this scalar: + arr1 = np.array(scalar).reshape(1) + arr2 = np.array([scalar]) + arr3 = np.empty(1, dtype=scalar.dtype) + arr3[0] = scalar + arr4 = np.empty(1, dtype=scalar.dtype) + arr4[:] = [scalar] + # All of these methods should yield the same results + assert_array_equal(arr, arr1) + assert_array_equal(arr, arr2) + assert_array_equal(arr, arr3) + assert_array_equal(arr, arr4) + + @pytest.mark.xfail(IS_PYPY, reason="`int(np.complex128(3))` fails on PyPy") + @pytest.mark.filterwarnings("ignore::numpy.ComplexWarning") + # After change, can enable times here, and below and it will work, + # Right now times are too complex, so map out some details below. + @pytest.mark.parametrize("cast_to", scalar_instances(times=False)) + def test_scalar_coercion_same_as_cast_and_assignment(self, cast_to): + """ + Test that in most cases: + * `np.array(scalar, dtype=dtype)` + * `np.empty((), dtype=dtype)[()] = scalar` + * `np.array(scalar).astype(dtype)` + should behave the same. The only exceptions are parametric dtypes + (mainly datetime/timedelta without unit) and void without fields. + """ + dtype = cast_to.dtype # use to parametrize only the target dtype + + # XFAIL: Some extended precision tests fail, because assigning to + # complex256 will use float(float128). Rational fails currently. + for scalar in scalar_instances( + times=False, extended_precision=False, user_dtype=False): + scalar = scalar.values[0] + + if dtype.type == np.void: + if scalar.dtype.fields is not None and dtype.fields is None: + # Here, coercion to "V6" works, but the cast fails. + # Since the types are identical, SETITEM takes care of + # this, but has different rules than the cast. 
+ with pytest.raises(TypeError): + np.array(scalar).astype(dtype) + # XFAIL: np.array(scalar, dtype=dtype) + np.array([scalar], dtype=dtype) + continue + + # The main test, we first try to use casting and if it succeeds + # continue below testing that things are the same, otherwise + # test that the alternative paths at least also fail. + try: + cast = np.array(scalar).astype(dtype) + except (TypeError, ValueError, RuntimeError): + # coercion should also raise (error type may change) + with pytest.raises(Exception): + np.array(scalar, dtype=dtype) + # assignment should also raise + res = np.zeros((), dtype=dtype) + with pytest.raises(Exception): + res[()] = scalar + + return + + # Non error path: + arr = np.array(scalar, dtype=dtype) + assert_array_equal(arr, cast) + # assignment behaves the same + ass = np.zeros((), dtype=dtype) + ass[()] = scalar + assert_array_equal(ass, cast) + + +class TestTimeScalars: + @pytest.mark.parametrize("dtype", [np.int64, np.float32]) + @pytest.mark.parametrize("scalar", + [param(np.timedelta64("NaT", "s"), id="timedelta64[s](NaT)"), + param(np.timedelta64(123, "s"), id="timedelta64[s]"), + param(np.datetime64("NaT", "generic"), id="datetime64[generic](NaT)"), + param(np.datetime64(1, "D"), id="datetime64[D]")],) + @pytest.mark.xfail( + reason="This uses int(scalar) or float(scalar) to assign, which " + "fails. 
However, casting currently does not fail.") + def test_coercion_basic(self, dtype, scalar): + arr = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + ass = np.ones((), dtype=dtype) + ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) + + assert_array_equal(arr, cast) + assert_array_equal(cast, cast) + + @pytest.mark.parametrize("dtype", [np.int64, np.float32]) + @pytest.mark.parametrize("scalar", + [param(np.timedelta64(123, "ns"), id="timedelta64[ns]"), + param(np.timedelta64(12, "generic"), id="timedelta64[generic]")]) + def test_coercion_timedelta_convert_to_number(self, dtype, scalar): + # Only "ns" and "generic" timedeltas can be converted to numbers + # so these are slightly special. + arr = np.array(scalar, dtype=dtype) + cast = np.array(scalar).astype(dtype) + ass = np.ones((), dtype=dtype) + ass[()] = scalar # raises, as would np.array([scalar], dtype=dtype) + + assert_array_equal(arr, cast) + assert_array_equal(cast, cast) + + @pytest.mark.parametrize(["val", "unit"], + [param(123, "s", id="[s]"), param(123, "D", id="[D]")]) + @pytest.mark.parametrize("scalar_type", [np.datetime64, np.timedelta64]) + @pytest.mark.xfail(reason="Error not raised for assignment") + def test_coercion_assignment_times(self, scalar_type, val, unit): + scalar = scalar_type(val, unit) + + # The error type is not ideal, fails because string is too short: + with pytest.raises(RuntimeError): + np.array(scalar, dtype="S6") + with pytest.raises(RuntimeError): + cast = np.array(scalar).astype("S6") + ass = np.ones((), dtype="S6") + with pytest.raises(RuntimeError): + ass[()] = scalar + + +class TestNested: + @pytest.mark.xfail(reason="No deprecation warning given.") + def test_nested_simple(self): + initial = [1.2] + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + arr = np.array(nested, dtype="float64") + assert arr.shape == (1,) * np.MAXDIMS + with pytest.raises(ValueError): + np.array([nested], dtype="float64") + + # 
We discover object automatically at this time: + with assert_warns(np.VisibleDeprecationWarning): + arr = np.array([nested]) + assert arr.dtype == np.dtype("O") + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() is initial + + def test_pathological_self_containing(self): + # Test that this also works for two nested sequences + l = [] + l.append(l) + arr = np.array([l, l, l], dtype=object) + assert arr.shape == (3,) + (1,) * (np.MAXDIMS - 1) + + # Also check a ragged case: + arr = np.array([l, [None], l], dtype=object) + assert arr.shape == (3, 1) + + @pytest.mark.xfail( + reason="For arrays and memoryview, this used to not complain " + "and assign to a too small array instead. For other " + "array-likes the error is different because fewer (only " + "MAXDIM-1) dimensions are found, failing the last test.") + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_nested_arraylikes(self, arraylike): + # We try storing an array like into an array, but the array-like + # will have too many dimensions. This means the shape discovery + # decides that the array-like must be treated as an object (a special + # case of ragged discovery). The result will be an array with one + # dimension less than the maximum dimensions, and the array being + # assigned to it (which does work for object or if `float(arraylike)` + # works). + initial = arraylike(np.ones((1, 1))) + #if not isinstance(initial, (np.ndarray, memoryview)): + # pytest.xfail( + # "When coercing to object, these cases currently discover " + # "fewer dimensions than ndarray failing the second part.") + + nested = initial + for i in range(np.MAXDIMS - 1): + nested = [nested] + + with pytest.raises(ValueError): + # It will refuse to assign the array into + np.array(nested, dtype="float64") + + # If this is object, we end up assigning a (1, 1) array into (1,) + # (due to running out of dimensions), this is currently supported but + # a special case which is not ideal. 
+ arr = np.array(nested, dtype=object) + assert arr.shape == (1,) * np.MAXDIMS + assert arr.item() == np.array(initial).item() + + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_uneven_depth_ragged(self, arraylike): + arr = np.arange(4).reshape((2, 2)) + arr = arraylike(arr) + + # Array is ragged in the second dimension already: + out = np.array([arr, [arr]], dtype=object) + assert out.shape == (2,) + assert out[0] is arr + assert type(out[1]) is list + + if not isinstance(arr, (np.ndarray, memoryview)): + pytest.xfail( + "does not raise ValueError below, because it discovers " + "the dimension as (2,) and not (2, 2, 2)") + + # Array is ragged in the third dimension: + with pytest.raises(ValueError): + # This is a broadcast error during assignment, because + # the array shape would be (2, 2, 2) but `arr[0, 0] = arr` fails. + np.array([arr, [arr, arr]], dtype=object) + + def test_empty_sequence(self): + arr = np.array([[], [1], [[1]]], dtype=object) + assert arr.shape == (3,) + + # The empty sequence stops further dimension discovery, so the + # result shape will be (0,) which leads to an error during: + with pytest.raises(ValueError): + np.array([[], np.empty((0, 1))], dtype=object) + + +class TestBadSequences: + # These are tests for bad objects passed into `np.array`, in general + # these have undefined behaviour. In the old code they partially worked + # when now they will fail. We could (and maybe should) create a copy + # of all sequences to be safe against bad-actors. + + def test_growing_list(self): + # List to coerce, `mylist` will append to it during coercion + obj = [] + class mylist(list): + def __len__(self): + obj.append([1, 2]) + return super().__len__() + + obj.append(mylist([1, 2])) + + with pytest.raises(ValueError): # changes to RuntimeError + np.array(obj) + + # Note: We do not test a shrinking list. These do very evil things + # and the only way to fix them would be to copy all sequences. 
+ # (which may be a real option in the future). + + def test_mutated_list(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class mylist(list): + def __len__(self): + obj[0] = [2, 3] # replace with a different list. + return super().__len__() + + obj.append([2, 3]) + obj.append(mylist([1, 2])) + #with pytest.raises(RuntimeError): # Will error in the future + np.array(obj) + + def test_replace_0d_array(self): + # List to coerce, `mylist` will mutate the first element + obj = [] + class baditem: + def __len__(self): + obj[0][0] = 2 # replace with a different list. + raise ValueError("not actually a sequence!") + + def __getitem__(self): + pass + + # Runs into a corner case in the new code, the `array(2)` is cached + # so replacing it invalidates the cache. + obj.append([np.array(2), baditem()]) + # with pytest.raises(RuntimeError): # Will error in the future + np.array(obj) + + +class TestArrayLikes: + @pytest.mark.parametrize("arraylike", arraylikes()) + def test_0d_object_special_case(self, arraylike): + arr = np.array(0.) + obj = arraylike(arr) + # A single array-like is always converted: + res = np.array(obj, dtype=object) + assert_array_equal(arr, res) + + # But a single 0-D nested array-like never: + res = np.array([obj], dtype=object) + assert res[0] is obj + + def test_0d_generic_special_case(self): + class ArraySubclass(np.ndarray): + def __float__(self): + raise TypeError("e.g. quantities raise on this") + + arr = np.array(0.) + obj = arr.view(ArraySubclass) + res = np.array(obj) + # The subclass is simply cast: + assert_array_equal(arr, res) + + # If the 0-D array-like is included, __float__ is currently + # guaranteed to be used. We may want to change that, quantities + # and masked arrays half make use of this. 
+ with pytest.raises(TypeError): + np.array([obj]) + + # The same holds for memoryview: + obj = memoryview(arr) + res = np.array(obj) + assert_array_equal(arr, res) + with pytest.raises(ValueError): + # The error type does not matter much here. + np.array([obj]) diff --git a/numpy/core/tests/test_cpu_dispatcher.py b/numpy/core/tests/test_cpu_dispatcher.py new file mode 100644 index 000000000..8712dee1a --- /dev/null +++ b/numpy/core/tests/test_cpu_dispatcher.py @@ -0,0 +1,42 @@ +from numpy.core._multiarray_umath import __cpu_features__, __cpu_baseline__, __cpu_dispatch__ +from numpy.core import _umath_tests +from numpy.testing import assert_equal + +def test_dispatcher(): + """ + Testing the utilities of the CPU dispatcher + """ + targets = ( + "SSE2", "SSE41", "AVX2", + "VSX", "VSX2", "VSX3", + "NEON", "ASIMD", "ASIMDHP" + ) + highest_sfx = "" # no suffix for the baseline + all_sfx = [] + for feature in reversed(targets): + # skip baseline features; by default `CCompilerOpt` does not generate separate objects + # for the baseline, just one object combining all of them via the 'baseline' option + # within the configuration statements. 
+ if feature in __cpu_baseline__: + continue + # check compiler and running machine support + if feature not in __cpu_dispatch__ or not __cpu_features__[feature]: + continue + + if not highest_sfx: + highest_sfx = "_" + feature + all_sfx.append("func" + "_" + feature) + + test = _umath_tests.test_dispatch() + assert_equal(test["func"], "func" + highest_sfx) + assert_equal(test["var"], "var" + highest_sfx) + + if highest_sfx: + assert_equal(test["func_xb"], "func" + highest_sfx) + assert_equal(test["var_xb"], "var" + highest_sfx) + else: + assert_equal(test["func_xb"], "nobase") + assert_equal(test["var_xb"], "nobase") + + all_sfx.append("func") # add the baseline + assert_equal(test["all"], all_sfx) diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 438d52f97..fef1e24d8 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -775,6 +775,12 @@ class TestDateTime: np.dtype('m8[Y]'), np.dtype('m8[D]')) assert_raises(TypeError, np.promote_types, np.dtype('m8[M]'), np.dtype('m8[W]')) + # timedelta and float cannot be safely cast with each other + assert_raises(TypeError, np.promote_types, "float32", "m8") + assert_raises(TypeError, np.promote_types, "m8", "float32") + assert_raises(TypeError, np.promote_types, "uint64", "m8") + assert_raises(TypeError, np.promote_types, "m8", "uint64") + # timedelta <op> timedelta may overflow with big unit ranges assert_raises(OverflowError, np.promote_types, np.dtype('m8[W]'), np.dtype('m8[fs]')) diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 01924410f..239d20c9d 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -9,6 +9,7 @@ import warnings import pytest import tempfile import re +import sys import numpy as np from numpy.testing import ( @@ -313,19 +314,14 @@ class TestBinaryReprInsufficientWidthParameterForRepresentation(_DeprecationTest class 
TestNumericStyleTypecodes(_DeprecationTestCase): """ - Deprecate the old numeric-style dtypes, which are especially - confusing for complex types, e.g. Complex32 -> complex64. When the - deprecation cycle is complete, the check for the strings should be - removed from PyArray_DescrConverter in descriptor.c, and the - deprecated keys should not be added as capitalized aliases in - _add_aliases in numerictypes.py. + Most numeric style typecodes were previously deprecated (and removed) + in 1.20. This also deprecates the remaining ones. """ + # 2020-06-09, NumPy 1.20 def test_all_dtypes(self): - deprecated_types = [ - 'Bool', 'Complex32', 'Complex64', 'Float16', 'Float32', 'Float64', - 'Int8', 'Int16', 'Int32', 'Int64', 'Object0', 'Timedelta64', - 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'Void0' - ] + deprecated_types = ['Bytes0', 'Datetime64', 'Str0'] + # Depending on intp size, either Uint32 or Uint64 is defined: + deprecated_types.append(f"U{np.dtype(np.intp).name}") for dt in deprecated_types: self.assert_deprecated(np.dtype, exceptions=(TypeError,), args=(dt,)) @@ -438,14 +434,6 @@ class TestGeneratorSum(_DeprecationTestCase): self.assert_deprecated(np.sum, args=((i for i in range(5)),)) -class TestSctypeNA(_VisibleDeprecationTestCase): - # 2018-06-24, 1.16 - def test_sctypeNA(self): - self.assert_deprecated(lambda: np.sctypeNA['?']) - self.assert_deprecated(lambda: np.typeNA['?']) - self.assert_deprecated(lambda: np.typeNA.get('?')) - - class TestPositiveOnNonNumerical(_DeprecationTestCase): # 2018-06-28, 1.16.0 def test_positive_on_non_number(self): @@ -655,3 +643,22 @@ class TestNonExactMatchDeprecation(_DeprecationTestCase): self.assert_deprecated(lambda: np.ravel_multi_index(arr, (7, 6), mode='Cilp')) # using completely different word with first character as R self.assert_deprecated(lambda: np.searchsorted(arr[0], 4, side='Random')) + + +class TestDeprecatedGlobals(_DeprecationTestCase): + # 2020-06-06 + @pytest.mark.skipif( + sys.version_info < (3, 7), + 
reason='module-level __getattr__ not supported') + def test_type_aliases(self): + # from builtins + self.assert_deprecated(lambda: np.bool) + self.assert_deprecated(lambda: np.int) + self.assert_deprecated(lambda: np.float) + self.assert_deprecated(lambda: np.complex) + self.assert_deprecated(lambda: np.object) + self.assert_deprecated(lambda: np.str) + + # from np.compat + self.assert_deprecated(lambda: np.long) + self.assert_deprecated(lambda: np.unicode) diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index 73aa01de6..2e2b0dbe2 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -86,6 +86,15 @@ class TestBuiltin: assert_raises(TypeError, np.dtype, 'q8') assert_raises(TypeError, np.dtype, 'Q8') + @pytest.mark.parametrize("dtype", + ['Bool', 'Complex32', 'Complex64', 'Float16', 'Float32', 'Float64', + 'Int8', 'Int16', 'Int32', 'Int64', 'Object0', 'Timedelta64', + 'UInt8', 'UInt16', 'UInt32', 'UInt64', 'Void0', + "Float128", "Complex128"]) + def test_numeric_style_types_are_invalid(self, dtype): + with assert_raises(TypeError): + np.dtype(dtype) + @pytest.mark.parametrize( 'value', ['m8', 'M8', 'datetime64', 'timedelta64', @@ -1047,6 +1056,11 @@ def test_invalid_dtype_string(): assert_raises(TypeError, np.dtype, u'Fl\xfcgel') +def test_keyword_argument(): + # test for https://github.com/numpy/numpy/pull/16574#issuecomment-642660971 + assert np.dtype(dtype=np.float64) == np.dtype(np.float64) + + class TestFromDTypeAttribute: def test_simple(self): class dt: @@ -1324,4 +1338,3 @@ class TestFromCTypes: pair_type = np.dtype('{},{}'.format(*pair)) expected = np.dtype([('f0', pair[0]), ('f1', pair[1])]) assert_equal(pair_type, expected) - diff --git a/numpy/core/tests/test_function_base.py b/numpy/core/tests/test_function_base.py index 2197ef0cd..62a9772c8 100644 --- a/numpy/core/tests/test_function_base.py +++ b/numpy/core/tests/test_function_base.py @@ -1,6 +1,6 @@ from numpy import ( logspace, linspace, 
geomspace, dtype, array, sctypes, arange, isnan, - ndarray, sqrt, nextafter, stack + ndarray, sqrt, nextafter, stack, errstate ) from numpy.testing import ( assert_, assert_equal, assert_raises, assert_array_equal, assert_allclose, @@ -113,6 +113,40 @@ class TestGeomspace: assert_array_equal(y, [-100, -10, -1]) assert_array_equal(y.imag, 0) + def test_boundaries_match_start_and_stop_exactly(self): + # make sure that the boundaries of the returned array exactly + # equal 'start' and 'stop' - this isn't obvious because + # np.exp(np.log(x)) isn't necessarily exactly equal to x + start = 0.3 + stop = 20.3 + + y = geomspace(start, stop, num=1) + assert_equal(y[0], start) + + y = geomspace(start, stop, num=1, endpoint=False) + assert_equal(y[0], start) + + y = geomspace(start, stop, num=3) + assert_equal(y[0], start) + assert_equal(y[-1], stop) + + y = geomspace(start, stop, num=3, endpoint=False) + assert_equal(y[0], start) + + def test_nan_interior(self): + with errstate(invalid='ignore'): + y = geomspace(-3, 3, num=4) + + assert_equal(y[0], -3.0) + assert_(isnan(y[1:-1]).all()) + assert_equal(y[3], 3.0) + + with errstate(invalid='ignore'): + y = geomspace(-3, 3, num=4, endpoint=False) + + assert_equal(y[0], -3.0) + assert_(isnan(y[1:]).all()) + def test_complex(self): # Purely imaginary y = geomspace(1j, 16j, num=5) diff --git a/numpy/core/tests/test_indexing.py b/numpy/core/tests/test_indexing.py index 4bb5cb11a..f6e263774 100644 --- a/numpy/core/tests/test_indexing.py +++ b/numpy/core/tests/test_indexing.py @@ -370,6 +370,20 @@ class TestIndexing: a[...] = s assert_((a == 1).all()) + def test_array_like_values(self): + # Similar to the above test, but use a memoryview instead + a = np.zeros((5, 5)) + s = np.arange(25, dtype=np.float64).reshape(5, 5) + + a[[0, 1, 2, 3, 4], :] = memoryview(s) + assert_array_equal(a, s) + + a[:, [0, 1, 2, 3, 4]] = memoryview(s) + assert_array_equal(a, s) + + a[...] 
= memoryview(s) + assert_array_equal(a, s) + def test_subclass_writeable(self): d = np.rec.array([('NGC1001', 11), ('NGC1002', 1.), ('NGC1003', 1.)], dtype=[('target', 'S20'), ('V_mag', '>f4')]) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index e116077f9..09adddf6d 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4702,6 +4702,10 @@ class TestIO: e = np.array([-25041670086757, 104783749223640], dtype=np.int64) assert_array_equal(d, e) + def test_fromstring_count0(self): + d = np.fromstring("1,2", sep=",", dtype=np.int64, count=0) + assert d.shape == (0,) + def test_empty_files_binary(self): with open(self.filename, 'w') as f: pass diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index 96a6d810f..cf18a5d93 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -42,13 +42,6 @@ class TestRegression: b = pickle.load(f) assert_array_equal(a, b) - def test_typeNA(self): - # Issue gh-515 - with suppress_warnings() as sup: - sup.filter(np.VisibleDeprecationWarning) - assert_equal(np.typeNA[np.int64], 'Int64') - assert_equal(np.typeNA[np.uint64], 'UInt64') - def test_dtype_names(self): # Ticket #35 # Should succeed diff --git a/numpy/core/tests/test_scalar_ctors.py b/numpy/core/tests/test_scalar_ctors.py index 7645a0853..7e933537d 100644 --- a/numpy/core/tests/test_scalar_ctors.py +++ b/numpy/core/tests/test_scalar_ctors.py @@ -65,7 +65,7 @@ class TestExtraArgs: def test_bool(self): with pytest.raises(TypeError): - np.bool(False, garbage=True) + np.bool_(False, garbage=True) def test_void(self): with pytest.raises(TypeError): @@ -79,3 +79,37 @@ class TestFromInt: def test_uint64_from_negative(self): assert_equal(np.uint64(-2), np.uint64(18446744073709551614)) + + +int_types = [np.byte, np.short, np.intc, np.int_, np.longlong] +uint_types = [np.ubyte, np.ushort, np.uintc, np.uint, np.ulonglong] +float_types = 
[np.half, np.single, np.double, np.longdouble] +cfloat_types = [np.csingle, np.cdouble, np.clongdouble] + + +class TestArrayFromScalar: + """ gh-15467 """ + + def _do_test(self, t1, t2): + x = t1(2) + arr = np.array(x, dtype=t2) + # type should be preserved exactly + if t2 is None: + assert arr.dtype.type is t1 + else: + assert arr.dtype.type is t2 + + @pytest.mark.parametrize('t1', int_types + uint_types) + @pytest.mark.parametrize('t2', int_types + uint_types + [None]) + def test_integers(self, t1, t2): + return self._do_test(t1, t2) + + @pytest.mark.parametrize('t1', float_types) + @pytest.mark.parametrize('t2', float_types + [None]) + def test_reals(self, t1, t2): + return self._do_test(t1, t2) + + @pytest.mark.parametrize('t1', cfloat_types) + @pytest.mark.parametrize('t2', cfloat_types + [None]) + def test_complex(self, t1, t2): + return self._do_test(t1, t2) diff --git a/numpy/core/tests/test_scalarbuffer.py b/numpy/core/tests/test_scalarbuffer.py index b1c1bbbb1..574c56864 100644 --- a/numpy/core/tests/test_scalarbuffer.py +++ b/numpy/core/tests/test_scalarbuffer.py @@ -2,6 +2,7 @@ Test scalar buffer interface adheres to PEP 3118 """ import numpy as np +from numpy.core._rational_tests import rational import pytest from numpy.testing import assert_, assert_equal, assert_raises @@ -117,3 +118,8 @@ class TestScalarPEP3118: code_points = np.frombuffer(v, dtype='i4') assert_equal(code_points, [ord(c) for c in s]) + + def test_user_scalar_fails_buffer(self): + r = rational(1) + with assert_raises(TypeError): + memoryview(r) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 91acd6ac3..f836af168 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -890,15 +890,17 @@ class TestAVXFloat32Transcendental: sizes = np.arange(2,100) for ii in sizes: x_f32 = np.float32(np.random.uniform(low=0.01,high=88.1,size=ii)) + x_f32_large = x_f32.copy() + x_f32_large[3:-1:4] = 120000.0 exp_true = np.exp(x_f32) log_true 
= np.log(x_f32) - sin_true = np.sin(x_f32) - cos_true = np.cos(x_f32) + sin_true = np.sin(x_f32_large) + cos_true = np.cos(x_f32_large) for jj in strides: assert_array_almost_equal_nulp(np.exp(x_f32[::jj]), exp_true[::jj], nulp=2) assert_array_almost_equal_nulp(np.log(x_f32[::jj]), log_true[::jj], nulp=2) - assert_array_almost_equal_nulp(np.sin(x_f32[::jj]), sin_true[::jj], nulp=2) - assert_array_almost_equal_nulp(np.cos(x_f32[::jj]), cos_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.sin(x_f32_large[::jj]), sin_true[::jj], nulp=2) + assert_array_almost_equal_nulp(np.cos(x_f32_large[::jj]), cos_true[::jj], nulp=2) class TestLogAddExp(_FilterInvalids): def test_logaddexp_values(self): diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py index e3c2eb025..33080edbb 100644 --- a/numpy/core/tests/test_umath_accuracy.py +++ b/numpy/core/tests/test_umath_accuracy.py @@ -57,9 +57,3 @@ class TestAccuracy: outval = outval[perm] maxulperr = data_subset['ulperr'].max() assert_array_max_ulp(npfunc(inval), outval, maxulperr) - - def test_ignore_nan_ulperror(self): - # Ignore ULP differences between various NAN's - nan1_f32 = np.array(str_to_float('0xffffffff'), dtype=np.float32) - nan2_f32 = np.array(str_to_float('0x7fddbfbf'), dtype=np.float32) - assert_array_max_ulp(nan1_f32, nan2_f32, 0) diff --git a/numpy/core/tests/test_umath_complex.py b/numpy/core/tests/test_umath_complex.py index a21158420..a626219c5 100644 --- a/numpy/core/tests/test_umath_complex.py +++ b/numpy/core/tests/test_umath_complex.py @@ -545,25 +545,25 @@ class TestSpecialComplexAVX(object): @pytest.mark.parametrize("stride", [-4,-2,-1,1,2,4]) @pytest.mark.parametrize("astype", [np.complex64, np.complex128]) def test_array(self, stride, astype): - arr = np.array([np.complex(np.nan , np.nan), - np.complex(np.nan , np.inf), - np.complex(np.inf , np.nan), - np.complex(np.inf , np.inf), - np.complex(0. , np.inf), - np.complex(np.inf , 0.), - np.complex(0. 
, 0.), - np.complex(0. , np.nan), - np.complex(np.nan , 0.)], dtype=astype) + arr = np.array([complex(np.nan , np.nan), + complex(np.nan , np.inf), + complex(np.inf , np.nan), + complex(np.inf , np.inf), + complex(0. , np.inf), + complex(np.inf , 0.), + complex(0. , 0.), + complex(0. , np.nan), + complex(np.nan , 0.)], dtype=astype) abs_true = np.array([np.nan, np.inf, np.inf, np.inf, np.inf, np.inf, 0., np.nan, np.nan], dtype=arr.real.dtype) - sq_true = np.array([np.complex(np.nan, np.nan), - np.complex(np.nan, np.nan), - np.complex(np.nan, np.nan), - np.complex(np.nan, np.inf), - np.complex(-np.inf, np.nan), - np.complex(np.inf, np.nan), - np.complex(0., 0.), - np.complex(np.nan, np.nan), - np.complex(np.nan, np.nan)], dtype=astype) + sq_true = np.array([complex(np.nan, np.nan), + complex(np.nan, np.nan), + complex(np.nan, np.nan), + complex(np.nan, np.inf), + complex(-np.inf, np.nan), + complex(np.inf, np.nan), + complex(0., 0.), + complex(np.nan, np.nan), + complex(np.nan, np.nan)], dtype=astype) assert_equal(np.abs(arr[::stride]), abs_true[::stride]) with np.errstate(invalid='ignore'): assert_equal(np.square(arr[::stride]), sq_true[::stride]) diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py new file mode 100644 index 000000000..edf6c1ba0 --- /dev/null +++ b/numpy/distutils/ccompiler_opt.py @@ -0,0 +1,2463 @@ +"""Provides the `CCompilerOpt` class, used for handling the CPU/hardware +optimization, starting from parsing the command arguments, to managing the +relation between the CPU baseline and dispatch-able features, +also generating the required C headers and ending with compiling +the sources with proper compiler's flags. 
+ +`CCompilerOpt` doesn't provide runtime detection for the CPU features, +instead only focuses on the compiler side, but it creates abstract C headers +that can be used later for the final runtime dispatching process.""" + +import sys, io, os, re, textwrap, pprint, inspect, atexit, subprocess + +class _Config: + """An abstract class holds all configurable attributes of `CCompilerOpt`, + these class attributes can be used to change the default behavior + of `CCompilerOpt` in order to fit other requirements. + + Attributes + ---------- + conf_nocache : bool + Set True to disable memory and file cache. + Default is False. + + conf_noopt : bool + Set True to forces the optimization to be disabled, + in this case `CCompilerOpt` tends to generate all + expected headers in order to 'not' break the build. + Default is False. + + conf_cache_factors : list + Add extra factors to the primary caching factors. The caching factors + are utilized to determine if there are changes had happened that + requires to discard the cache and re-updating it. The primary factors + are the arguments of `CCompilerOpt` and `CCompiler`'s properties(type, flags, etc). + Default is list of two items, containing the time of last modification + of `ccompiler_opt` and value of attribute "conf_noopt" + + conf_tmp_path : str, + The path of temporary directory. Default is auto-created + temporary directory via ``tempfile.mkdtemp()``. + + conf_check_path : str + The path of testing files. Each added CPU feature must have a + **C** source file contains at least one intrinsic or instruction that + related to this feature, so it can be tested against the compiler. + Default is ``./distutils/checks``. + + conf_target_groups : dict + Extra tokens that can be reached from dispatch-able sources through + the special mark ``@targets``. Default is an empty dictionary. 
+ + **Notes**: + - case-insensitive for tokens and group names + - sign '#' must stick in the begin of group name and only within ``@targets`` + + **Example**: + .. code-block:: console + + $ "@targets #avx_group other_tokens" > group_inside.c + + >>> CCompilerOpt.conf_target_groups["avx_group"] = \\ + "$werror $maxopt avx2 avx512f avx512_skx" + >>> cco = CCompilerOpt(cc_instance) + >>> cco.try_dispatch(["group_inside.c"]) + + conf_c_prefix : str + The prefix of public C definitions. Default is ``"NPY_"``. + + conf_c_prefix_ : str + The prefix of internal C definitions. Default is ``"NPY__"``. + + conf_cc_flags : dict + Nested dictionaries defining several compiler flags + that linked to some major functions, the main key + represent the compiler name and sub-keys represent + flags names. Default is already covers all supported + **C** compilers. + + Sub-keys explained as follows: + + "native": str or None + used by argument option `native`, to detect the current + machine support via the compiler. + "werror": str or None + utilized to treat warning as errors during testing CPU features + against the compiler and also for target's policy `$werror` + via dispatch-able sources. + "maxopt": str or None + utilized for target's policy '$maxopt' and the value should + contains the maximum acceptable optimization by the compiler. + e.g. in gcc `'-O3'` + + **Notes**: + * case-sensitive for compiler names and flags + * use space to separate multiple flags + * any flag will tested against the compiler and it will skipped + if it's not applicable. + + conf_min_features : dict + A dictionary defines the used CPU features for + argument option `'min'`, the key represent the CPU architecture + name e.g. `'x86'`. Default values provide the best effort + on wide range of users platforms. + + **Note**: case-sensitive for architecture names. + + conf_features : dict + Nested dictionaries used for identifying the CPU features. 
+ the primary key is represented as a feature name or group name + that gathers several features. Default values covers all + supported features but without the major options like "flags", + these undefined options handle it by method `conf_features_partial()`. + Default value is covers almost all CPU features for *X86*, *IBM/Power64* + and *ARM 7/8*. + + Sub-keys explained as follows: + + "implies" : str or list, optional, + List of CPU feature names to be implied by it, + the feature name must be defined within `conf_features`. + Default is None. + + "flags": str or list, optional + List of compiler flags. Default is None. + + "detect": str or list, optional + List of CPU feature names that required to be detected + in runtime. By default, its the feature name or features + in "group" if its specified. + + "implies_detect": bool, optional + If True, all "detect" of implied features will be combined. + Default is True. see `feature_detect()`. + + "group": str or list, optional + Same as "implies" but doesn't require the feature name to be + defined within `conf_features`. + + "interest": int, required + a key for sorting CPU features + + "headers": str or list, optional + intrinsics C header file + + "disable": str, optional + force disable feature, the string value should contains the + reason of disabling. + + "autovec": bool or None, optional + True or False to declare that CPU feature can be auto-vectorized + by the compiler. + By default(None), treated as True if the feature contains at + least one applicable flag. see `feature_can_autovec()` + + **NOTES**: + * space can be used as separator with options that supports "str or list" + * case-sensitive for all values and feature name must be in upper-case. 
+ * if flags aren't applicable, its will skipped rather than disable the + CPU feature + * the CPU feature will disabled if the compiler fail to compile + the test file + """ + conf_nocache = False + conf_noopt = False + conf_cache_factors = None + conf_tmp_path = None + conf_check_path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), "checks" + ) + conf_target_groups = {} + conf_c_prefix = 'NPY_' + conf_c_prefix_ = 'NPY__' + conf_cc_flags = dict( + gcc = dict( + # native should always fail on arm and ppc64, + # native usually works only with x86 + native = '-march=native', + opt = '-O3', + werror = '-Werror' + ), + clang = dict( + native = '-march=native', + opt = "-O3", + werror = '-Werror' + ), + icc = dict( + native = '-xHost', + opt = '-O3', + werror = '-Werror' + ), + iccw = dict( + native = '/QxHost', + opt = '/O3', + werror = '/Werror' + ), + msvc = dict( + native = None, + opt = '/O2', + werror = '/WX' + ) + ) + conf_min_features = dict( + x86 = "SSE SSE2", + x64 = "SSE SSE2 SSE3", + ppc64 = '', # play it safe + ppc64le = "VSX VSX2", + armhf = '', # play it safe + aarch64 = "NEON NEON_FP16 NEON_VFPV4 ASIMD" + ) + conf_features = dict( + # X86 + SSE = dict( + interest=1, headers="xmmintrin.h", + # enabling SSE without SSE2 is useless also + # it's non-optional for x86_64 + implies="SSE2" + ), + SSE2 = dict(interest=2, implies="SSE", headers="emmintrin.h"), + SSE3 = dict(interest=3, implies="SSE2", headers="pmmintrin.h"), + SSSE3 = dict(interest=4, implies="SSE3", headers="tmmintrin.h"), + SSE41 = dict(interest=5, implies="SSSE3", headers="smmintrin.h"), + POPCNT = dict(interest=6, implies="SSE41", headers="popcntintrin.h"), + SSE42 = dict(interest=7, implies="POPCNT"), + AVX = dict( + interest=8, implies="SSE42", headers="immintrin.h", + implies_detect=False + ), + XOP = dict(interest=9, implies="AVX", headers="x86intrin.h"), + FMA4 = dict(interest=10, implies="AVX", headers="x86intrin.h"), + F16C = dict(interest=11, implies="AVX"), + FMA3 = 
dict(interest=12, implies="F16C"), + AVX2 = dict(interest=13, implies="F16C"), + AVX512F = dict(interest=20, implies="FMA3 AVX2", implies_detect=False), + AVX512CD = dict(interest=21, implies="AVX512F"), + AVX512_KNL = dict( + interest=40, implies="AVX512CD", group="AVX512ER AVX512PF", + detect="AVX512_KNL", implies_detect=False + ), + AVX512_KNM = dict( + interest=41, implies="AVX512_KNL", + group="AVX5124FMAPS AVX5124VNNIW AVX512VPOPCNTDQ", + detect="AVX512_KNM", implies_detect=False + ), + AVX512_SKX = dict( + interest=42, implies="AVX512CD", group="AVX512VL AVX512BW AVX512DQ", + detect="AVX512_SKX", implies_detect=False + ), + AVX512_CLX = dict( + interest=43, implies="AVX512_SKX", group="AVX512VNNI", + detect="AVX512_CLX" + ), + AVX512_CNL = dict( + interest=44, implies="AVX512_SKX", group="AVX512IFMA AVX512VBMI", + detect="AVX512_CNL", implies_detect=False + ), + AVX512_ICL = dict( + interest=45, implies="AVX512_CLX AVX512_CNL", + group="AVX512VBMI2 AVX512BITALG AVX512VPOPCNTDQ", + detect="AVX512_ICL", implies_detect=False + ), + # IBM/Power + ## Power7/ISA 2.06 + VSX = dict(interest=1, headers="altivec.h"), + ## Power8/ISA 2.07 + VSX2 = dict(interest=2, implies="VSX", implies_detect=False), + ## Power9/ISA 3.00 + VSX3 = dict(interest=3, implies="VSX2", implies_detect=False), + # ARM + NEON = dict(interest=1, headers="arm_neon.h"), + NEON_FP16 = dict(interest=2, implies="NEON"), + ## FMA + NEON_VFPV4 = dict(interest=3, implies="NEON_FP16"), + ## Advanced SIMD + ASIMD = dict(interest=4, implies="NEON_FP16 NEON_VFPV4", implies_detect=False), + ## ARMv8.2 half-precision & vector arithm + ASIMDHP = dict(interest=5, implies="ASIMD"), + ## ARMv8.2 dot product + ASIMDDP = dict(interest=6, implies="ASIMD"), + ## ARMv8.2 Single & half-precision Multiply + ASIMDFHM = dict(interest=7, implies="ASIMDHP"), + ) + def conf_features_partial(self): + """Return a dictionary of supported CPU features by the platform, + and accumulate the rest of undefined options in 
`conf_features`, + the returned dict has same rules and notes in + class attribute `conf_features`, also its override + any options that been set in 'conf_features'. + """ + if self.cc_noopt: + # optimization is disabled + return {} + + on_x86 = self.cc_on_x86 or self.cc_on_x64 + is_unix = self.cc_is_gcc or self.cc_is_clang + + if on_x86 and is_unix: return dict( + SSE = dict(flags="-msse"), + SSE2 = dict(flags="-msse2"), + SSE3 = dict(flags="-msse3"), + SSSE3 = dict(flags="-mssse3"), + SSE41 = dict(flags="-msse4.1"), + POPCNT = dict(flags="-mpopcnt"), + SSE42 = dict(flags="-msse4.2"), + AVX = dict(flags="-mavx"), + F16C = dict(flags="-mf16c"), + XOP = dict(flags="-mxop"), + FMA4 = dict(flags="-mfma4"), + FMA3 = dict(flags="-mfma"), + AVX2 = dict(flags="-mavx2"), + AVX512F = dict(flags="-mavx512f"), + AVX512CD = dict(flags="-mavx512cd"), + AVX512_KNL = dict(flags="-mavx512er -mavx512pf"), + AVX512_KNM = dict( + flags="-mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq" + ), + AVX512_SKX = dict(flags="-mavx512vl -mavx512bw -mavx512dq"), + AVX512_CLX = dict(flags="-mavx512vnni"), + AVX512_CNL = dict(flags="-mavx512ifma -mavx512vbmi"), + AVX512_ICL = dict( + flags="-mavx512vbmi2 -mavx512bitalg -mavx512vpopcntdq" + ) + ) + if on_x86 and self.cc_is_icc: return dict( + SSE = dict(flags="-msse"), + SSE2 = dict(flags="-msse2"), + SSE3 = dict(flags="-msse3"), + SSSE3 = dict(flags="-mssse3"), + SSE41 = dict(flags="-msse4.1"), + POPCNT = {}, + SSE42 = dict(flags="-msse4.2"), + AVX = dict(flags="-mavx"), + F16C = {}, + XOP = dict(disable="Intel Compiler doesn't support it"), + FMA4 = dict(disable="Intel Compiler doesn't support it"), + # Intel Compiler doesn't support AVX2 or FMA3 independently + FMA3 = dict( + implies="F16C AVX2", flags="-march=core-avx2" + ), + AVX2 = dict(implies="FMA3", flags="-march=core-avx2"), + # Intel Compiler doesn't support AVX512F or AVX512CD independently + AVX512F = dict( + implies="AVX2 AVX512CD", flags="-march=common-avx512" + ), + AVX512CD = 
dict( + implies="AVX2 AVX512F", flags="-march=common-avx512" + ), + AVX512_KNL = dict(flags="-xKNL"), + AVX512_KNM = dict(flags="-xKNM"), + AVX512_SKX = dict(flags="-xSKYLAKE-AVX512"), + AVX512_CLX = dict(flags="-xCASCADELAKE"), + AVX512_CNL = dict(flags="-xCANNONLAKE"), + AVX512_ICL = dict(flags="-xICELAKE-CLIENT"), + ) + if on_x86 and self.cc_is_iccw: return dict( + SSE = dict(flags="/arch:SSE"), + SSE2 = dict(flags="/arch:SSE2"), + SSE3 = dict(flags="/arch:SSE3"), + SSSE3 = dict(flags="/arch:SSSE3"), + SSE41 = dict(flags="/arch:SSE4.1"), + POPCNT = {}, + SSE42 = dict(flags="/arch:SSE4.2"), + AVX = dict(flags="/arch:AVX"), + F16C = {}, + XOP = dict(disable="Intel Compiler doesn't support it"), + FMA4 = dict(disable="Intel Compiler doesn't support it"), + # Intel Compiler doesn't support FMA3 or AVX2 independently + FMA3 = dict( + implies="F16C AVX2", flags="/arch:CORE-AVX2" + ), + AVX2 = dict( + implies="FMA3", flags="/arch:CORE-AVX2" + ), + # Intel Compiler doesn't support AVX512F or AVX512CD independently + AVX512F = dict( + implies="AVX2 AVX512CD", flags="/Qx:COMMON-AVX512" + ), + AVX512CD = dict( + implies="AVX2 AVX512F", flags="/Qx:COMMON-AVX512" + ), + AVX512_KNL = dict(flags="/Qx:KNL"), + AVX512_KNM = dict(flags="/Qx:KNM"), + AVX512_SKX = dict(flags="/Qx:SKYLAKE-AVX512"), + AVX512_CLX = dict(flags="/Qx:CASCADELAKE"), + AVX512_CNL = dict(flags="/Qx:CANNONLAKE"), + AVX512_ICL = dict(flags="/Qx:ICELAKE-CLIENT") + ) + if on_x86 and self.cc_is_msvc: return dict( + SSE = dict(flags="/arch:SSE"), + SSE2 = dict(flags="/arch:SSE2"), + SSE3 = {}, + SSSE3 = {}, + SSE41 = {}, + POPCNT = dict(headers="nmmintrin.h"), + SSE42 = {}, + AVX = dict(flags="/arch:AVX"), + F16C = {}, + XOP = dict(headers="ammintrin.h"), + FMA4 = dict(headers="ammintrin.h"), + # MSVC doesn't support FMA3 or AVX2 independently + FMA3 = dict( + implies="F16C AVX2", flags="/arch:AVX2" + ), + AVX2 = dict( + implies="F16C FMA3", flags="/arch:AVX2" + ), + # MSVC doesn't support AVX512F or AVX512CD 
independently, + # always generate instructions belong to (VL/VW/DQ) + AVX512F = dict( + implies="AVX2 AVX512CD AVX512_SKX", flags="/arch:AVX512" + ), + AVX512CD = dict( + implies="AVX512F AVX512_SKX", flags="/arch:AVX512" + ), + AVX512_KNL = dict( + disable="MSVC compiler doesn't support it" + ), + AVX512_KNM = dict( + disable="MSVC compiler doesn't support it" + ), + AVX512_SKX = dict(flags="/arch:AVX512"), + AVX512_CLX = {}, + AVX512_CNL = {}, + AVX512_ICL = {} + ) + + on_power = self.cc_on_ppc64le or self.cc_on_ppc64 + if on_power: + partial = dict( + VSX = dict( + implies=("VSX2" if self.cc_on_ppc64le else ""), + flags="-mvsx" + ), + VSX2 = dict( + flags="-mcpu=power8", implies_detect=False + ), + VSX3 = dict( + flags="-mcpu=power9 -mtune=power9", implies_detect=False + ) + ) + if self.cc_is_clang: + partial["VSX"]["flags"] = "-maltivec -mvsx" + partial["VSX2"]["flags"] = "-mpower8-vector" + partial["VSX3"]["flags"] = "-mpower9-vector" + + return partial + + if self.cc_on_aarch64 and is_unix: return dict( + NEON = dict( + implies="NEON_FP16 NEON_VFPV4 ASIMD", autovec=True + ), + NEON_FP16 = dict( + implies="NEON NEON_VFPV4 ASIMD", autovec=True + ), + NEON_VFPV4 = dict( + implies="NEON NEON_FP16 ASIMD", autovec=True + ), + ASIMD = dict( + implies="NEON NEON_FP16 NEON_VFPV4", autovec=True + ), + ASIMDHP = dict( + flags="-march=armv8.2-a+fp16" + ), + ASIMDDP = dict( + flags="-march=armv8.2-a+dotprod" + ), + ASIMDFHM = dict( + flags="-march=armv8.2-a+fp16fml" + ), + ) + if self.cc_on_armhf and is_unix: return dict( + NEON = dict( + flags="-mfpu=neon" + ), + NEON_FP16 = dict( + flags="-mfpu=neon-fp16 -mfp16-format=ieee" + ), + NEON_VFPV4 = dict( + flags="-mfpu=neon-vfpv4", + ), + ASIMD = dict( + flags="-mfpu=neon-fp-armv8 -march=armv8-a+simd", + ), + ASIMDHP = dict( + flags="-march=armv8.2-a+fp16" + ), + ASIMDDP = dict( + flags="-march=armv8.2-a+dotprod", + ), + ASIMDFHM = dict( + flags="-march=armv8.2-a+fp16fml" + ) + ) + # TODO: ARM MSVC + return {} + + def 
__init__(self): + if self.conf_tmp_path is None: + import tempfile, shutil + tmp = tempfile.mkdtemp() + def rm_temp(): + try: + shutil.rmtree(tmp) + except IOError: + pass + atexit.register(rm_temp) + self.conf_tmp_path = tmp + + if self.conf_cache_factors is None: + self.conf_cache_factors = [ + os.path.getmtime(__file__), + self.conf_nocache + ] + +class _Distutils: + """A helper class that provides a collection of fundamental methods + implemented in a top of Python and NumPy Distutils. + + The idea behind this class is to gather all methods that it may + need to override in case of reuse 'CCompilerOpt' in environment + different than of what NumPy has. + + Parameters + ---------- + ccompiler : `CCompiler` + The generate instance that returned from `distutils.ccompiler.new_compiler()`. + """ + def __init__(self, ccompiler): + self._ccompiler = ccompiler + + def dist_compile(self, sources, flags, **kwargs): + """Wrap CCompiler.compile()""" + assert(isinstance(sources, list)) + assert(isinstance(flags, list)) + flags = kwargs.pop("extra_postargs", []) + flags + return self._ccompiler.compile( + sources, extra_postargs=flags, **kwargs + ) + + def dist_test(self, source, flags): + """Return True if 'CCompiler.compile()' able to compile + a source file with certain flags. 
+ """ + assert(isinstance(source, str)) + from distutils.errors import CompileError + cc = self._ccompiler; + bk_spawn = getattr(cc, 'spawn', None) + if bk_spawn: + cc_type = getattr(self._ccompiler, "compiler_type", "") + if cc_type in ("msvc",): + setattr(cc, 'spawn', self._dist_test_spawn_paths) + else: + setattr(cc, 'spawn', self._dist_test_spawn) + test = False + try: + self.dist_compile( + [source], flags, output_dir=self.conf_tmp_path + ) + test = True + except CompileError as e: + self.dist_log(str(e), stderr=True) + if bk_spawn: + setattr(cc, 'spawn', bk_spawn) + return test + + def dist_info(self): + """Return a string containing all environment information, required + by the abstract class '_CCompiler' to discovering the platform + environment, also used as a cache factor in order to detect + any changes from outside. + """ + if hasattr(self, "_dist_info"): + return self._dist_info + # play it safe + cc_info = "" + compiler = getattr(self._ccompiler, "compiler", None) + if compiler is not None: + if isinstance(compiler, str): + cc_info += compiler + elif hasattr(compiler, "__iter__"): + cc_info += ' '.join(compiler) + # in case if 'compiler' attribute doesn't provide anything + cc_type = getattr(self._ccompiler, "compiler_type", "") + if cc_type in ("intelem", "intelemw", "mingw64"): + cc_info += "x86_64" + elif cc_type in ("intel", "intelw", "intele"): + cc_info += "x86" + elif cc_type in ("msvc", "mingw32"): + import platform + if platform.architecture()[0] == "32bit": + cc_info += "x86" + else: + cc_info += "x86_64" + else: + # the last hope, too bad for cross-compiling + import platform + cc_info += platform.machine() + + cc_info += cc_type + cflags = os.environ.get("CFLAGS", "") + if cflags not in cc_info: + cc_info += cflags + + self._dist_info = cc_info + return cc_info + + @staticmethod + def dist_error(*args): + """Raise a compiler error""" + from distutils.errors import CompileError + raise CompileError(_Distutils._dist_str(*args)) + + 
@staticmethod + def dist_fatal(*args): + """Raise a distutils error""" + from distutils.errors import DistutilsError + raise DistutilsError(_Distutils._dist_str(*args)) + + @staticmethod + def dist_log(*args, stderr=False): + """Print a console message""" + from numpy.distutils import log + out = _Distutils._dist_str(*args) + if stderr: + log.warn(out) + else: + log.info(out) + + @staticmethod + def dist_load_module(name, path): + """Load a module from file, required by the abstract class '_Cache'.""" + from numpy.compat import npy_load_module + try: + return npy_load_module(name, path) + except Exception as e: + _Distutils.dist_log(e, stderr=True) + return None + + @staticmethod + def _dist_str(*args): + """Return a string to print by log and errors.""" + def to_str(arg): + if not isinstance(arg, str) and hasattr(arg, '__iter__'): + ret = [] + for a in arg: + ret.append(to_str(a)) + return '('+ ' '.join(ret) + ')' + return str(arg) + + stack = inspect.stack()[2] + start = "CCompilerOpt.%s[%d] : " % (stack.function, stack.lineno) + out = ' '.join([ + to_str(a) + for a in (*args,) + ]) + return start + out + + def _dist_test_spawn_paths(self, cmd, display=None): + """ + Fix msvc SDK ENV path same as distutils do + without it we get c1: fatal error C1356: unable to find mspdbcore.dll + """ + if not hasattr(self._ccompiler, "_paths"): + self._dist_test_spawn(cmd) + return + old_path = os.getenv("path") + try: + os.environ["path"] = self._ccompiler._paths + self._dist_test_spawn(cmd) + finally: + os.environ["path"] = old_path + + _dist_warn_regex = re.compile( + # intel and msvc compilers don't raise + # fatal errors when flags are wrong or unsupported + ".*(" + "ignoring unknown option|" # msvc + "invalid argument for option" # intel + ").*" + ) + @staticmethod + def _dist_test_spawn(cmd, display=None): + from distutils.errors import CompileError + try: + o = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + if isinstance(o, bytes): + o = o.decode() + if o and 
re.match(_Distutils._dist_warn_regex, o): + _Distutils.dist_error( + "Flags in command", cmd ,"aren't supported by the compiler" + ", output -> \n%s" % o + ) + except subprocess.CalledProcessError as exc: + o = exc.output + s = exc.returncode + except OSError: + o = b'' + s = 127 + else: + return None + o = o.decode() + _Distutils.dist_error( + "Command", cmd, "failed with exit status %d output -> \n%s" % ( + s, o + )) + +_share_cache = {} +class _Cache: + """An abstract class handles caching functionality, provides two + levels of caching, in-memory by share instances attributes among + each other and by store attributes into files. + + **Note**: + any attributes that start with ``_`` or ``conf_`` will be ignored. + + Parameters + ---------- + cache_path: str or None + The path of cache file, if None then cache in file will disabled. + + *factors: + The caching factors that need to utilize next to `conf_cache_factors`. + + Attributes + ---------- + cache_private: set + Hold the attributes that need be skipped from "in-memory cache". + + cache_infile: bool + Utilized during initializing this class, to determine if the cache was able + to loaded from the specified cache path in 'cache_path'. 
+ """ + + # skip attributes from cache + _cache_ignore = re.compile("^(_|conf_)") + + def __init__(self, cache_path=None, *factors): + self.cache_me = {} + self.cache_private = set() + self.cache_infile = False + + if self.conf_nocache: + self.dist_log("cache is disabled by `Config`") + return + + chash = self.cache_hash(*factors, *self.conf_cache_factors) + if cache_path: + if os.path.exists(cache_path): + self.dist_log("load cache from file ->", cache_path) + cache_mod = self.dist_load_module("cache", cache_path) + if not cache_mod: + self.dist_log( + "unable to load the cache file as a module", + stderr=True + ) + elif not hasattr(cache_mod, "hash") or \ + not hasattr(cache_mod, "data"): + self.dist_log("invalid cache file", stderr=True) + elif chash == cache_mod.hash: + self.dist_log("hit the file cache") + for attr, val in cache_mod.data.items(): + setattr(self, attr, val) + self.cache_infile = True + else: + self.dist_log("miss the file cache") + + atexit.register(self._cache_write, cache_path, chash) + + if not self.cache_infile: + other_cache = _share_cache.get(chash) + if other_cache: + self.dist_log("hit the memory cache") + for attr, val in other_cache.__dict__.items(): + if attr in other_cache.cache_private or \ + re.match(self._cache_ignore, attr): + continue + setattr(self, attr, val) + + _share_cache[chash] = self + + def __del__(self): + # TODO: remove the cache form share on del + pass + + def _cache_write(self, cache_path, cache_hash): + # TODO: don't write if the cache doesn't change + self.dist_log("write cache to path ->", cache_path) + for attr in list(self.__dict__.keys()): + if re.match(self._cache_ignore, attr): + self.__dict__.pop(attr) + + d = os.path.dirname(cache_path) + if not os.path.exists(d): + os.makedirs(d) + + repr_dict = pprint.pformat(self.__dict__, compact=True) + with open(cache_path, "w") as f: + f.write(textwrap.dedent("""\ + # AUTOGENERATED DON'T EDIT + # Please make changes to the code generator \ + 
(distutils/ccompiler_opt.py) + hash = {} + data = \\ + """).format(cache_hash)) + f.write(repr_dict) + + def cache_hash(self, *factors): + # is there a built-in non-crypto hash? + # sdbm + chash = 0 + for f in factors: + for char in str(f): + chash = ord(char) + (chash << 6) + (chash << 16) - chash + chash &= 0xFFFFFFFF + return chash + + @staticmethod + def me(cb): + """ + A static method that can be treated as a decorator to + dynamically cache certain methods. + """ + def cache_wrap_me(self, *args, **kwargs): + # good for normal args + cache_key = str(( + cb.__name__, *args, *kwargs.keys(), *kwargs.values() + )) + if cache_key in self.cache_me: + return self.cache_me[cache_key] + ccb = cb(self, *args, **kwargs) + self.cache_me[cache_key] = ccb + return ccb + return cache_wrap_me + +class _CCompiler(object): + """A helper class for `CCompilerOpt` containing all utilities that + related to the fundamental compiler's functions. + + Attributes + ---------- + cc_on_x86 : bool + True when the target architecture is 32-bit x86 + cc_on_x64 : bool + True when the target architecture is 64-bit x86 + cc_on_ppc64 : bool + True when the target architecture is 64-bit big-endian PowerPC + cc_on_armhf : bool + True when the target architecture is 32-bit ARMv7+ + cc_on_aarch64 : bool + True when the target architecture is 64-bit Armv8-a+ + cc_on_noarch : bool + True when the target architecture is unknown or not supported + cc_is_gcc : bool + True if the compiler is GNU or + if the compiler is unknown + cc_is_clang : bool + True if the compiler is Clang + cc_is_icc : bool + True if the compiler is Intel compiler (unix like) + cc_is_iccw : bool + True if the compiler is Intel compiler (msvc like) + cc_is_nocc : bool + True if the compiler isn't supported directly, + Note: that cause a fail-back to gcc + cc_has_debug : bool + True if the compiler has debug flags + cc_has_native : bool + True if the compiler has native flags + cc_noopt : bool + True if the compiler has definition 
'DISABLE_OPT*', + or 'cc_on_noarch' is True + cc_march : str + The target architecture name, or "unknown" if + the architecture isn't supported + cc_name : str + The compiler name, or "unknown" if the compiler isn't supported + cc_flags : dict + Dictionary containing the initialized flags of `_Config.conf_cc_flags` + """ + def __init__(self): + if hasattr(self, "cc_is_cached"): + return + to_detect = ( + # attr regex + ( + ("cc_on_x64", "^(x|x86_|amd)64"), + ("cc_on_x86", "^(x86|i386|i686)"), + ("cc_on_ppc64le", "^(powerpc|ppc)64(el|le)"), + ("cc_on_ppc64", "^(powerpc|ppc)64"), + ("cc_on_armhf", "^arm"), + ("cc_on_aarch64", "^aarch64"), + # priority is given to first of string + # if it fail we search in the rest, due + # to append platform.machine() at the end, + # check method 'dist_info()' for more clarification. + ("cc_on_x64", ".*(x|x86_|amd)64.*"), + ("cc_on_x86", ".*(x86|i386|i686).*"), + ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"), + ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), + ("cc_on_armhf", ".*arm.*"), + ("cc_on_aarch64", ".*aarch64.*"), + # undefined platform + ("cc_on_noarch", ""), + ), + ( + ("cc_is_gcc", r".*(gcc|gnu\-g).*"), + ("cc_is_clang", ".*clang.*"), + ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like + ("cc_is_icc", ".*(intel|icc).*"), # intel unix like + ("cc_is_msvc", ".*msvc.*"), + ("cc_is_nocc", ""), + ), + (("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"),), + (("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),), + # in case if the class run with -DNPY_DISABLE_OPTIMIZATION + (("cc_noopt", ".*DISABLE_OPT.*"),), + ) + for section in to_detect: + for attr, rgex in section: + setattr(self, attr, False) + + dist_info = self.dist_info() + for section in to_detect: + for attr, rgex in section: + if rgex and not re.match(rgex, dist_info, re.IGNORECASE): + continue + setattr(self, attr, True) + break + + if self.cc_on_noarch: + self.dist_log( + "unable to detect CPU arch via compiler info, " + "optimization is 
disabled \ninfo << %s >> " % dist_info, + stderr=True + ) + self.cc_noopt = True + + if self.conf_noopt: + self.dist_log("Optimization is disabled by the Config", stderr=True) + self.cc_noopt = True + + if self.cc_is_nocc: + """ + mingw can be treated as a gcc, and also xlc even if it based on clang, + but still has the same gcc optimization flags. + """ + self.dist_log( + "unable to detect compiler name via info <<\n%s\n>> " + "treating it as a gcc" % dist_info, + stderr=True + ) + self.cc_is_gcc = True + + self.cc_march = "unknown" + for arch in ("x86", "x64", "ppc64", "ppc64le", "armhf", "aarch64"): + if getattr(self, "cc_on_" + arch): + self.cc_march = arch + break + + self.cc_name = "unknown" + for name in ("gcc", "clang", "iccw", "icc", "msvc"): + if getattr(self, "cc_is_" + name): + self.cc_name = name + break + + self.cc_flags = {} + compiler_flags = self.conf_cc_flags.get(self.cc_name) + if compiler_flags is None: + self.dist_fatal( + "undefined flag for compiler '%s', " + "leave an empty dict instead" % self.cc_name + ) + for name, flags in compiler_flags.items(): + self.cc_flags[name] = nflags = [] + if flags: + assert(isinstance(flags, str)) + flags = flags.split() + for f in flags: + if self.cc_test_flags([f]): + nflags.append(f) + + self.cc_is_cached = True + + @_Cache.me + def cc_test_flags(self, flags): + """ + Returns True if the compiler supports 'flags'. + """ + assert(isinstance(flags, list)) + self.dist_log("testing flags", flags) + test_path = os.path.join(self.conf_check_path, "test_flags.c") + test = self.dist_test(test_path, flags) + if not test: + self.dist_log("testing failed", stderr=True) + return test + + def cc_normalize_flags(self, flags): + """ + Remove the conflicts that caused due gathering implied features flags. + + Parameters + ---------- + 'flags' list, compiler flags + flags should be sorted from the lowest to the highest interest. + + Returns + ------- + list, filtered from any conflicts. 
+ + Examples + -------- + >>> self.cc_normalize_flags(['-march=armv8.2-a+fp16', '-march=armv8.2-a+dotprod']) + ['armv8.2-a+fp16+dotprod'] + + >>> self.cc_normalize_flags( + ['-msse', '-msse2', '-msse3', '-mssse3', '-msse4.1', '-msse4.2', '-mavx', '-march=core-avx2'] + ) + ['-march=core-avx2'] + """ + assert(isinstance(flags, list)) + if self.cc_is_gcc or self.cc_is_clang or self.cc_is_icc: + return self._cc_normalize_unix(flags) + + if self.cc_is_msvc or self.cc_is_iccw: + return self._cc_normalize_win(flags) + return flags + + _cc_normalize_unix_mrgx = re.compile( + # 1- to check the highest of + r"^(-mcpu=|-march=|-x[A-Z0-9\-])" + ) + _cc_normalize_unix_frgx = re.compile( + # 2- to remove any flags starts with + # -march, -mcpu, -x(INTEL) and '-m' without '=' + r"^(?!(-mcpu=|-march=|-x[A-Z0-9\-]))(?!-m[a-z0-9\-\.]*.$)" + ) + _cc_normalize_unix_krgx = re.compile( + # 3- keep only the highest of + r"^(-mfpu|-mtune)" + ) + _cc_normalize_arch_ver = re.compile( + r"[0-9.]" + ) + def _cc_normalize_unix(self, flags): + def ver_flags(f): + # arch ver subflag + # -march=armv8.2-a+fp16fml + tokens = f.split('+') + ver = float('0' + ''.join( + re.findall(self._cc_normalize_arch_ver, tokens[0]) + )) + return ver, tokens[0], tokens[1:] + + if len(flags) <= 1: + return flags + # get the highest matched flag + for i, cur_flag in enumerate(reversed(flags)): + if not re.match(self._cc_normalize_unix_mrgx, cur_flag): + continue + lower_flags = flags[:-(i+1)] + upper_flags = flags[-i:] + filterd = list(filter( + self._cc_normalize_unix_frgx.search, lower_flags + )) + # gather subflags + ver, arch, subflags = ver_flags(cur_flag) + if ver > 0 and len(subflags) > 0: + for xflag in lower_flags: + xver, _, xsubflags = ver_flags(xflag) + if ver == xver: + subflags = xsubflags + subflags + cur_flag = arch + '+' + '+'.join(subflags) + + flags = filterd + [cur_flag] + if i > 0: + flags += upper_flags + break + + # to remove overridable flags + final_flags = [] + matched = set() + for f in 
reversed(flags): + match = re.match(self._cc_normalize_unix_krgx, f) + if not match: + pass + elif match[0] in matched: + continue + else: + matched.add(match[0]) + final_flags.insert(0, f) + return final_flags + + _cc_normalize_win_frgx = re.compile( + r"^(?!(/arch\:|/Qx\:))" + ) + _cc_normalize_win_mrgx = re.compile( + r"^(/arch|/Qx:)" + ) + def _cc_normalize_win(self, flags): + for i, f in enumerate(reversed(flags)): + if not re.match(self._cc_normalize_win_mrgx, f): + continue + i += 1 + return list(filter( + self._cc_normalize_win_frgx.search, flags[:-i] + )) + flags[-i:] + return flags + +class _Feature: + """A helper class for `CCompilerOpt` that managing CPU features. + + Attributes + ---------- + feature_supported : dict + Dictionary containing all CPU features that supported + by the platform, according to the specified values in attribute + `_Config.conf_features` and `_Config.conf_features_partial()` + + feature_min : set + The minimum support of CPU features, according to + the specified values in attribute `_Config.conf_min_features`. 
+ """ + def __init__(self): + if hasattr(self, "feature_is_cached"): + return + self.feature_supported = pfeatures = self.conf_features_partial() + for feature_name in list(pfeatures.keys()): + feature = pfeatures[feature_name] + cfeature = self.conf_features[feature_name] + feature.update({ + k:v for k,v in cfeature.items() if k not in feature + }) + disabled = feature.get("disable") + if disabled is not None: + pfeatures.pop(feature_name) + self.dist_log( + "feature '%s' is disabled," % feature_name, + disabled, stderr=True + ) + continue + # list is used internally for these options + for option in ( + "implies", "group", "detect", "headers", "flags" + ) : + oval = feature.get(option) + if isinstance(oval, str): + feature[option] = oval.split() + + self.feature_min = set() + min_f = self.conf_min_features.get(self.cc_march, "") + for F in min_f.upper().split(): + if F in self.feature_supported: + self.feature_min.add(F) + + self.feature_is_cached = True + + def feature_names(self, names=None, force_flags=None): + """ + Returns a set of CPU feature names that supported by platform and the **C** compiler. + + Parameters + ---------- + 'names': sequence or None, optional + Specify certain CPU features to test it against the **C** compiler. + if None(default), it will test all current supported features. + **Note**: feature names must be in upper-case. + + 'force_flags': list or None, optional + If None(default), default compiler flags for every CPU feature will be used + during the test. 
+ """ + assert( + names is None or ( + not isinstance(names, str) and + hasattr(names, "__iter__") + ) + ) + assert(force_flags is None or isinstance(force_flags, list)) + if names is None: + names = self.feature_supported.keys() + supported_names = set() + for f in names: + if self.feature_is_supported(f, force_flags=force_flags): + supported_names.add(f) + return supported_names + + def feature_is_exist(self, name): + """ + Returns True if a certain feature is exist and covered within + `_Config.conf_features`. + + Parameters + ---------- + 'name': str + feature name in uppercase. + """ + assert(name.isupper()) + return name in self.conf_features + + def feature_sorted(self, names, reverse=False): + """ + Sort a list of CPU features ordered by the lowest interest. + + Parameters + ---------- + 'names': sequence + sequence of supported feature names in uppercase. + 'reverse': bool, optional + If true, the sorted features is reversed. (highest interest) + + Returns + ------- + list, sorted CPU features + """ + def sort_cb(k): + if isinstance(k, str): + return self.feature_supported[k]["interest"] + # multiple features + rank = max([self.feature_supported[f]["interest"] for f in k]) + # FIXME: that's not a safe way to increase the rank for + # multi targets + rank += len(k) -1 + return rank + return sorted(names, reverse=reverse, key=sort_cb) + + def feature_implies(self, names, keep_origins=False): + """ + Return a set of CPU features that implied by 'names' + + Parameters + ---------- + names: str or sequence of str + CPU feature name(s) in uppercase. + + keep_origins: bool + if False(default) then the returned set will not contain any + features from 'names'. This case happens only when two features + imply each other. 
+ + Examples + -------- + >>> self.feature_implies("SSE3") + {'SSE', 'SSE2'} + >>> self.feature_implies("SSE2") + {'SSE'} + >>> self.feature_implies("SSE2", keep_origins=True) + # 'SSE2' found here since 'SSE' and 'SSE2' imply each other + {'SSE', 'SSE2'} + """ + def get_implies(name, _caller=set()): + implies = set() + d = self.feature_supported[name] + for i in d.get("implies", []): + implies.add(i) + if i in _caller: + # infinity recursive guard since + # features can imply each other + continue + _caller.add(name) + implies = implies.union(get_implies(i, _caller)) + return implies + + if isinstance(names, str): + implies = get_implies(names) + names = [names] + else: + assert(hasattr(names, "__iter__")) + implies = set() + for n in names: + implies = implies.union(get_implies(n)) + if not keep_origins: + implies.difference_update(names) + return implies + + def feature_implies_c(self, names): + """same as feature_implies() but combining 'names'""" + if isinstance(names, str): + names = set((names,)) + else: + names = set(names) + return names.union(self.feature_implies(names)) + + def feature_ahead(self, names): + """ + Return list of features in 'names' after remove any + implied features and keep the origins. + + Parameters + ---------- + 'names': sequence + sequence of CPU feature names in uppercase. 
+ + Returns + ------- + list of CPU features sorted as-is 'names' + + Examples + -------- + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41"]) + ["SSE41"] + # assume AVX2 and FMA3 implies each other and AVX2 + # is the highest interest + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"]) + ["AVX2"] + # assume AVX2 and FMA3 don't implies each other + >>> self.feature_ahead(["SSE2", "SSE3", "SSE41", "AVX2", "FMA3"]) + ["AVX2", "FMA3"] + """ + assert( + not isinstance(names, str) + and hasattr(names, '__iter__') + ) + implies = self.feature_implies(names, keep_origins=True) + ahead = [n for n in names if n not in implies] + if len(ahead) == 0: + # return the highest interested feature + # if all features imply each other + ahead = self.feature_sorted(names, reverse=True)[:1] + return ahead + + def feature_untied(self, names): + """ + same as 'feature_ahead()' but if both features implied each other + and keep the highest interest. + + Parameters + ---------- + 'names': sequence + sequence of CPU feature names in uppercase. + + Returns + ------- + list of CPU features sorted as-is 'names' + + Examples + -------- + >>> self.feature_untied(["SSE2", "SSE3", "SSE41"]) + ["SSE2", "SSE3", "SSE41"] + # assume AVX2 and FMA3 implies each other + >>> self.feature_untied(["SSE2", "SSE3", "SSE41", "FMA3", "AVX2"]) + ["SSE2", "SSE3", "SSE41", "AVX2"] + """ + assert( + not isinstance(names, str) + and hasattr(names, '__iter__') + ) + final = [] + for n in names: + implies = self.feature_implies(n) + tied = [ + nn for nn in final + if nn in implies and n in self.feature_implies(nn) + ] + if tied: + tied = self.feature_sorted(tied + [n]) + if n not in tied[1:]: + continue + final.remove(tied[:1][0]) + final.append(n) + return final + + def feature_get_til(self, names, keyisfalse): + """ + same as `feature_implies_c()` but stop collecting implied + features when feature's option that provided through + parameter 'keyisfalse' is False, also sorting the returned + features. 
+ """ + def til(tnames): + # sort from highest to lowest interest then cut if "key" is False + tnames = self.feature_implies_c(tnames) + tnames = self.feature_sorted(tnames, reverse=True) + for i, n in enumerate(tnames): + if not self.feature_supported[n].get(keyisfalse, True): + tnames = tnames[:i+1] + break + return tnames + + if isinstance(names, str) or len(names) <= 1: + names = til(names) + # normalize the sort + names.reverse() + return names + + names = self.feature_ahead(names) + names = {t for n in names for t in til(n)} + return self.feature_sorted(names) + + def feature_detect(self, names): + """ + Return a list of CPU features that required to be detected + sorted from the lowest to highest interest. + """ + names = self.feature_get_til(names, "implies_detect") + detect = [] + for n in names: + d = self.feature_supported[n] + detect += d.get("detect", d.get("group", [n])) + return detect + + @_Cache.me + def feature_flags(self, names): + """ + Return a list of CPU features flags sorted from the lowest + to highest interest. + """ + names = self.feature_sorted(self.feature_implies_c(names)) + flags = [] + for n in names: + d = self.feature_supported[n] + f = d.get("flags", []) + if not f or not self.cc_test_flags(f): + continue + flags += f + return self.cc_normalize_flags(flags) + + @_Cache.me + def feature_test(self, name, force_flags=None): + """ + Test a certain CPU feature against the compiler through its own + check file. + + Parameters + ---------- + 'name': str + Supported CPU feature name. + + 'force_flags': list or None, optional + If None(default), the returned flags from `feature_flags()` + will be used. + """ + if force_flags is None: + force_flags = self.feature_flags(name) + + self.dist_log( + "testing feature '%s' with flags (%s)" % ( + name, ' '.join(force_flags) + )) + # Each CPU feature must have C source code contains at + # least one intrinsic or instruction related to this feature. 
+ test_path = os.path.join( + self.conf_check_path, "cpu_%s.c" % name.lower() + ) + if not os.path.exists(test_path): + self.dist_fatal("feature test file is not exist", path) + + test = self.dist_test(test_path, force_flags + self.cc_flags["werror"]) + if not test: + self.dist_log("testing failed", stderr=True) + return test + + @_Cache.me + def feature_is_supported(self, name, force_flags=None): + """ + Check if a certain CPU feature is supported by the platform and compiler. + + Parameters + ---------- + 'name': str + CPU feature name in uppercase. + + 'force_flags': list or None, optional + If None(default), default compiler flags for every CPU feature will be used + during test. + """ + assert(name.isupper()) + assert(force_flags is None or isinstance(force_flags, list)) + + supported = name in self.feature_supported + if supported: + for impl in self.feature_implies(name): + if not self.feature_test(impl, force_flags): + return False + if not self.feature_test(name, force_flags): + return False + return supported + + @_Cache.me + def feature_can_autovec(self, name): + """ + check if the feature can be auto-vectorized by the compiler + """ + assert(isinstance(name, str)) + d = self.feature_supported[name] + can = d.get("autovec", None) + if can is None: + valid_flags = [ + self.cc_test_flags([f]) for f in d.get("flags", []) + ] + can = valid_flags and any(valid_flags) + return can + + def feature_c_preprocessor(self, feature_name, tabs=0): + """ + Generate C preprocessor definitions and include headers of a CPU feature. + + Parameters + ---------- + 'feature_name': str + CPU feature name in uppercase. + 'tabs': int + if > 0, align the generated strings to the right depend on number of tabs. 
+ + Returns + ------- + str, generated C preprocessor + + Examples + -------- + >>> self.feature_c_preprocessor("SSE3") + /** SSE3 **/ + #define NPY_HAVE_SSE3 1 + #include <pmmintrin.h> + """ + assert(feature_name.isupper()) + feature = self.feature_supported.get(feature_name) + assert(feature is not None) + + prepr = [ + "/** %s **/" % feature_name, + "#define %sHAVE_%s 1" % (self.conf_c_prefix, feature_name) + ] + prepr += [ + "#include <%s>" % h for h in feature.get("headers", []) + ] + group = feature.get("group", []) + for f in group: + # Guard features in case of duplicate definitions + prepr += [ + "#ifndef %sHAVE_%s" % (self.conf_c_prefix, f), + "\t#define %sHAVE_%s 1" % (self.conf_c_prefix, f), + "#endif", + ] + if tabs > 0: + prepr = [('\t'*tabs) + l for l in prepr] + return '\n'.join(prepr) + +class _Parse: + """A helper class that parsing main arguments of `CCompilerOpt`, + also parsing configuration statements in dispatch-able sources. + + Parameters + ---------- + cpu_baseline: str or None + minimal set of required CPU features or special options. + + cpu_dispatch: str or None + dispatched set of additional CPU features or special options. + + Special options can be: + - **MIN**: Enables the minimum CPU features that utilized via `_Config.conf_min_features` + - **MAX**: Enables all supported CPU features by the Compiler and platform. + - **NATIVE**: Enables all CPU features that supported by the current machine. + - **NONE**: Enables nothing + - **Operand +/-**: remove or add features, useful with options **MAX**, **MIN** and **NATIVE**. + NOTE: operand + is only added for nominal reason. + + NOTES: + - Case-insensitive among all CPU features and special options. + - Comma or space can be used as a separator. + - If the CPU feature is not supported by the user platform or compiler, + it will be skipped rather than raising a fatal error. 
+ - Any specified CPU features to 'cpu_dispatch' will be skipped if its part of CPU baseline features + - 'cpu_baseline' force enables implied features. + + Attributes + ---------- + parse_baseline_names : list + Final CPU baseline's feature names(sorted from low to high) + parse_baseline_flags : list + Compiler flags of baseline features + parse_dispatch_names : list + Final CPU dispatch-able feature names(sorted from low to high) + parse_target_groups : dict + Dictionary containing initialized target groups that configured + through class attribute `conf_target_groups`. + + The key is represent the group name and value is a tuple + contains three items : + - bool, True if group has the 'baseline' option. + - list, list of CPU features. + - list, list of extra compiler flags. + + """ + def __init__(self, cpu_baseline, cpu_dispatch): + self._parse_policies = dict( + # POLICY NAME, (HAVE, NOT HAVE, [DEB]) + KEEP_BASELINE = ( + None, self._parse_policy_not_keepbase, + [] + ), + KEEP_SORT = ( + self._parse_policy_keepsort, + self._parse_policy_not_keepsort, + [] + ), + MAXOPT = ( + self._parse_policy_maxopt, None, + [] + ), + WERROR = ( + self._parse_policy_werror, None, + [] + ), + AUTOVEC = ( + self._parse_policy_autovec, None, + ["MAXOPT"] + ) + ) + if hasattr(self, "parse_is_cached"): + return + + self.parse_baseline_names = [] + self.parse_baseline_flags = [] + self.parse_dispatch_names = [] + self.parse_target_groups = {} + + if self.cc_noopt: + # skip parsing baseline and dispatch args and keep parsing target groups + cpu_baseline = cpu_dispatch = None + + self.dist_log("check requested baseline") + if cpu_baseline is not None: + cpu_baseline = self._parse_arg_features("cpu_baseline", cpu_baseline) + baseline_names = self.feature_names(cpu_baseline) + self.parse_baseline_flags = self.feature_flags(baseline_names) + self.parse_baseline_names = self.feature_sorted( + self.feature_implies_c(baseline_names) + ) + + self.dist_log("check requested dispatch-able 
features") + if cpu_dispatch is not None: + cpu_dispatch_ = self._parse_arg_features("cpu_dispatch", cpu_dispatch) + cpu_dispatch = { + f for f in cpu_dispatch_ + if f not in self.parse_baseline_names + } + conflict_baseline = cpu_dispatch_.difference(cpu_dispatch) + self.parse_dispatch_names = self.feature_sorted( + self.feature_names(cpu_dispatch) + ) + if len(conflict_baseline) > 0: + self.dist_log( + "skip features", conflict_baseline, "since its part of baseline" + ) + + self.dist_log("initialize targets groups") + for group_name, tokens in self.conf_target_groups.items(): + self.dist_log("parse target group", group_name) + GROUP_NAME = group_name.upper() + if not tokens or not tokens.strip(): + # allow empty groups, useful in case if there's a need + # to disable certain group since '_parse_target_tokens()' + # requires at least one valid target + self.parse_target_groups[GROUP_NAME] = ( + False, [], [] + ) + continue + has_baseline, features, extra_flags = \ + self._parse_target_tokens(tokens) + self.parse_target_groups[GROUP_NAME] = ( + has_baseline, features, extra_flags + ) + + self.parse_is_cached = True + + def parse_targets(self, source): + """ + Fetch and parse configuration statements that required for + defining the targeted CPU features, statements should be declared + in the top of source in between **C** comment and start + with a special mark **@targets**. + + Configuration statements are sort of keywords representing + CPU features names, group of statements and policies, combined + together to determine the required optimization. + + Parameters + ---------- + source: str + the path of **C** source file. + + Returns + ------- + - bool, True if group has the 'baseline' option + - list, list of CPU features + - list, list of extra compiler flags + """ + self.dist_log("looking for '@targets' inside -> ", source) + # get lines between /*@targets and */ + with open(source) as fd: + tokens = "" + max_to_reach = 1000 # good enough, isn't? 
+ start_with = "@targets" + start_pos = -1 + end_with = "*/" + end_pos = -1 + for current_line, line in enumerate(fd): + if current_line == max_to_reach: + self.dist_fatal("reached the max of lines") + break + if start_pos == -1: + start_pos = line.find(start_with) + if start_pos == -1: + continue + start_pos += len(start_with) + tokens += line + end_pos = line.find(end_with) + if end_pos != -1: + end_pos += len(tokens) - len(line) + break + + if start_pos == -1: + self.dist_fatal("expected to find '%s' within a C comment" % start_with) + if end_pos == -1: + self.dist_fatal("expected to end with '%s'" % end_with) + + tokens = tokens[start_pos:end_pos] + return self._parse_target_tokens(tokens) + + _parse_regex_arg = re.compile(r'\s|[,]|([+-])') + def _parse_arg_features(self, arg_name, req_features): + if not isinstance(req_features, str): + self.dist_fatal("expected a string in '%s'" % arg_name) + + final_features = set() + # space and comma can be used as a separator + tokens = list(filter(None, re.split(self._parse_regex_arg, req_features))) + append = True # append is the default + for tok in tokens: + if tok[0] in ("#", "$"): + self.dist_fatal( + arg_name, "target groups and policies " + "aren't allowed from arguments, " + "only from dispatch-able sources" + ) + if tok == '+': + append = True + continue + if tok == '-': + append = False + continue + + TOK = tok.upper() # we use upper-case internally + features_to = set() + if TOK == "NONE": + pass + elif TOK == "NATIVE": + native = self.cc_flags["native"] + if not native: + self.dist_fatal(arg_name, + "native option isn't supported by the compiler" + ) + features_to = self.feature_names(force_flags=native) + elif TOK == "MAX": + features_to = self.feature_supported.keys() + elif TOK == "MIN": + features_to = self.feature_min + else: + if TOK in self.feature_supported: + features_to.add(TOK) + else: + if not self.feature_is_exist(TOK): + self.dist_fatal(arg_name, + ", '%s' isn't a known feature or option" % tok 
+ ) + if append: + final_features = final_features.union(features_to) + else: + final_features = final_features.difference(features_to) + + append = True # back to default + + return final_features + + _parse_regex_target = re.compile(r'\s|[*,/]|([()])') + def _parse_target_tokens(self, tokens): + assert(isinstance(tokens, str)) + final_targets = [] # to keep it sorted as specified + extra_flags = [] + has_baseline = False + + skipped = set() + policies = set() + multi_target = None + + tokens = list(filter(None, re.split(self._parse_regex_target, tokens))) + if not tokens: + self.dist_fatal("expected one token at least") + + for tok in tokens: + TOK = tok.upper() + ch = tok[0] + if ch in ('+', '-'): + self.dist_fatal( + "+/- are 'not' allowed from target's groups or @targets, " + "only from cpu_baseline and cpu_dispatch parms" + ) + elif ch == '$': + if multi_target is not None: + self.dist_fatal( + "policies aren't allowed inside multi-target '()'" + ", only CPU features" + ) + policies.add(self._parse_token_policy(TOK)) + elif ch == '#': + if multi_target is not None: + self.dist_fatal( + "target groups aren't allowed inside multi-target '()'" + ", only CPU features" + ) + has_baseline, final_targets, extra_flags = \ + self._parse_token_group(TOK, has_baseline, final_targets, extra_flags) + elif ch == '(': + if multi_target is not None: + self.dist_fatal("unclosed multi-target, missing ')'") + multi_target = set() + elif ch == ')': + if multi_target is None: + self.dist_fatal("multi-target opener '(' wasn't found") + targets = self._parse_multi_target(multi_target) + if targets is None: + skipped.add(tuple(multi_target)) + else: + if len(targets) == 1: + targets = targets[0] + if targets and targets not in final_targets: + final_targets.append(targets) + multi_target = None # back to default + else: + if TOK == "BASELINE": + if multi_target is not None: + self.dist_fatal("baseline isn't allowed inside multi-target '()'") + has_baseline = True + continue + + if 
multi_target is not None: + multi_target.add(TOK) + continue + + if not self.feature_is_exist(TOK): + self.dist_fatal("invalid target name '%s'" % TOK) + + is_enabled = ( + TOK in self.parse_baseline_names or + TOK in self.parse_dispatch_names + ) + if is_enabled: + if TOK not in final_targets: + final_targets.append(TOK) + continue + + skipped.add(TOK) + + if multi_target is not None: + self.dist_fatal("unclosed multi-target, missing ')'") + if skipped: + self.dist_log( + "skip targets", skipped, + "not part of baseline or dispatch-able features" + ) + + final_targets = self.feature_untied(final_targets) + + # add polices dependencies + for p in list(policies): + _, _, deps = self._parse_policies[p] + for d in deps: + if d in policies: + continue + self.dist_log( + "policy '%s' force enables '%s'" % ( + p, d + )) + policies.add(d) + + # release policies filtrations + for p, (have, nhave, _) in self._parse_policies.items(): + func = None + if p in policies: + func = have + self.dist_log("policy '%s' is ON" % p) + else: + func = nhave + if not func: + continue + has_baseline, final_targets, extra_flags = func( + has_baseline, final_targets, extra_flags + ) + + return has_baseline, final_targets, extra_flags + + def _parse_token_policy(self, token): + """validate policy token""" + if len(token) <= 1 or token[-1:] == token[0]: + self.dist_fatal("'$' must stuck in the begin of policy name") + token = token[1:] + if token not in self._parse_policies: + self.dist_fatal( + "'%s' is an invalid policy name, available policies are" % token, + self._parse_policies.keys() + ) + return token + + def _parse_token_group(self, token, has_baseline, final_targets, extra_flags): + """validate group token""" + if len(token) <= 1 or token[-1:] == token[0]: + self.dist_fatal("'#' must stuck in the begin of group name") + + token = token[1:] + ghas_baseline, gtargets, gextra_flags = self.parse_target_groups.get( + token, (False, None, []) + ) + if gtargets is None: + self.dist_fatal( + 
"'%s' is an invalid target group name, " % token + \ + "available target groups are", + self.parse_target_groups.keys() + ) + if ghas_baseline: + has_baseline = True + # always keep sorting as specified + final_targets += [f for f in gtargets if f not in final_targets] + extra_flags += [f for f in gextra_flags if f not in extra_flags] + return has_baseline, final_targets, extra_flags + + def _parse_multi_target(self, targets): + """validate multi targets that defined between parentheses()""" + # remove any implied features and keep the origins + if not targets: + self.dist_fatal("empty multi-target '()'") + if not all([ + self.feature_is_exist(tar) for tar in targets + ]) : + self.dist_fatal("invalid target name in multi-target", targets) + if not all([ + ( + tar in self.parse_baseline_names or + tar in self.parse_dispatch_names + ) + for tar in targets + ]) : + return None + targets = self.feature_ahead(targets) + if not targets: + return None + # force sort multi targets, so it can be comparable + targets = self.feature_sorted(targets) + targets = tuple(targets) # hashable + return targets + + def _parse_policy_not_keepbase(self, has_baseline, final_targets, extra_flags): + """skip all baseline features""" + skipped = [] + for tar in final_targets[:]: + is_base = False + if isinstance(tar, str): + is_base = tar in self.parse_baseline_names + else: + # multi targets + is_base = all([ + f in self.parse_baseline_names + for f in tar + ]) + if is_base: + skipped.append(tar) + final_targets.remove(tar) + + if skipped: + self.dist_log("skip baseline features", skipped) + + return has_baseline, final_targets, extra_flags + + def _parse_policy_keepsort(self, has_baseline, final_targets, extra_flags): + """leave a notice that $keep_sort is on""" + self.dist_log( + "policy 'keep_sort' is on, dispatch-able targets", final_targets, "\n" + "are 'not' sorted depend on the highest interest but" + "as specified in the dispatch-able source or the extra group" + ) + return 
has_baseline, final_targets, extra_flags + + def _parse_policy_not_keepsort(self, has_baseline, final_targets, extra_flags): + """sorted depend on the highest interest""" + final_targets = self.feature_sorted(final_targets, reverse=True) + return has_baseline, final_targets, extra_flags + + def _parse_policy_maxopt(self, has_baseline, final_targets, extra_flags): + """append the compiler optimization flags""" + if self.cc_has_debug: + self.dist_log("debug mode is detected, policy 'maxopt' is skipped.") + elif self.cc_noopt: + self.dist_log("optimization is disabled, policy 'maxopt' is skipped.") + else: + flags = self.cc_flags["opt"] + if not flags: + self.dist_log( + "current compiler doesn't support optimization flags, " + "policy 'maxopt' is skipped", stderr=True + ) + else: + extra_flags += flags + return has_baseline, final_targets, extra_flags + + def _parse_policy_werror(self, has_baseline, final_targets, extra_flags): + """force warnings to treated as errors""" + flags = self.cc_flags["werror"] + if not flags: + self.dist_log( + "current compiler doesn't support werror flags, " + "warnings will 'not' treated as errors", stderr=True + ) + else: + self.dist_log("compiler warnings are treated as errors") + extra_flags += flags + return has_baseline, final_targets, extra_flags + + def _parse_policy_autovec(self, has_baseline, final_targets, extra_flags): + """skip features that has no auto-vectorized support by compiler""" + skipped = [] + for tar in final_targets[:]: + if isinstance(tar, str): + can = self.feature_can_autovec(tar) + else: # multiple target + can = all([ + self.feature_can_autovec(t) + for t in tar + ]) + if not can: + final_targets.remove(tar) + skipped.append(tar) + + if skipped: + self.dist_log("skip non auto-vectorized features", skipped) + + return has_baseline, final_targets, extra_flags + +class CCompilerOpt(_Config, _Distutils, _Cache, _CCompiler, _Feature, _Parse): + """ + A helper class for `CCompiler` aims to provide extra build 
options + to effectively control of compiler optimizations that are directly + related to CPU features. + """ + def __init__(self, ccompiler, cpu_baseline="min", cpu_dispatch="max", cache_path=None): + _Config.__init__(self) + _Distutils.__init__(self, ccompiler) + _Cache.__init__(self, cache_path, self.dist_info(), cpu_baseline, cpu_dispatch) + _CCompiler.__init__(self) + _Feature.__init__(self) + if not self.cc_noopt and self.cc_has_native: + self.dist_log( + "native flag is specified through environment variables. " + "force cpu-baseline='native'" + ) + cpu_baseline = "native" + _Parse.__init__(self, cpu_baseline, cpu_dispatch) + # keep the requested features untouched, need it later for report + # and trace purposes + self._requested_baseline = cpu_baseline + self._requested_dispatch = cpu_dispatch + # key is the dispatch-able source and value is a tuple + # contains two items (has_baseline[boolean], dispatched-features[list]) + self.sources_status = getattr(self, "sources_status", {}) + # every instance should has a separate one + self.cache_private.add("sources_status") + # set it at the end to make sure the cache writing was done after init + # this class + self.hit_cache = hasattr(self, "hit_cache") + + def is_cached(self): + """ + Returns True if the class loaded from the cache file + """ + return self.cache_infile and self.hit_cache + + def cpu_baseline_flags(self): + """ + Returns a list of final CPU baseline compiler flags + """ + return self.parse_baseline_flags + + def cpu_baseline_names(self): + """ + return a list of final CPU baseline feature names + """ + return self.parse_baseline_names + + def cpu_dispatch_names(self): + """ + return a list of final CPU dispatch feature names + """ + return self.parse_dispatch_names + + def try_dispatch(self, sources, src_dir=None, **kwargs): + """ + Compile one or more dispatch-able sources and generates object files, + also generates abstract C config headers and macros that + used later for the final runtime 
dispatching process. + + The mechanism behind it is to takes each source file that specified + in 'sources' and branching it into several files depend on + special configuration statements that must be declared in the + top of each source which contains targeted CPU features, + then it compiles every branched source with the proper compiler flags. + + Parameters + ---------- + sources : list + Must be a list of dispatch-able sources file paths, + and configuration statements must be declared inside + each file. + + src_dir : str + Path of parent directory for the generated headers and wrapped sources. + If None(default) the files will generated in-place. + + **kwargs : any + Arguments to pass on to the `CCompiler.compile()` + + Returns + ------- + list : generated object files + + Raises + ------ + CompileError + Raises by `CCompiler.compile()` on compiling failure. + DistutilsError + Some errors during checking the sanity of configuration statements. + + See Also + -------- + parse_targets() : + Parsing the configuration statements of dispatch-able sources. 
+ """ + to_compile = {} + baseline_flags = self.cpu_baseline_flags() + include_dirs = kwargs.setdefault("include_dirs", []) + + for src in sources: + output_dir = os.path.dirname(src) + if src_dir and not output_dir.startswith(src_dir): + output_dir = os.path.join(src_dir, output_dir) + if output_dir not in include_dirs: + include_dirs.append(output_dir) + + has_baseline, targets, extra_flags = self.parse_targets(src) + nochange = self._generate_config(output_dir, src, targets, has_baseline) + for tar in targets: + tar_src = self._wrap_target(output_dir, src, tar, nochange=nochange) + flags = tuple(extra_flags + self.feature_flags(tar)) + to_compile.setdefault(flags, []).append(tar_src) + + if has_baseline: + flags = tuple(extra_flags + baseline_flags) + to_compile.setdefault(flags, []).append(src) + + self.sources_status[src] = (has_baseline, targets) + + # For these reasons, the sources are compiled in a separate loop: + # - Gathering all sources with the same flags to benefit from + # the parallel compiling as much as possible. + # - To generate all config headers of the dispatchable sources, + # before the compilation in case if there are dependency relationships + # among them. + objects = [] + for flags, srcs in to_compile.items(): + objects += self.dist_compile(srcs, list(flags), **kwargs) + return objects + + def generate_dispatch_header(self, header_path): + """ + Generate the dispatch header which containing all definitions + and headers of instruction-sets for the enabled CPU baseline and + dispatch-able features. + + Its highly recommended to take a look at the generated header + also the generated source files via `try_dispatch()` + in order to get the full picture. 
+ """ + self.dist_log("generate CPU dispatch header: (%s)" % header_path) + + baseline_names = self.cpu_baseline_names() + dispatch_names = self.cpu_dispatch_names() + baseline_len = len(baseline_names) + dispatch_len = len(dispatch_names) + + with open(header_path, 'w') as f: + baseline_calls = ' \\\n'.join([ + ( + "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))" + ) % (self.conf_c_prefix, f) + for f in baseline_names + ]) + dispatch_calls = ' \\\n'.join([ + ( + "\t%sWITH_CPU_EXPAND_(MACRO_TO_CALL(%s, __VA_ARGS__))" + ) % (self.conf_c_prefix, f) + for f in dispatch_names + ]) + f.write(textwrap.dedent("""\ + /* + * AUTOGENERATED DON'T EDIT + * Please make changes to the code generator (distutils/ccompiler_opt.py) + */ + #define {pfx}WITH_CPU_BASELINE "{baseline_str}" + #define {pfx}WITH_CPU_DISPATCH "{dispatch_str}" + #define {pfx}WITH_CPU_BASELINE_N {baseline_len} + #define {pfx}WITH_CPU_DISPATCH_N {dispatch_len} + #define {pfx}WITH_CPU_EXPAND_(X) X + #define {pfx}WITH_CPU_BASELINE_CALL(MACRO_TO_CALL, ...) \\ + {baseline_calls} + #define {pfx}WITH_CPU_DISPATCH_CALL(MACRO_TO_CALL, ...) 
\\ + {dispatch_calls} + """).format( + pfx=self.conf_c_prefix, baseline_str=" ".join(baseline_names), + dispatch_str=" ".join(dispatch_names), baseline_len=baseline_len, + dispatch_len=dispatch_len, baseline_calls=baseline_calls, + dispatch_calls=dispatch_calls + )) + baseline_pre = '' + for name in baseline_names: + baseline_pre += self.feature_c_preprocessor(name, tabs=1) + '\n' + + dispatch_pre = '' + for name in dispatch_names: + dispatch_pre += textwrap.dedent("""\ + #ifdef {pfx}CPU_TARGET_{name} + {pre} + #endif /*{pfx}CPU_TARGET_{name}*/ + """).format( + pfx=self.conf_c_prefix_, name=name, pre=self.feature_c_preprocessor( + name, tabs=1 + )) + + f.write(textwrap.dedent("""\ + /******* baseline features *******/ + {baseline_pre} + /******* dispatch features *******/ + {dispatch_pre} + """).format( + pfx=self.conf_c_prefix_, baseline_pre=baseline_pre, + dispatch_pre=dispatch_pre + )) + + def report(self, full=False): + report = [] + baseline_rows = [] + dispatch_rows = [] + report.append(("CPU baseline", baseline_rows)) + report.append(("", "")) + report.append(("CPU dispatch", dispatch_rows)) + + ########## baseline ########## + if self.cc_noopt: + baseline_rows.append(( + "Requested", "optimization disabled %s" % ( + "(unsupported arch)" if self.cc_on_noarch else "" + ) + )) + else: + baseline_rows.append(("Requested", repr(self._requested_baseline))) + + baseline_names = self.cpu_baseline_names() + baseline_rows.append(( + "Enabled", (' '.join(baseline_names) if baseline_names else "none") + )) + baseline_flags = self.cpu_baseline_flags() + baseline_rows.append(( + "Flags", (' '.join(baseline_flags) if baseline_flags else "none") + )) + + ########## dispatch ########## + if self.cc_noopt: + dispatch_rows.append(( + "Requested", "optimization disabled %s" % ( + "(unsupported arch)" if self.cc_on_noarch else "" + ) + )) + else: + dispatch_rows.append(("Requested", repr(self._requested_dispatch))) + + dispatch_names = self.cpu_dispatch_names() + 
dispatch_rows.append(( + "Enabled", (' '.join(dispatch_names) if dispatch_names else "none") + )) + ########## Generated ########## + # TODO: + # - collect object names from 'try_dispatch()' + # then get size of each object and printed + # - give more details about the features that not + # generated due compiler support + # - find a better output's design. + # + target_sources = {} + for source, (_, targets) in self.sources_status.items(): + for tar in targets: + target_sources.setdefault(tar, []).append(source) + + if not full or not target_sources: + generated = "" + for tar in self.feature_sorted(target_sources): + sources = target_sources[tar] + name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar) + generated += name + "[%d] " % len(sources) + dispatch_rows.append(("Generated", generated[:-1] if generated else "none")) + else: + dispatch_rows.append(("Generated", '')) + for tar in self.feature_sorted(target_sources): + sources = target_sources[tar] + name = tar if isinstance(tar, str) else '(%s)' % ' '.join(tar) + flags = ' '.join(self.feature_flags(tar)) + implies = ' '.join(self.feature_sorted(self.feature_implies(tar))) + detect = ' '.join(self.feature_detect(tar)) + dispatch_rows.append(('', '')) + dispatch_rows.append((name, implies)) + dispatch_rows.append(("Flags", flags)) + dispatch_rows.append(("Detect", detect)) + for src in sources: + dispatch_rows.append(("", src)) + + ############################### + # TODO: add support for 'markdown' format + text = [] + secs_len = [len(secs) for secs, _ in report] + cols_len = [len(col) for _, rows in report for col, _ in rows] + tab = ' ' * 2 + pad = max(max(secs_len), max(cols_len)) + for sec, rows in report: + if not sec: + text.append("") # empty line + continue + sec += ' ' * (pad - len(sec)) + text.append(sec + tab + ': ') + for col, val in rows: + col += ' ' * (pad - len(col)) + text.append(tab + col + ': ' + val) + + return '\n'.join(text) + + def _wrap_target(self, output_dir, dispatch_src, 
target, nochange=False): + assert(isinstance(target, (str, tuple))) + if isinstance(target, str): + ext_name = target_name = target + else: + # multi-target + ext_name = '.'.join(target) + target_name = '__'.join(target) + + wrap_path = os.path.join(output_dir, os.path.basename(dispatch_src)) + wrap_path = "{0}.{2}{1}".format(*os.path.splitext(wrap_path), ext_name.lower()) + if nochange and os.path.exists(wrap_path): + return wrap_path + + self.dist_log("wrap dispatch-able target -> ", wrap_path) + # sorting for readability + features = self.feature_sorted(self.feature_implies_c(target)) + target_join = "#define %sCPU_TARGET_" % self.conf_c_prefix_ + target_defs = [target_join + f for f in features] + target_defs = '\n'.join(target_defs) + + with open(wrap_path, "w") as fd: + fd.write(textwrap.dedent("""\ + /** + * AUTOGENERATED DON'T EDIT + * Please make changes to the code generator \ + (distutils/ccompiler_opt.py) + */ + #define {pfx}CPU_TARGET_MODE + #define {pfx}CPU_TARGET_CURRENT {target_name} + {target_defs} + #include "{path}" + """).format( + pfx=self.conf_c_prefix_, target_name=target_name, + path=os.path.abspath(dispatch_src), target_defs=target_defs + )) + return wrap_path + + def _generate_config(self, output_dir, dispatch_src, targets, has_baseline=False): + config_path = os.path.basename(dispatch_src).replace(".c", ".h") + config_path = os.path.join(output_dir, config_path) + # check if targets didn't change to avoid recompiling + cache_hash = self.cache_hash(targets, has_baseline) + try: + with open(config_path) as f: + last_hash = f.readline().split("cache_hash:") + if len(last_hash) == 2 and int(last_hash[1]) == cache_hash: + return True + except IOError: + pass + + self.dist_log("generate dispatched config -> ", config_path) + dispatch_calls = [] + for tar in targets: + if isinstance(tar, str): + target_name = tar + else: # multi target + target_name = '__'.join([t for t in tar]) + req_detect = self.feature_detect(tar) + req_detect = '&&'.join([ 
+ "CHK(%s)" % f for f in req_detect + ]) + dispatch_calls.append( + "\t%sCPU_DISPATCH_EXPAND_(CB((%s), %s, __VA_ARGS__))" % ( + self.conf_c_prefix_, req_detect, target_name + )) + dispatch_calls = ' \\\n'.join(dispatch_calls) + + if has_baseline: + baseline_calls = ( + "\t%sCPU_DISPATCH_EXPAND_(CB(__VA_ARGS__))" + ) % self.conf_c_prefix_ + else: + baseline_calls = '' + + with open(config_path, "w") as fd: + fd.write(textwrap.dedent("""\ + // cache_hash:{cache_hash} + /** + * AUTOGENERATED DON'T EDIT + * Please make changes to the code generator (distutils/ccompiler_opt.py) + */ + #ifndef {pfx}CPU_DISPATCH_EXPAND_ + #define {pfx}CPU_DISPATCH_EXPAND_(X) X + #endif + #undef {pfx}CPU_DISPATCH_BASELINE_CALL + #undef {pfx}CPU_DISPATCH_CALL + #define {pfx}CPU_DISPATCH_BASELINE_CALL(CB, ...) \\ + {baseline_calls} + #define {pfx}CPU_DISPATCH_CALL(CHK, CB, ...) \\ + {dispatch_calls} + """).format( + pfx=self.conf_c_prefix_, baseline_calls=baseline_calls, + dispatch_calls=dispatch_calls, cache_hash=cache_hash + )) + return False + +def new_ccompiler_opt(compiler, **kwargs): + """ + Create a new instance of 'CCompilerOpt' and generate the dispatch header + inside NumPy source dir. 
+ + Parameters + ---------- + 'compiler' : CCompiler instance + '**kwargs': passed as-is to `CCompilerOpt(...)` + + Returns + ------- + new instance of CCompilerOpt + """ + opt = CCompilerOpt(compiler, **kwargs) + npy_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) + header_dir = os.path.join(npy_path, *("core/src/common".split("/"))) + header_path = os.path.join(header_dir, "_cpu_dispatch.h") + if not os.path.exists(header_path) or not opt.is_cached(): + if not os.path.exists(header_dir): + opt.dist_log( + "dispatch header dir '%s' isn't exist, creating it" % header_dir, + stderr=True + ) + os.makedirs(header_dir) + opt.generate_dispatch_header(header_path) + return opt diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c new file mode 100644 index 000000000..8df556b6c --- /dev/null +++ b/numpy/distutils/checks/cpu_asimd.c @@ -0,0 +1,25 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f); + /* MAXMIN */ + int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0); + ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0); + /* ROUNDING */ + ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0); +#ifdef __aarch64__ + { + float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0); + /* MAXMIN */ + ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0); + ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0); + /* ROUNDING */ + ret += (int)vgetq_lane_f64(vrndq_f64(vd1), 0); + } +#endif + return ret; +} diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c new file mode 100644 index 000000000..0158d1354 --- /dev/null +++ b/numpy/distutils/checks/cpu_asimddp.c @@ -0,0 +1,15 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2); + uint32x4_t va = vdupq_n_u32(3); + int ret = 
(int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0); +#ifdef __aarch64__ + ret += (int)vgetq_lane_u32(vdotq_laneq_u32(va, v1, v2, 0), 0); +#endif + return ret; +} diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c new file mode 100644 index 000000000..bb437aa40 --- /dev/null +++ b/numpy/distutils/checks/cpu_asimdfhm.c @@ -0,0 +1,17 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + float16x8_t vhp = vdupq_n_f16((float16_t)1); + float16x4_t vlhp = vdup_n_f16((float16_t)1); + float32x4_t vf = vdupq_n_f32(1.0f); + float32x2_t vlf = vdup_n_f32(1.0f); + + int ret = (int)vget_lane_f32(vfmlal_low_u32(vlf, vlhp, vlhp), 0); + ret += (int)vgetq_lane_f32(vfmlslq_high_u32(vf, vhp, vhp), 0); + + return ret; +} diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c new file mode 100644 index 000000000..80b94000f --- /dev/null +++ b/numpy/distutils/checks/cpu_asimdhp.c @@ -0,0 +1,14 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + float16x8_t vhp = vdupq_n_f16((float16_t)-1); + float16x4_t vlhp = vdup_n_f16((float16_t)-1); + + int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0); + ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0); + return ret; +} diff --git a/numpy/distutils/checks/cpu_avx.c b/numpy/distutils/checks/cpu_avx.c new file mode 100644 index 000000000..737c0d2e9 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx.c @@ -0,0 +1,7 @@ +#include <immintrin.h> + +int main(void) +{ + __m256 a = _mm256_add_ps(_mm256_setzero_ps(), _mm256_setzero_ps()); + return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx2.c b/numpy/distutils/checks/cpu_avx2.c new file mode 100644 index 000000000..dfb11fd79 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx2.c @@ -0,0 +1,7 @@ +#include <immintrin.h> + +int main(void) +{ + __m256i a = _mm256_abs_epi16(_mm256_setzero_si256()); + return 
_mm_cvtsi128_si32(_mm256_castsi256_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512_clx.c b/numpy/distutils/checks/cpu_avx512_clx.c new file mode 100644 index 000000000..71dad83a7 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_clx.c @@ -0,0 +1,8 @@ +#include <immintrin.h> + +int main(void) +{ + /* VNNI */ + __m512i a = _mm512_dpbusd_epi32(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512()); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512_cnl.c b/numpy/distutils/checks/cpu_avx512_cnl.c new file mode 100644 index 000000000..dfab4436d --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_cnl.c @@ -0,0 +1,10 @@ +#include <immintrin.h> + +int main(void) +{ + /* IFMA */ + __m512i a = _mm512_madd52hi_epu64(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512()); + /* VMBI */ + a = _mm512_permutex2var_epi8(a, _mm512_setzero_si512(), _mm512_setzero_si512()); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512_icl.c b/numpy/distutils/checks/cpu_avx512_icl.c new file mode 100644 index 000000000..cf2706b3b --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_icl.c @@ -0,0 +1,12 @@ +#include <immintrin.h> + +int main(void) +{ + /* VBMI2 */ + __m512i a = _mm512_shrdv_epi64(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512()); + /* BITLAG */ + a = _mm512_popcnt_epi8(a); + /* VPOPCNTDQ */ + a = _mm512_popcnt_epi64(a); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512_knl.c b/numpy/distutils/checks/cpu_avx512_knl.c new file mode 100644 index 000000000..0699f37a6 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_knl.c @@ -0,0 +1,11 @@ +#include <immintrin.h> + +int main(void) +{ + int base[128]; + /* ER */ + __m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(_mm512_setzero_pd())); + /* PF */ + _mm512_mask_prefetch_i64scatter_pd(base, 
_mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1); + return base[0]; +} diff --git a/numpy/distutils/checks/cpu_avx512_knm.c b/numpy/distutils/checks/cpu_avx512_knm.c new file mode 100644 index 000000000..db61b4bfa --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_knm.c @@ -0,0 +1,17 @@ +#include <immintrin.h> + +int main(void) +{ + __m512i a = _mm512_setzero_si512(); + __m512 b = _mm512_setzero_ps(); + + /* 4FMAPS */ + b = _mm512_4fmadd_ps(b, b, b, b, b, NULL); + /* 4VNNIW */ + a = _mm512_4dpwssd_epi32(a, a, a, a, a, NULL); + /* VPOPCNTDQ */ + a = _mm512_popcnt_epi64(a); + + a = _mm512_add_epi32(a, _mm512_castps_si512(b)); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512_skx.c b/numpy/distutils/checks/cpu_avx512_skx.c new file mode 100644 index 000000000..1d5e15b5e --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512_skx.c @@ -0,0 +1,12 @@ +#include <immintrin.h> + +int main(void) +{ + /* VL */ + __m256i a = _mm256_abs_epi64(_mm256_setzero_si256()); + /* DQ */ + __m512i b = _mm512_broadcast_i32x8(a); + /* BW */ + b = _mm512_abs_epi16(b); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(b)); +} diff --git a/numpy/distutils/checks/cpu_avx512cd.c b/numpy/distutils/checks/cpu_avx512cd.c new file mode 100644 index 000000000..61bef6b82 --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512cd.c @@ -0,0 +1,7 @@ +#include <immintrin.h> + +int main(void) +{ + __m512i a = _mm512_lzcnt_epi32(_mm512_setzero_si512()); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_avx512f.c b/numpy/distutils/checks/cpu_avx512f.c new file mode 100644 index 000000000..f60cc09dd --- /dev/null +++ b/numpy/distutils/checks/cpu_avx512f.c @@ -0,0 +1,7 @@ +#include <immintrin.h> + +int main(void) +{ + __m512i a = _mm512_abs_epi32(_mm512_setzero_si512()); + return _mm_cvtsi128_si32(_mm512_castsi512_si128(a)); +} diff --git a/numpy/distutils/checks/cpu_f16c.c b/numpy/distutils/checks/cpu_f16c.c 
new file mode 100644 index 000000000..a5a343e2d --- /dev/null +++ b/numpy/distutils/checks/cpu_f16c.c @@ -0,0 +1,9 @@ +#include <emmintrin.h> +#include <immintrin.h> + +int main(void) +{ + __m128 a = _mm_cvtph_ps(_mm_setzero_si128()); + __m256 a8 = _mm256_cvtph_ps(_mm_setzero_si128()); + return (int)(_mm_cvtss_f32(a) + _mm_cvtss_f32(_mm256_castps256_ps128(a8))); +} diff --git a/numpy/distutils/checks/cpu_fma3.c b/numpy/distutils/checks/cpu_fma3.c new file mode 100644 index 000000000..cf34c6cb1 --- /dev/null +++ b/numpy/distutils/checks/cpu_fma3.c @@ -0,0 +1,8 @@ +#include <xmmintrin.h> +#include <immintrin.h> + +int main(void) +{ + __m256 a = _mm256_fmadd_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _mm256_setzero_ps()); + return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a)); +} diff --git a/numpy/distutils/checks/cpu_fma4.c b/numpy/distutils/checks/cpu_fma4.c new file mode 100644 index 000000000..1ad717033 --- /dev/null +++ b/numpy/distutils/checks/cpu_fma4.c @@ -0,0 +1,12 @@ +#include <immintrin.h> +#ifdef _MSC_VER + #include <ammintrin.h> +#else + #include <x86intrin.h> +#endif + +int main(void) +{ + __m256 a = _mm256_macc_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _mm256_setzero_ps()); + return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a)); +} diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c new file mode 100644 index 000000000..4eab1f384 --- /dev/null +++ b/numpy/distutils/checks/cpu_neon.c @@ -0,0 +1,15 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f); + int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0); +#ifdef __aarch64__ + float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0); + ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0); +#endif + return ret; +} diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c new file mode 100644 index 000000000..745d2e793 --- /dev/null +++ 
b/numpy/distutils/checks/cpu_neon_fp16.c @@ -0,0 +1,11 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + short z4[] = {0, 0, 0, 0, 0, 0, 0, 0}; + float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4)); + return (int)vgetq_lane_f32(v_z4, 0); +} diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c new file mode 100644 index 000000000..45f7b5d69 --- /dev/null +++ b/numpy/distutils/checks/cpu_neon_vfpv4.c @@ -0,0 +1,19 @@ +#ifdef _MSC_VER + #include <Intrin.h> +#endif +#include <arm_neon.h> + +int main(void) +{ + float32x4_t v1 = vdupq_n_f32(1.0f); + float32x4_t v2 = vdupq_n_f32(2.0f); + float32x4_t v3 = vdupq_n_f32(3.0f); + int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0); +#ifdef __aarch64__ + float64x2_t vd1 = vdupq_n_f64(1.0); + float64x2_t vd2 = vdupq_n_f64(2.0); + float64x2_t vd3 = vdupq_n_f64(3.0); + ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0); +#endif + return ret; +} diff --git a/numpy/distutils/checks/cpu_popcnt.c b/numpy/distutils/checks/cpu_popcnt.c new file mode 100644 index 000000000..e6a80fb40 --- /dev/null +++ b/numpy/distutils/checks/cpu_popcnt.c @@ -0,0 +1,23 @@ +#ifdef _MSC_VER + #include <nmmintrin.h> +#else + #include <popcntintrin.h> +#endif + +int main(void) +{ + long long a = 0; + int b; +#ifdef _MSC_VER + #ifdef _M_X64 + a = _mm_popcnt_u64(1); + #endif + b = _mm_popcnt_u32(1); +#else + #ifdef __x86_64__ + a = __builtin_popcountll(1); + #endif + b = __builtin_popcount(1); +#endif + return (int)a + b; +} diff --git a/numpy/distutils/checks/cpu_sse.c b/numpy/distutils/checks/cpu_sse.c new file mode 100644 index 000000000..bb98bf63c --- /dev/null +++ b/numpy/distutils/checks/cpu_sse.c @@ -0,0 +1,7 @@ +#include <xmmintrin.h> + +int main(void) +{ + __m128 a = _mm_add_ps(_mm_setzero_ps(), _mm_setzero_ps()); + return (int)_mm_cvtss_f32(a); +} diff --git a/numpy/distutils/checks/cpu_sse2.c b/numpy/distutils/checks/cpu_sse2.c new 
file mode 100644 index 000000000..658afc9b4 --- /dev/null +++ b/numpy/distutils/checks/cpu_sse2.c @@ -0,0 +1,7 @@ +#include <emmintrin.h> + +int main(void) +{ + __m128i a = _mm_add_epi16(_mm_setzero_si128(), _mm_setzero_si128()); + return _mm_cvtsi128_si32(a); +} diff --git a/numpy/distutils/checks/cpu_sse3.c b/numpy/distutils/checks/cpu_sse3.c new file mode 100644 index 000000000..aece1e601 --- /dev/null +++ b/numpy/distutils/checks/cpu_sse3.c @@ -0,0 +1,7 @@ +#include <pmmintrin.h> + +int main(void) +{ + __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps()); + return (int)_mm_cvtss_f32(a); +} diff --git a/numpy/distutils/checks/cpu_sse41.c b/numpy/distutils/checks/cpu_sse41.c new file mode 100644 index 000000000..bfdb9feac --- /dev/null +++ b/numpy/distutils/checks/cpu_sse41.c @@ -0,0 +1,7 @@ +#include <smmintrin.h> + +int main(void) +{ + __m128 a = _mm_floor_ps(_mm_setzero_ps()); + return (int)_mm_cvtss_f32(a); +} diff --git a/numpy/distutils/checks/cpu_sse42.c b/numpy/distutils/checks/cpu_sse42.c new file mode 100644 index 000000000..24f5d93fe --- /dev/null +++ b/numpy/distutils/checks/cpu_sse42.c @@ -0,0 +1,7 @@ +#include <smmintrin.h> + +int main(void) +{ + __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps()); + return (int)_mm_cvtss_f32(a); +} diff --git a/numpy/distutils/checks/cpu_ssse3.c b/numpy/distutils/checks/cpu_ssse3.c new file mode 100644 index 000000000..ad0abc1e6 --- /dev/null +++ b/numpy/distutils/checks/cpu_ssse3.c @@ -0,0 +1,7 @@ +#include <tmmintrin.h> + +int main(void) +{ + __m128i a = _mm_hadd_epi16(_mm_setzero_si128(), _mm_setzero_si128()); + return (int)_mm_cvtsi128_si32(a); +} diff --git a/numpy/distutils/checks/cpu_vsx.c b/numpy/distutils/checks/cpu_vsx.c new file mode 100644 index 000000000..0b3f30d6a --- /dev/null +++ b/numpy/distutils/checks/cpu_vsx.c @@ -0,0 +1,21 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && 
!defined(__IBMC__)) + #define vsx_ld vec_vsx_ld + #define vsx_st vec_vsx_st +#else + #define vsx_ld vec_xl + #define vsx_st vec_xst +#endif + +int main(void) +{ + unsigned int zout[4]; + unsigned int z4[] = {0, 0, 0, 0}; + __vector unsigned int v_z4 = vsx_ld(0, z4); + vsx_st(v_z4, 0, zout); + return zout[0]; +} diff --git a/numpy/distutils/checks/cpu_vsx2.c b/numpy/distutils/checks/cpu_vsx2.c new file mode 100644 index 000000000..410fb29d6 --- /dev/null +++ b/numpy/distutils/checks/cpu_vsx2.c @@ -0,0 +1,13 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +typedef __vector unsigned long long v_uint64x2; + +int main(void) +{ + v_uint64x2 z2 = (v_uint64x2){0, 0}; + z2 = (v_uint64x2)vec_cmpeq(z2, z2); + return (int)vec_extract(z2, 0); +} diff --git a/numpy/distutils/checks/cpu_vsx3.c b/numpy/distutils/checks/cpu_vsx3.c new file mode 100644 index 000000000..857526535 --- /dev/null +++ b/numpy/distutils/checks/cpu_vsx3.c @@ -0,0 +1,13 @@ +#ifndef __VSX__ + #error "VSX is not supported" +#endif +#include <altivec.h> + +typedef __vector unsigned int v_uint32x4; + +int main(void) +{ + v_uint32x4 z4 = (v_uint32x4){0, 0, 0, 0}; + z4 = vec_absd(z4, z4); + return (int)vec_extract(z4, 0); +} diff --git a/numpy/distutils/checks/cpu_xop.c b/numpy/distutils/checks/cpu_xop.c new file mode 100644 index 000000000..51d70cf2b --- /dev/null +++ b/numpy/distutils/checks/cpu_xop.c @@ -0,0 +1,12 @@ +#include <immintrin.h> +#ifdef _MSC_VER + #include <ammintrin.h> +#else + #include <x86intrin.h> +#endif + +int main(void) +{ + __m128i a = _mm_comge_epu32(_mm_setzero_si128(), _mm_setzero_si128()); + return _mm_cvtsi128_si32(a); +} diff --git a/numpy/distutils/checks/test_flags.c b/numpy/distutils/checks/test_flags.c new file mode 100644 index 000000000..4cd09d42a --- /dev/null +++ b/numpy/distutils/checks/test_flags.c @@ -0,0 +1 @@ +int test_flags; diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index a156a7c6e..60ba4c917 
100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -16,6 +16,12 @@ class build(old_build): "specify the Fortran compiler type"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] help_options = old_build.help_options + [ @@ -27,6 +33,9 @@ class build(old_build): old_build.initialize_options(self) self.fcompiler = None self.warn_error = False + self.cpu_baseline = "min" + self.cpu_dispatch = "max -xop -fma4" # drop AMD legacy features by default + self.disable_optimization = False def finalize_options(self): build_scripts = self.build_scripts diff --git a/numpy/distutils/command/build_clib.py b/numpy/distutils/command/build_clib.py index f6a84e351..87345adbc 100644 --- a/numpy/distutils/command/build_clib.py +++ b/numpy/distutils/command/build_clib.py @@ -13,6 +13,7 @@ from numpy.distutils.misc_util import ( filter_sources, get_lib_source_files, get_numpy_include_dirs, has_cxx_sources, has_f_sources, is_sequence ) +from numpy.distutils.ccompiler_opt import new_ccompiler_opt # Fix Python distutils bug sf #1718574: _l = old_build_clib.user_options @@ -34,9 +35,16 @@ class build_clib(old_build_clib): "number of parallel jobs"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] - boolean_options = old_build_clib.boolean_options + ['inplace', 'warn-error'] + boolean_options = old_build_clib.boolean_options + \ + ['inplace', 'warn-error', 'disable-optimization'] def initialize_options(self): 
old_build_clib.initialize_options(self) @@ -44,6 +52,10 @@ class build_clib(old_build_clib): self.inplace = 0 self.parallel = None self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None + def finalize_options(self): if self.parallel: @@ -55,6 +67,9 @@ class build_clib(old_build_clib): self.set_undefined_options('build', ('parallel', 'parallel'), ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization') ) def have_f_sources(self): @@ -102,6 +117,25 @@ class build_clib(old_build_clib): self.compiler.show_customization() + if not self.disable_optimization: + opt_cache_path = os.path.abspath( + os.path.join(self.build_temp, 'ccompiler_opt_cache_clib.py' + )) + self.compiler_opt = new_ccompiler_opt( + compiler=self.compiler, cpu_baseline=self.cpu_baseline, + cpu_dispatch=self.cpu_dispatch, cache_path=opt_cache_path + ) + if not self.compiler_opt.is_cached(): + log.info("Detected changes on compiler optimizations, force rebuilding") + self.force = True + + import atexit + def report(): + log.info("\n########### CLIB COMPILER OPTIMIZATION ###########") + log.info(self.compiler_opt.report(full=True)) + + atexit.register(report) + if self.have_f_sources(): from numpy.distutils.fcompiler import new_fcompiler self._f_compiler = new_fcompiler(compiler=self.fcompiler, @@ -211,6 +245,8 @@ class build_clib(old_build_clib): 'extra_f90_compile_args') or [] macros = build_info.get('macros') + if macros is None: + macros = [] include_dirs = build_info.get('include_dirs') if include_dirs is None: include_dirs = [] @@ -223,6 +259,31 @@ class build_clib(old_build_clib): if requiref90: self.mkpath(module_build_dir) + dispatch_objects = [] + if not self.disable_optimization: + dispatch_sources = [ + c_sources.pop(c_sources.index(src)) + for src in c_sources[:] if src.endswith(".dispatch.c") + ] + if dispatch_sources: + if not self.inplace: 
+ build_src = self.get_finalized_command("build_src").build_src + else: + build_src = None + dispatch_objects = self.compiler_opt.try_dispatch( + dispatch_sources, + output_dir=self.build_temp, + src_dir=build_src, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_postargs + ) + extra_args_baseopt = extra_postargs + self.compiler_opt.cpu_baseline_flags() + else: + extra_args_baseopt = extra_postargs + macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + if compiler.compiler_type == 'msvc': # this hack works around the msvc compiler attributes # problem, msvc uses its own convention :( @@ -237,7 +298,8 @@ class build_clib(old_build_clib): macros=macros, include_dirs=include_dirs, debug=self.debug, - extra_postargs=extra_postargs) + extra_postargs=extra_args_baseopt) + objects.extend(dispatch_objects) if cxx_sources: log.info("compiling C++ sources") diff --git a/numpy/distutils/command/build_ext.py b/numpy/distutils/command/build_ext.py index d53285c92..b6557fcf6 100644 --- a/numpy/distutils/command/build_ext.py +++ b/numpy/distutils/command/build_ext.py @@ -19,7 +19,7 @@ from numpy.distutils.misc_util import ( has_cxx_sources, has_f_sources, is_sequence ) from numpy.distutils.command.config_compiler import show_fortran_compilers - +from numpy.distutils.ccompiler_opt import new_ccompiler_opt class build_ext (old_build_ext): @@ -33,6 +33,12 @@ class build_ext (old_build_ext): "number of parallel jobs"), ('warn-error', None, "turn all warnings into errors (-Werror)"), + ('cpu-baseline=', None, + "specify a list of enabled baseline CPU optimizations"), + ('cpu-dispatch=', None, + "specify a list of dispatched CPU optimizations"), + ('disable-optimization', None, + "disable CPU optimized code(dispatch,simd,fast...)"), ] help_options = old_build_ext.help_options + [ @@ -40,13 +46,16 @@ class build_ext (old_build_ext): show_fortran_compilers), ] - boolean_options = old_build_ext.boolean_options + ['warn-error'] + boolean_options = 
old_build_ext.boolean_options + ['warn-error', 'disable-optimization'] def initialize_options(self): old_build_ext.initialize_options(self) self.fcompiler = None self.parallel = None self.warn_error = None + self.cpu_baseline = None + self.cpu_dispatch = None + self.disable_optimization = None def finalize_options(self): if self.parallel: @@ -75,6 +84,9 @@ class build_ext (old_build_ext): self.set_undefined_options('build', ('parallel', 'parallel'), ('warn_error', 'warn_error'), + ('cpu_baseline', 'cpu_baseline'), + ('cpu_dispatch', 'cpu_dispatch'), + ('disable_optimization', 'disable_optimization'), ) def run(self): @@ -129,6 +141,22 @@ class build_ext (old_build_ext): self.compiler.show_customization() + if not self.disable_optimization: + opt_cache_path = os.path.abspath(os.path.join(self.build_temp, 'ccompiler_opt_cache_ext.py')) + self.compiler_opt = new_ccompiler_opt(compiler=self.compiler, + cpu_baseline=self.cpu_baseline, + cpu_dispatch=self.cpu_dispatch, + cache_path=opt_cache_path) + if not self.compiler_opt.is_cached(): + log.info("Detected changes on compiler optimizations, force rebuilding") + self.force = True + + import atexit + def report(): + log.info("\n########### EXT COMPILER OPTIMIZATION ###########") + log.info(self.compiler_opt.report(full=True)) + atexit.register(report) + # Setup directory for storing generated extra DLL files on Windows self.extra_dll_dir = os.path.join(self.build_temp, '.libs') if not os.path.isdir(self.extra_dll_dir): @@ -378,6 +406,32 @@ class build_ext (old_build_ext): include_dirs = ext.include_dirs + get_numpy_include_dirs() + dispatch_objects = [] + if not self.disable_optimization: + dispatch_sources = [ + c_sources.pop(c_sources.index(src)) + for src in c_sources[:] if src.endswith(".dispatch.c") + ] + if dispatch_sources: + if not self.inplace: + build_src = self.get_finalized_command("build_src").build_src + else: + build_src = None + dispatch_objects = self.compiler_opt.try_dispatch( + dispatch_sources, + 
output_dir=output_dir, + src_dir=build_src, + macros=macros, + include_dirs=include_dirs, + debug=self.debug, + extra_postargs=extra_args, + **kws + ) + extra_args_baseopt = extra_args + self.compiler_opt.cpu_baseline_flags() + else: + extra_args_baseopt = extra_args + macros.append(("NPY_DISABLE_OPTIMIZATION", 1)) + c_objects = [] if c_sources: log.info("compiling C sources") @@ -386,8 +440,9 @@ class build_ext (old_build_ext): macros=macros, include_dirs=include_dirs, debug=self.debug, - extra_postargs=extra_args, + extra_postargs=extra_args_baseopt, **kws) + c_objects.extend(dispatch_objects) if cxx_sources: log.info("compiling C++ sources") diff --git a/numpy/distutils/setup.py b/numpy/distutils/setup.py index 88cd1a160..798c3686f 100644 --- a/numpy/distutils/setup.py +++ b/numpy/distutils/setup.py @@ -7,6 +7,7 @@ def configuration(parent_package='',top_path=None): config.add_subpackage('tests') config.add_data_files('site.cfg') config.add_data_files('mingw/gfortran_vs2003_hack.c') + config.add_data_dir('checks') config.make_config_py() return config diff --git a/numpy/distutils/tests/test_ccompiler_opt.py b/numpy/distutils/tests/test_ccompiler_opt.py new file mode 100644 index 000000000..a789be1ea --- /dev/null +++ b/numpy/distutils/tests/test_ccompiler_opt.py @@ -0,0 +1,787 @@ +import re, textwrap, os +from os import sys, path +from distutils.errors import DistutilsError + +is_standalone = __name__ == '__main__' and __package__ is None +if is_standalone: + import unittest, contextlib, tempfile, shutil + sys.path.append(path.abspath(path.join(path.dirname(__file__), ".."))) + from ccompiler_opt import CCompilerOpt + + # from numpy/testing/_private/utils.py + @contextlib.contextmanager + def tempdir(*args, **kwargs): + tmpdir = tempfile.mkdtemp(*args, **kwargs) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir) + + def assert_(expr, msg=''): + if not expr: + raise AssertionError(msg) +else: + from numpy.distutils.ccompiler_opt import CCompilerOpt + from 
numpy.testing import assert_, tempdir + +# architectures and compilers to test +arch_compilers = dict( + x86 = ("gcc", "clang", "icc", "iccw", "msvc"), + x64 = ("gcc", "clang", "icc", "iccw", "msvc"), + ppc64 = ("gcc", "clang"), + ppc64le = ("gcc", "clang"), + armhf = ("gcc", "clang"), + aarch64 = ("gcc", "clang"), + noarch = ("gcc",) +) + +class FakeCCompilerOpt(CCompilerOpt): + fake_info = "" + def __init__(self, trap_files="", trap_flags="", *args, **kwargs): + self.fake_trap_files = trap_files + self.fake_trap_flags = trap_flags + CCompilerOpt.__init__(self, None, **kwargs) + + def __repr__(self): + return textwrap.dedent("""\ + <<<< + march : {} + compiler : {} + ---------------- + {} + >>>> + """).format(self.cc_march, self.cc_name, self.report()) + + def dist_compile(self, sources, flags, **kwargs): + assert(isinstance(sources, list)) + assert(isinstance(flags, list)) + if self.fake_trap_files: + for src in sources: + if re.match(self.fake_trap_files, src): + self.dist_error("source is trapped by a fake interface") + if self.fake_trap_flags: + for f in flags: + if re.match(self.fake_trap_flags, f): + self.dist_error("flag is trapped by a fake interface") + # fake objects + return zip(sources, [' '.join(flags)] * len(sources)) + + def dist_info(self): + return FakeCCompilerOpt.fake_info + + @staticmethod + def dist_log(*args, stderr=False): + pass + +class _Test_CCompilerOpt(object): + arch = None # x86_64 + cc = None # gcc + + def setup(self): + FakeCCompilerOpt.conf_nocache = True + self._opt = None + + def nopt(self, *args, **kwargs): + FakeCCompilerOpt.fake_info = self.arch + '_' + self.cc + return FakeCCompilerOpt(*args, **kwargs) + + def opt(self): + if not self._opt: + self._opt = self.nopt() + return self._opt + + def march(self): + return self.opt().cc_march + + def cc_name(self): + return self.opt().cc_name + + def get_targets(self, targets, groups, **kwargs): + FakeCCompilerOpt.conf_target_groups = groups + opt = self.nopt( + 
cpu_baseline=kwargs.get("baseline", "min"), + cpu_dispatch=kwargs.get("dispatch", "max"), + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + with tempdir() as tmpdir: + file = os.path.join(tmpdir, "test_targets.c") + with open(file, 'w') as f: + f.write(targets) + gtargets = [] + gflags = {} + fake_objects = opt.try_dispatch([file]) + for source, flags in fake_objects: + gtar = source.split('.')[1:-1] + glen = len(gtar) + if glen == 0: + gtar = "baseline" + elif glen == 1: + gtar = gtar[0].upper() + else: + # converting multi-target into parentheses str format to be equivalent + # to the configuration statements syntax. + gtar = ('('+' '.join(gtar)+')').upper() + gtargets.append(gtar) + gflags[gtar] = flags + + has_baseline, targets = opt.sources_status[file] + targets = targets + ["baseline"] if has_baseline else targets + # convert tuple that represent multi-target into parentheses str format + targets = [ + '('+' '.join(tar)+')' if isinstance(tar, tuple) else tar + for tar in targets + ] + if len(targets) != len(gtargets) or not all(t in gtargets for t in targets): + raise AssertionError( + "'sources_status' returns different targets than the compiled targets\n" + "%s != %s" % (targets, gtargets) + ) + # return targets from 'sources_status' since the order is matters + return targets, gflags + + def arg_regex(self, **kwargs): + map2origin = dict( + x64 = "x86", + ppc64le = "ppc64", + aarch64 = "armhf", + clang = "gcc", + ) + march = self.march(); cc_name = self.cc_name() + map_march = map2origin.get(march, march) + map_cc = map2origin.get(cc_name, cc_name) + for key in ( + march, cc_name, map_march, map_cc, + march + '_' + cc_name, + map_march + '_' + cc_name, + march + '_' + map_cc, + map_march + '_' + map_cc, + ) : + regex = kwargs.pop(key, None) + if regex is not None: + break + if regex: + if isinstance(regex, dict): + for k, v in regex.items(): + if v[-1:] not in ')}$?\\.+*': + regex[k] = v + '$' + else: + 
assert(isinstance(regex, str)) + if regex[-1:] not in ')}$?\\.+*': + regex += '$' + return regex + + def expect(self, dispatch, baseline="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + features = ' '.join(opt.cpu_dispatch_names()) + if not match: + if len(features) != 0: + raise AssertionError( + 'expected empty features, not "%s"' % features + ) + return + if not re.match(match, features, re.IGNORECASE): + raise AssertionError( + 'dispatch features "%s" not match "%s"' % (features, match) + ) + + def expect_baseline(self, baseline, dispatch="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + features = ' '.join(opt.cpu_baseline_names()) + if not match: + if len(features) != 0: + raise AssertionError( + 'expected empty features, not "%s"' % features + ) + return + if not re.match(match, features, re.IGNORECASE): + raise AssertionError( + 'baseline features "%s" not match "%s"' % (features, match) + ) + + def expect_flags(self, baseline, dispatch="", **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + opt = self.nopt( + cpu_baseline=baseline, cpu_dispatch=dispatch, + trap_files=kwargs.get("trap_files", ""), + trap_flags=kwargs.get("trap_flags", "") + ) + flags = ' '.join(opt.cpu_baseline_flags()) + if not match: + if len(flags) != 0: + raise AssertionError( + 'expected empty flags not "%s"' % flags + ) + return + if not re.match(match, flags): + raise AssertionError( + 'flags "%s" not match "%s"' % (flags, match) + ) + + def expect_targets(self, targets, groups={}, **kwargs): + match = self.arg_regex(**kwargs) + if match is None: + return + targets, _ = 
self.get_targets(targets=targets, groups=groups, **kwargs) + targets = ' '.join(targets) + if not match: + if len(targets) != 0: + raise AssertionError( + 'expected empty targets, not "%s"' % targets + ) + return + if not re.match(match, targets, re.IGNORECASE): + raise AssertionError( + 'targets "%s" not match "%s"' % (targets, match) + ) + + def expect_target_flags(self, targets, groups={}, **kwargs): + match_dict = self.arg_regex(**kwargs) + if match_dict is None: + return + assert(isinstance(match_dict, dict)) + _, tar_flags = self.get_targets(targets=targets, groups=groups) + + for match_tar, match_flags in match_dict.items(): + if match_tar not in tar_flags: + raise AssertionError( + 'expected to find target "%s"' % match_tar + ) + flags = tar_flags[match_tar] + if not match_flags: + if len(flags) != 0: + raise AssertionError( + 'expected to find empty flags in target "%s"' % match_tar + ) + if not re.match(match_flags, flags): + raise AssertionError( + '"%s" flags "%s" not match "%s"' % (match_tar, flags, match_flags) + ) + + def test_interface(self): + wrong_arch = "ppc64" if self.arch != "ppc64" else "x86" + wrong_cc = "clang" if self.cc != "clang" else "icc" + opt = self.opt() + assert_(getattr(opt, "cc_on_" + self.arch)) + assert_(not getattr(opt, "cc_on_" + wrong_arch)) + assert_(getattr(opt, "cc_is_" + self.cc)) + assert_(not getattr(opt, "cc_is_" + wrong_cc)) + + def test_args_empty(self): + for baseline, dispatch in ( + ("", "none"), + (None, ""), + ("none +none", "none - none"), + ("none -max", "min - max"), + ("+vsx2 -VSX2", "vsx avx2 avx512f -max"), + ("max -vsx - avx + avx512f neon -MAX ", + "min -min + max -max -vsx + avx2 -avx2 +NONE") + ) : + opt = self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch) + assert(len(opt.cpu_baseline_names()) == 0) + assert(len(opt.cpu_dispatch_names()) == 0) + + def test_args_validation(self): + if self.march() == "unknown": + return + # check sanity of argument's validation + for baseline, dispatch in ( + 
("unkown_feature - max +min", "unknown max min"), # unknowing features + ("#avx2", "$vsx") # groups and polices aren't acceptable + ) : + try: + self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch) + raise AssertionError("excepted an exception for invalid arguments") + except DistutilsError: + pass + + def test_skip(self): + # only takes what platform supports and skip the others + # without casing exceptions + self.expect( + "sse vsx neon", + x86="sse", ppc64="vsx", armhf="neon", unknown="" + ) + self.expect( + "sse41 avx avx2 vsx2 vsx3 neon_vfpv4 asimd", + x86 = "sse41 avx avx2", + ppc64 = "vsx2 vsx3", + armhf = "neon_vfpv4 asimd", + unknown = "" + ) + # any features in cpu_dispatch must be ignored if it's part of baseline + self.expect( + "sse neon vsx", baseline="sse neon vsx", + x86="", ppc64="", armhf="" + ) + self.expect( + "avx2 vsx3 asimdhp", baseline="avx2 vsx3 asimdhp", + x86="", ppc64="", armhf="" + ) + + def test_implies(self): + # baseline combining implied features, so we count + # on it instead of testing 'feature_implies()'' directly + self.expect_baseline( + "fma3 avx2 asimd vsx3", + # .* between two spaces can validate features in between + x86 = "sse .* sse41 .* fma3.*avx2", + ppc64 = "vsx vsx2 vsx3", + armhf = "neon neon_fp16 neon_vfpv4 asimd" + ) + """ + special cases + """ + # in icc and msvc, FMA3 and AVX2 can't be separated + # both need to implies each other, same for avx512f & cd + for f0, f1 in ( + ("fma3", "avx2"), + ("avx512f", "avx512cd"), + ): + diff = ".* sse42 .* %s .*%s$" % (f0, f1) + self.expect_baseline(f0, + x86_gcc=".* sse42 .* %s$" % f0, + x86_icc=diff, x86_iccw=diff + ) + self.expect_baseline(f1, + x86_gcc=".* avx .* %s$" % f1, + x86_icc=diff, x86_iccw=diff + ) + # in msvc, following features can't be separated too + for f in (("fma3", "avx2"), ("avx512f", "avx512cd", "avx512_skx")): + for ff in f: + self.expect_baseline(ff, + x86_msvc=".*%s" % ' '.join(f) + ) + + # in ppc64le VSX and VSX2 can't be separated + 
self.expect_baseline("vsx", ppc64le="vsx vsx2") + # in aarch64 following features can't be separated + for f in ("neon", "neon_fp16", "neon_vfpv4", "asimd"): + self.expect_baseline(f, aarch64="neon neon_fp16 neon_vfpv4 asimd") + + def test_args_options(self): + # max & native + for o in ("max", "native"): + if o == "native" and self.cc_name() == "msvc": + continue + self.expect(o, + trap_files=".*cpu_(sse|vsx|neon).c", + x86="", ppc64="", armhf="" + ) + self.expect(o, + trap_files=".*cpu_(sse3|vsx2|neon_vfpv4).c", + x86="sse sse2", ppc64="vsx", armhf="neon neon_fp16", + aarch64="", ppc64le="" + ) + self.expect(o, + trap_files=".*cpu_(popcnt|vsx3).c", + x86="sse .* sse41", ppc64="vsx vsx2", + armhf="neon neon_fp16 .* asimd .*" + ) + self.expect(o, + x86_gcc=".* xop fma4 .* avx512f .* avx512_knl avx512_knm avx512_skx .*", + # in icc, xop and fam4 aren't supported + x86_icc=".* avx512f .* avx512_knl avx512_knm avx512_skx .*", + x86_iccw=".* avx512f .* avx512_knl avx512_knm avx512_skx .*", + # in msvc, avx512_knl avx512_knm aren't supported + x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*", + armhf=".* asimd asimdhp asimddp .*", + ppc64="vsx vsx2 vsx3.*" + ) + # min + self.expect("min", + x86="sse sse2", x64="sse sse2 sse3", + armhf="", aarch64="neon neon_fp16 .* asimd", + ppc64="", ppc64le="vsx vsx2" + ) + self.expect( + "min", trap_files=".*cpu_(sse2|vsx2).c", + x86="", ppc64le="" + ) + # an exception must triggered if native flag isn't supported + # when option "native" is activated through the args + try: + self.expect("native", + trap_flags=".*(-march=native|-xHost|/QxHost).*", + x86=".*", ppc64=".*", armhf=".*" + ) + if self.march() != "unknown": + raise AssertionError( + "excepted an exception for %s" % self.march() + ) + except DistutilsError: + if self.march() == "unknown": + raise AssertionError("excepted no exceptions") + + def test_flags(self): + self.expect_flags( + "sse sse2 vsx vsx2 neon neon_fp16", + x86_gcc="-msse -msse2", x86_icc="-msse -msse2", + 
x86_iccw="/arch:SSE2", x86_msvc="/arch:SSE2", + ppc64_gcc= "-mcpu=power8", + ppc64_clang="-maltivec -mvsx -mpower8-vector", + armhf_gcc="-mfpu=neon-fp16 -mfp16-format=ieee", + aarch64="" + ) + # testing normalize -march + self.expect_flags( + "asimd", + aarch64="", + armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8-a\+simd" + ) + self.expect_flags( + "asimdhp", + aarch64_gcc=r"-march=armv8.2-a\+fp16", + armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8.2-a\+fp16" + ) + self.expect_flags( + "asimddp", aarch64_gcc=r"-march=armv8.2-a\+dotprod" + ) + self.expect_flags( + # asimdfhm implies asimdhp + "asimdfhm", aarch64_gcc=r"-march=armv8.2-a\+fp16\+fp16fml" + ) + self.expect_flags( + "asimddp asimdhp asimdfhm", + aarch64_gcc=r"-march=armv8.2-a\+dotprod\+fp16\+fp16fml" + ) + + def test_targets_exceptions(self): + for targets in ( + "bla bla", "/*@targets", + "/*@targets */", + "/*@targets unknown */", + "/*@targets $unknown_policy avx2 */", + "/*@targets #unknown_group avx2 */", + "/*@targets $ */", + "/*@targets # vsx */", + "/*@targets #$ vsx */", + "/*@targets vsx avx2 ) */", + "/*@targets vsx avx2 (avx2 */", + "/*@targets vsx avx2 () */", + "/*@targets vsx avx2 ($autovec) */", # no features + "/*@targets vsx avx2 (xxx) */", + "/*@targets vsx avx2 (baseline) */", + ) : + try: + self.expect_targets( + targets, + x86="", armhf="", ppc64="" + ) + if self.march() != "unknown": + raise AssertionError( + "excepted an exception for %s" % self.march() + ) + except DistutilsError: + if self.march() == "unknown": + raise AssertionError("excepted no exceptions") + + def test_targets_syntax(self): + for targets in ( + "/*@targets $keep_baseline sse vsx neon*/", + "/*@targets,$keep_baseline,sse,vsx,neon*/", + "/*@targets*$keep_baseline*sse*vsx*neon*/", + """ + /* + ** @targets + ** $keep_baseline, sse vsx,neon + */ + """, + """ + /* + ************@targets************* + ** $keep_baseline, sse vsx, neon + ********************************* + */ + """, + 
""" + /* + /////////////@targets///////////////// + //$keep_baseline//sse//vsx//neon + ///////////////////////////////////// + */ + """, + """ + /* + @targets + $keep_baseline + SSE VSX NEON*/ + """ + ) : + self.expect_targets(targets, + x86="sse", ppc64="vsx", armhf="neon", unknown="" + ) + + def test_targets(self): + # test skipping baseline features + self.expect_targets( + """ + /*@targets + sse sse2 sse41 avx avx2 avx512f + vsx vsx2 vsx3 + neon neon_fp16 asimdhp asimddp + */ + """, + baseline="avx vsx2 asimd", + x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx3" + ) + # test skipping non-dispatch features + self.expect_targets( + """ + /*@targets + sse41 avx avx2 avx512f + vsx2 vsx3 + asimd asimdhp asimddp + */ + """, + baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp", + x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2" + ) + # test skipping features that not supported + self.expect_targets( + """ + /*@targets + sse2 sse41 avx2 avx512f + vsx2 vsx3 + neon asimdhp asimddp + */ + """, + baseline="", + trap_files=".*(avx2|avx512f|vsx3|asimddp).c", + x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon" + ) + # test skipping features that implies each other + self.expect_targets( + """ + /*@targets + sse sse2 avx fma3 avx2 avx512f avx512cd + vsx vsx2 vsx3 + neon neon_vfpv4 neon_fp16 neon_fp16 asimd asimdhp + asimddp asimdfhm + */ + """, + baseline="", + x86_gcc="avx512cd avx512f avx2 fma3 avx sse2", + x86_msvc="avx512cd avx2 avx sse2", + x86_icc="avx512cd avx2 avx sse2", + x86_iccw="avx512cd avx2 avx sse2", + ppc64="vsx3 vsx2 vsx", + ppc64le="vsx3 vsx2", + armhf="asimdfhm asimddp asimdhp asimd neon_vfpv4 neon_fp16 neon", + aarch64="asimdfhm asimddp asimdhp asimd" + ) + + def test_targets_policies(self): + # 'keep_baseline', generate objects for baseline features + self.expect_targets( + """ + /*@targets + $keep_baseline + sse2 sse42 avx2 avx512f + vsx2 vsx3 + neon neon_vfpv4 asimd asimddp + */ + """, + baseline="sse41 avx2 vsx2 asimd vsx3", + 
x86="avx512f avx2 sse42 sse2", + ppc64="vsx3 vsx2", + armhf="asimddp asimd neon_vfpv4 neon", + # neon, neon_vfpv4, asimd implies each other + aarch64="asimddp asimd" + ) + # 'keep_sort', leave the sort as-is + self.expect_targets( + """ + /*@targets + $keep_baseline $keep_sort + avx512f sse42 avx2 sse2 + vsx2 vsx3 + asimd neon neon_vfpv4 asimddp + */ + """, + x86="avx512f sse42 avx2 sse2", + ppc64="vsx2 vsx3", + armhf="asimd neon neon_vfpv4 asimddp", + # neon, neon_vfpv4, asimd implies each other + aarch64="asimd asimddp" + ) + # 'autovec', skipping features that can't be + # vectorized by the compiler + self.expect_targets( + """ + /*@targets + $keep_baseline $keep_sort $autovec + avx512f avx2 sse42 sse41 sse2 + vsx3 vsx2 + asimddp asimd neon_vfpv4 neon + */ + """, + x86_gcc="avx512f avx2 sse42 sse41 sse2", + x86_icc="avx512f avx2 sse42 sse41 sse2", + x86_iccw="avx512f avx2 sse42 sse41 sse2", + x86_msvc="avx512f avx2 sse2", + ppc64="vsx3 vsx2", + armhf="asimddp asimd neon_vfpv4 neon", + # neon, neon_vfpv4, asimd implies each other + aarch64="asimddp asimd" + ) + for policy in ("$maxopt", "$autovec"): + # 'maxopt' and autovec set the max acceptable optimization flags + self.expect_target_flags( + "/*@targets baseline %s */" % policy, + gcc={"baseline":".*-O3.*"}, icc={"baseline":".*-O3.*"}, + iccw={"baseline":".*/O3.*"}, msvc={"baseline":".*/O2.*"}, + unknown={"baseline":".*"} + ) + + # 'werror', force compilers to treat warnings as errors + self.expect_target_flags( + "/*@targets baseline $werror */", + gcc={"baseline":".*-Werror.*"}, icc={"baseline":".*-Werror.*"}, + iccw={"baseline":".*/Werror.*"}, msvc={"baseline":".*/WX.*"}, + unknown={"baseline":".*"} + ) + + def test_targets_groups(self): + self.expect_targets( + """ + /*@targets $keep_baseline baseline #test_group */ + """, + groups=dict( + test_group=(""" + $keep_baseline + asimddp sse2 vsx2 avx2 vsx3 + avx512f asimdhp + """) + ), + x86="avx512f avx2 sse2 baseline", + ppc64="vsx3 vsx2 baseline", + 
armhf="asimddp asimdhp baseline" + ) + # test skip duplicating and sorting + self.expect_targets( + """ + /*@targets + * sse42 avx avx512f + * #test_group_1 + * vsx2 + * #test_group_2 + * asimddp asimdfhm + */ + """, + groups=dict( + test_group_1=(""" + VSX2 vsx3 asimd avx2 SSE41 + """), + test_group_2=(""" + vsx2 vsx3 asImd aVx2 sse41 + """) + ), + x86="avx512f avx2 avx sse42 sse41", + ppc64="vsx3 vsx2", + # vsx2 part of the default baseline of ppc64le, option ("min") + ppc64le="vsx3", + armhf="asimdfhm asimddp asimd", + # asimd part of the default baseline of aarch64, option ("min") + aarch64="asimdfhm asimddp" + ) + + def test_targets_multi(self): + self.expect_targets( + """ + /*@targets + (avx512_clx avx512_cnl) (asimdhp asimddp) + */ + """, + x86=r"\(avx512_clx avx512_cnl\)", + armhf=r"\(asimdhp asimddp\)", + ) + # test skipping implied features and auto-sort + self.expect_targets( + """ + /*@targets + f16c (sse41 avx sse42) (sse3 avx2 avx512f) + vsx2 (vsx vsx3 vsx2) + (neon neon_vfpv4 asimd asimdhp asimddp) + */ + """, + x86="avx512f f16c avx", + ppc64="vsx3 vsx2", + ppc64le="vsx3", # vsx2 part of baseline + armhf=r"\(asimdhp asimddp\)", + ) + # test skipping implied features and keep sort + self.expect_targets( + """ + /*@targets $keep_sort + (sse41 avx sse42) (sse3 avx2 avx512f) + (vsx vsx3 vsx2) + (asimddp neon neon_vfpv4 asimd asimdhp) + */ + """, + x86="avx avx512f", + ppc64="vsx3", + armhf=r"\(asimdhp asimddp\)", + ) + # test compiler variety and avoiding duplicating + self.expect_targets( + """ + /*@targets $keep_sort + fma3 avx2 (fma3 avx2) (avx2 fma3) avx2 fma3 + */ + """, + x86_gcc=r"fma3 avx2 \(fma3 avx2\)", + x86_icc="avx2", x86_iccw="avx2", + x86_msvc="avx2" + ) + +def new_test(arch, cc): + if is_standalone: return textwrap.dedent("""\ + class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt, unittest.TestCase): + arch = '{arch}' + cc = '{cc}' + def __init__(self, methodName="runTest"): + unittest.TestCase.__init__(self, methodName) + 
self.setup() + """).format( + class_name=arch + '_' + cc, arch=arch, cc=cc + ) + return textwrap.dedent("""\ + class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt): + arch = '{arch}' + cc = '{cc}' + """).format( + class_name=arch + '_' + cc, arch=arch, cc=cc + ) +""" +if 1 and is_standalone: + FakeCCompilerOpt.fake_info = "x86_icc" + cco = FakeCCompilerOpt(None, cpu_baseline="avx2") + print(' '.join(cco.cpu_baseline_names())) + print(cco.cpu_baseline_flags()) + unittest.main() + sys.exit() +""" +for arch, compilers in arch_compilers.items(): + for cc in compilers: + exec(new_test(arch, cc)) + +if is_standalone: + unittest.main() diff --git a/numpy/distutils/tests/test_ccompiler_opt_conf.py b/numpy/distutils/tests/test_ccompiler_opt_conf.py new file mode 100644 index 000000000..2f83a59e0 --- /dev/null +++ b/numpy/distutils/tests/test_ccompiler_opt_conf.py @@ -0,0 +1,169 @@ +import unittest +from os import sys, path + +is_standalone = __name__ == '__main__' and __package__ is None +if is_standalone: + sys.path.append(path.abspath(path.join(path.dirname(__file__), ".."))) + from ccompiler_opt import CCompilerOpt +else: + from numpy.distutils.ccompiler_opt import CCompilerOpt + +arch_compilers = dict( + x86 = ("gcc", "clang", "icc", "iccw", "msvc"), + x64 = ("gcc", "clang", "icc", "iccw", "msvc"), + ppc64 = ("gcc", "clang"), + ppc64le = ("gcc", "clang"), + armhf = ("gcc", "clang"), + aarch64 = ("gcc", "clang"), + narch = ("gcc",) +) + +class FakeCCompilerOpt(CCompilerOpt): + fake_info = "" + def __init__(self, *args, **kwargs): + CCompilerOpt.__init__(self, None, **kwargs) + def dist_compile(self, sources, flags, **kwargs): + return sources + def dist_info(self): + return FakeCCompilerOpt.fake_info + @staticmethod + def dist_log(*args, stderr=False): + pass + +class _TestConfFeatures(FakeCCompilerOpt): + """A hook to check the sanity of configured features +- before it called by the abstract class '_Feature' + """ + + def conf_features_partial(self): + conf_all = 
self.conf_features + for feature_name, feature in conf_all.items(): + self.test_feature( + "attribute conf_features", + conf_all, feature_name, feature + ) + + conf_partial = FakeCCompilerOpt.conf_features_partial(self) + for feature_name, feature in conf_partial.items(): + self.test_feature( + "conf_features_partial()", + conf_partial, feature_name, feature + ) + return conf_partial + + def test_feature(self, log, search_in, feature_name, feature_dict): + error_msg = ( + "during validate '{}' within feature '{}', " + "march '{}' and compiler '{}'\n>> " + ).format(log, feature_name, self.cc_march, self.cc_name) + + if not feature_name.isupper(): + raise AssertionError(error_msg + "feature name must be in uppercase") + + for option, val in feature_dict.items(): + self.test_option_types(error_msg, option, val) + self.test_duplicates(error_msg, option, val) + + self.test_implies(error_msg, search_in, feature_name, feature_dict) + self.test_group(error_msg, search_in, feature_name, feature_dict) + + def test_option_types(self, error_msg, option, val): + for tp, available in ( + ((str, list), ( + "implies", "headers", "flags", "group", "detect" + )), + ((str,), ("disable",)), + ((int,), ("interest",)), + ((bool,), ("implies_detect",)), + ((bool, type(None)), ("autovec",)), + ) : + found_it = option in available + if not found_it: + continue + if not isinstance(val, tp): + error_tp = [t.__name__ for t in (*tp,)] + error_tp = ' or '.join(error_tp) + raise AssertionError(error_msg + \ + "expected '%s' type for option '%s' not '%s'" % ( + error_tp, option, type(val).__name__ + )) + break + + if not found_it: + raise AssertionError(error_msg + \ + "invalid option name '%s'" % option + ) + + def test_duplicates(self, error_msg, option, val): + if option not in ( + "implies", "headers", "flags", "group", "detect" + ) : return + + if isinstance(val, str): + val = val.split() + + if len(val) != len(set(val)): + raise AssertionError(error_msg + \ + "duplicated values in option 
'%s'" % option + ) + + def test_implies(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + implies = feature_dict.get("implies", "") + if not implies: + return + if isinstance(implies, str): + implies = implies.split() + + if feature_name in implies: + raise AssertionError(error_msg + \ + "feature implies itself" + ) + + for impl in implies: + impl_dict = search_in.get(impl) + if impl_dict is not None: + if "disable" in impl_dict: + raise AssertionError(error_msg + \ + "implies disabled feature '%s'" % impl + ) + continue + raise AssertionError(error_msg + \ + "implies non-exist feature '%s'" % impl + ) + + def test_group(self, error_msg, search_in, feature_name, feature_dict): + if feature_dict.get("disabled") is not None: + return + group = feature_dict.get("group", "") + if not group: + return + if isinstance(group, str): + group = group.split() + + for f in group: + impl_dict = search_in.get(f) + if not impl_dict or "disable" in impl_dict: + continue + raise AssertionError(error_msg + \ + "in option '%s', '%s' already exists as a feature name" % ( + option, f + )) + +class TestConfFeatures(unittest.TestCase): + def __init__(self, methodName="runTest"): + unittest.TestCase.__init__(self, methodName) + self.setup() + + def setup(self): + FakeCCompilerOpt.conf_nocache = True + + def test_features(self): + for arch, compilers in arch_compilers.items(): + for cc in compilers: + FakeCCompilerOpt.fake_info = arch + cc + _TestConfFeatures() + +if is_standalone: + unittest.main() diff --git a/numpy/f2py/rules.py b/numpy/f2py/rules.py index 7b25b545a..56f2033ff 100755 --- a/numpy/f2py/rules.py +++ b/numpy/f2py/rules.py @@ -55,6 +55,9 @@ __version__ = "$Revision: 1.129 $"[10:-1] from . import __version__ f2py_version = __version__.version +from .. 
import version as _numpy_version +numpy_version = _numpy_version.version + import os import time import copy @@ -206,6 +209,9 @@ PyMODINIT_FUNC PyInit_#modulename#(void) { \t\t\"This module '#modulename#' is auto-generated with f2py (version:#f2py_version#).\\nFunctions:\\n\"\n#docs#\".\"); \tPyDict_SetItemString(d, \"__doc__\", s); \tPy_DECREF(s); +\ts = PyUnicode_FromString(\"""" + numpy_version + """\"); +\tPyDict_SetItemString(d, \"__f2py_numpy_version__\", s); +\tPy_DECREF(s); \t#modulename#_error = PyErr_NewException (\"#modulename#.error\", NULL, NULL); \t/* \t * Store the error object inside the dict, so that it could get deallocated. diff --git a/numpy/f2py/tests/test_regression.py b/numpy/f2py/tests/test_regression.py index 67e00f1f7..a1b772069 100644 --- a/numpy/f2py/tests/test_regression.py +++ b/numpy/f2py/tests/test_regression.py @@ -2,7 +2,7 @@ import os import pytest import numpy as np -from numpy.testing import assert_raises, assert_equal +from numpy.testing import assert_, assert_raises, assert_equal, assert_string_equal from . import util @@ -25,3 +25,23 @@ class TestIntentInOut(util.F2PyTest): x = np.arange(3, dtype=np.float32) self.module.foo(x) assert_equal(x, [3, 1, 2]) + + +class TestNumpyVersionAttribute(util.F2PyTest): + # Check that the attribute __f2py_numpy_version__ is present + # in the compiled module and that it has the value np.__version__.
+ sources = [_path('src', 'regression', 'inout.f90')] + + @pytest.mark.slow + def test_numpy_version_attribute(self): + + # Check that self.module has an attribute named "__f2py_numpy_version__" + assert_(hasattr(self.module, "__f2py_numpy_version__"), + msg="Fortran module does not have __f2py_numpy_version__") + + # Check that the attribute __f2py_numpy_version__ is a string + assert_(isinstance(self.module.__f2py_numpy_version__, str), + msg="__f2py_numpy_version__ is not a string") + + # Check that __f2py_numpy_version__ has the value numpy.__version__ + assert_string_equal(np.__version__, self.module.__f2py_numpy_version__) diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index f5a548433..6d6222d3e 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -784,6 +784,7 @@ def _getconv(dtype): else: return asstr + # amount of lines loadtxt reads in one chunk, can be overridden for testing _loadtxt_chunksize = 50000 @@ -914,68 +915,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, [ 19.22, 64.31], [-17.57, 63.94]]) """ - # Type conversions for Py3 convenience - if comments is not None: - if isinstance(comments, (str, bytes)): - comments = [comments] - comments = [_decode_line(x) for x in comments] - # Compile regex for comments beforehand - comments = (re.escape(comment) for comment in comments) - regex_comments = re.compile('|'.join(comments)) - - if delimiter is not None: - delimiter = _decode_line(delimiter) - - user_converters = converters - - if encoding == 'bytes': - encoding = None - byte_converters = True - else: - byte_converters = False - - if usecols is not None: - # Allow usecols to be a single int or a sequence of ints - try: - usecols_as_list = list(usecols) - except TypeError: - usecols_as_list = [usecols] - for col_idx in usecols_as_list: - try: - opindex(col_idx) - except TypeError as e: - e.args = ( - "usecols must be an int or a sequence of ints but " - "it contains at least one element of type %s" % - type(col_idx), - 
) - raise - # Fall back to existing code - usecols = usecols_as_list - - fown = False - try: - if isinstance(fname, os_PathLike): - fname = os_fspath(fname) - if _is_string_like(fname): - fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) - fencoding = getattr(fh, 'encoding', 'latin1') - fh = iter(fh) - fown = True - else: - fh = iter(fname) - fencoding = getattr(fname, 'encoding', 'latin1') - except TypeError: - raise ValueError('fname must be a string, file handle, or generator') - # input may be a python2 io stream - if encoding is not None: - fencoding = encoding - # we must assume local encoding - # TODO emit portability warning? - elif fencoding is None: - import locale - fencoding = locale.getpreferredencoding() + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Nested functions used by loadtxt. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # not to be confused with the flatten_dtype we import... @recursive @@ -1075,11 +1018,84 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if X: yield X + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Main body of loadtxt. 
+ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + # Check correctness of the values of `ndmin` + if ndmin not in [0, 1, 2]: + raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) + + # Type conversions for Py3 convenience + if comments is not None: + if isinstance(comments, (str, bytes)): + comments = [comments] + comments = [_decode_line(x) for x in comments] + # Compile regex for comments beforehand + comments = (re.escape(comment) for comment in comments) + regex_comments = re.compile('|'.join(comments)) + + if delimiter is not None: + delimiter = _decode_line(delimiter) + + user_converters = converters + + if encoding == 'bytes': + encoding = None + byte_converters = True + else: + byte_converters = False + + if usecols is not None: + # Allow usecols to be a single int or a sequence of ints + try: + usecols_as_list = list(usecols) + except TypeError: + usecols_as_list = [usecols] + for col_idx in usecols_as_list: + try: + opindex(col_idx) + except TypeError as e: + e.args = ( + "usecols must be an int or a sequence of ints but " + "it contains at least one element of type %s" % + type(col_idx), + ) + raise + # Fall back to existing code + usecols = usecols_as_list + + # Make sure we're dealing with a proper dtype + dtype = np.dtype(dtype) + defconv = _getconv(dtype) + + dtype_types, packing = flatten_dtype_internal(dtype) + + fown = False try: - # Make sure we're dealing with a proper dtype - dtype = np.dtype(dtype) - defconv = _getconv(dtype) + if isinstance(fname, os_PathLike): + fname = os_fspath(fname) + if _is_string_like(fname): + fh = np.lib._datasource.open(fname, 'rt', encoding=encoding) + fencoding = getattr(fh, 'encoding', 'latin1') + fh = iter(fh) + fown = True + else: + fh = iter(fname) + fencoding = getattr(fname, 'encoding', 'latin1') + except TypeError: + raise ValueError('fname must be a string, file handle, or generator') + # input may be a python2 io stream + if encoding is not None: + fencoding = 
encoding + # we must assume local encoding + # TODO emit portability warning? + elif fencoding is None: + import locale + fencoding = locale.getpreferredencoding() + + try: # Skip the first `skiprows` lines for i in range(skiprows): next(fh) @@ -1095,10 +1111,12 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # End of lines reached first_line = '' first_vals = [] - warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2) + warnings.warn('loadtxt: Empty input file: "%s"' % fname, + stacklevel=2) N = len(usecols or first_vals) - dtype_types, packing = flatten_dtype_internal(dtype) + # Now that we know N, create the default converters list, and + # set packing, if necessary. if len(dtype_types) > 1: # We're dealing with a structured array, each field of # the dtype matches a column @@ -1118,8 +1136,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Unused converter specified continue if byte_converters: - # converters may use decode to workaround numpy's old behaviour, - # so encode the string again before passing to the user converter + # converters may use decode to workaround numpy's old + # behaviour, so encode the string again before passing to + # the user converter def tobytes_first(x, conv): if type(x) is bytes: return conv(x) @@ -1158,9 +1177,6 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, X.shape = (1, -1) # Verify that the array has at least dimensions `ndmin`. 
- # Check correctness of the values of `ndmin` - if ndmin not in [0, 1, 2]: - raise ValueError('Illegal value of ndmin keyword: %s' % ndmin) # Tweak the size and shape of the arrays - remove extraneous dimensions if X.ndim > ndmin: X = np.squeeze(X) diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index db563e30c..e0f723a3c 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -957,7 +957,7 @@ def test__replace_nan(): """ Test that _replace_nan returns the original array if there are no NaNs, not a copy. """ - for dtype in [np.bool, np.int32, np.int64]: + for dtype in [np.bool_, np.int32, np.int64]: arr = np.array([0, 1], dtype=dtype) result, mask = _replace_nan(arr, 0) assert mask is None diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 8d612b8ed..b5371f51a 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -21,6 +21,7 @@ Released for unlimited redistribution. """ # pylint: disable-msg=E1002 import builtins +import inspect import operator import warnings import textwrap @@ -122,15 +123,8 @@ def doc_note(initialdoc, note): if note is None: return initialdoc - notesplit = re.split(r'\n\s*?Notes\n\s*?-----', initialdoc) - - notedoc = """\ -Notes - ----- - %s""" % note - - if len(notesplit) > 1: - notedoc = '\n\n ' + notedoc + '\n' + notesplit = re.split(r'\n\s*?Notes\n\s*?-----', inspect.cleandoc(initialdoc)) + notedoc = "\n\nNotes\n-----\n%s\n" % inspect.cleandoc(note) return ''.join(notesplit[:1] + [notedoc] + notesplit[1:]) diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index f86ebf551..8ede29da1 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -244,11 +244,6 @@ class _fromnxfunction: the new masked array version of the function. A note on application of the function to the mask is appended. - .. 
warning:: - If the function docstring already contained a Notes section, the - new docstring will have two Notes sections instead of appending a note - to the existing section. - Parameters ---------- None @@ -258,9 +253,9 @@ class _fromnxfunction: doc = getattr(npfunc, '__doc__', None) if doc: sig = self.__name__ + ma.get_object_signature(npfunc) - locdoc = "Notes\n-----\nThe function is applied to both the _data"\ - " and the _mask, if any." - return '\n'.join((sig, doc, locdoc)) + doc = ma.doc_note(doc, "The function is applied to both the _data " + "and the _mask, if any.") + return '\n\n'.join((sig, doc)) return def __call__(self, *args, **params): diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 6f34144bb..76a92f5ca 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -34,8 +34,8 @@ from numpy.ma.core import ( MAError, MaskError, MaskType, MaskedArray, abs, absolute, add, all, allclose, allequal, alltrue, angle, anom, arange, arccos, arccosh, arctan2, arcsin, arctan, argsort, array, asarray, choose, concatenate, - conjugate, cos, cosh, count, default_fill_value, diag, divide, empty, - empty_like, equal, exp, flatten_mask, filled, fix_invalid, + conjugate, cos, cosh, count, default_fill_value, diag, divide, doc_note, + empty, empty_like, equal, exp, flatten_mask, filled, fix_invalid, flatten_structured_array, fromflex, getmask, getmaskarray, greater, greater_equal, identity, inner, isMaskedArray, less, less_equal, log, log10, make_mask, make_mask_descr, mask_or, masked, masked_array, @@ -5283,3 +5283,33 @@ def test_mask_shape_assignment_does_not_break_masked(): b = np.ma.array(1, mask=a.mask) b.shape = (1,) assert_equal(a.mask.shape, ()) + +@pytest.mark.skipif(sys.flags.optimize > 1, + reason="no docstrings present to inspect when PYTHONOPTIMIZE/Py_OptimizeFlag > 1") +def test_doc_note(): + def method(self): + """This docstring + + Has multiple lines + + And notes + + Notes + ----- + original note + """ + 
pass + + expected_doc = """This docstring + +Has multiple lines + +And notes + +Notes +----- +note + +original note""" + + assert_equal(np.ma.core.doc_note(method.__doc__, "note"), expected_doc) diff --git a/numpy/polynomial/_polybase.py b/numpy/polynomial/_polybase.py index 30887b670..b5341ba37 100644 --- a/numpy/polynomial/_polybase.py +++ b/numpy/polynomial/_polybase.py @@ -919,10 +919,8 @@ class ABCPolyBase(abc.ABC): ---------- x : array_like, shape (M,) x-coordinates of the M sample points ``(x[i], y[i])``. - y : array_like, shape (M,) or (M, K) - y-coordinates of the sample points. Several data sets of sample - points sharing the same x-coordinates can be fitted at once by - passing in a 2D-array that contains one dataset per column. + y : array_like, shape (M,) + y-coordinates of the M sample points ``(x[i], y[i])``. deg : int or 1-D array_like Degree(s) of the fitting polynomials. If `deg` is a single integer all terms up to and including the `deg`'th term are included in the diff --git a/numpy/random/bit_generator.pyx b/numpy/random/bit_generator.pyx index f145ec13d..3c52a9933 100644 --- a/numpy/random/bit_generator.pyx +++ b/numpy/random/bit_generator.pyx @@ -382,13 +382,22 @@ cdef class SeedSequence(): ------- entropy_array : 1D uint32 array """ - # Convert run-entropy, program-entropy, and the spawn key into uint32 + # Convert run-entropy and the spawn key into uint32 # arrays and concatenate them. # We MUST have at least some run-entropy. The others are optional. assert self.entropy is not None run_entropy = _coerce_to_uint32_array(self.entropy) spawn_entropy = _coerce_to_uint32_array(self.spawn_key) + if len(spawn_entropy) > 0 and len(run_entropy) < self.pool_size: + # Explicitly fill out the entropy with 0s to the pool size to avoid + # conflict with spawn keys. We changed this in 1.19.0 to fix + # gh-16539. 
In order to preserve stream-compatibility with + # unspawned SeedSequences with small entropy inputs, we only do + # this when a spawn_key is specified. + diff = self.pool_size - len(run_entropy) + run_entropy = np.concatenate( + [run_entropy, np.zeros(diff, dtype=np.uint32)]) entropy_array = np.concatenate([run_entropy, spawn_entropy]) return entropy_array diff --git a/numpy/random/tests/test_seed_sequence.py b/numpy/random/tests/test_seed_sequence.py index fe23680ed..f08cf80fa 100644 --- a/numpy/random/tests/test_seed_sequence.py +++ b/numpy/random/tests/test_seed_sequence.py @@ -1,5 +1,5 @@ import numpy as np -from numpy.testing import assert_array_equal +from numpy.testing import assert_array_equal, assert_array_compare from numpy.random import SeedSequence @@ -52,3 +52,29 @@ def test_reference_data(): assert_array_equal(state, expected) state64 = ss.generate_state(len(expected64), dtype=np.uint64) assert_array_equal(state64, expected64) + + +def test_zero_padding(): + """ Ensure that the implicit zero-padding does not cause problems. + """ + # Ensure that large integers are inserted in little-endian fashion to avoid + # trailing 0s. + ss0 = SeedSequence(42) + ss1 = SeedSequence(42 << 32) + assert_array_compare( + np.not_equal, + ss0.generate_state(4), + ss1.generate_state(4)) + + # Ensure backwards compatibility with the original 1.17 release for small + # integers and no spawn key. + expected42 = np.array([3444837047, 2669555309, 2046530742, 3581440988], + dtype=np.uint32) + assert_array_equal(SeedSequence(42).generate_state(4), expected42) + + # Regression test for gh-16539 to ensure that the implicit 0s don't + # conflict with spawn keys.
+ assert_array_compare( + np.not_equal, + SeedSequence(42, spawn_key=(0,)).generate_state(4), + expected42) diff --git a/numpy/setup.py b/numpy/setup.py index c6498d101..cbf633504 100644 --- a/numpy/setup.py +++ b/numpy/setup.py @@ -17,6 +17,7 @@ def configuration(parent_package='',top_path=None): config.add_subpackage('polynomial') config.add_subpackage('random') config.add_subpackage('testing') + config.add_subpackage('typing') config.add_data_dir('doc') config.add_data_files('py.typed') config.add_data_files('*.pyi') diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index ef623255b..3827b7505 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -719,6 +719,8 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, at the same locations. """ + __tracebackhide__ = True # Hide traceback for py.test + x_id = func(x) y_id = func(y) # We include work-arounds here to handle three types of slightly diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index b899e94f4..6a6cc664a 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -941,6 +941,17 @@ class TestArrayAlmostEqualNulp: assert_raises(AssertionError, assert_array_almost_equal_nulp, x, y, nulp) + def test_float64_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint64(0xffffffff) + nan1_i64 = np.array(np.nan, dtype=np.float64).view(np.uint64) + nan2_i64 = nan1_i64 ^ offset # nan payload on MIPS is all ones. 
+ nan1_f64 = nan1_i64.view(np.float64) + nan2_f64 = nan2_i64.view(np.float64) + assert_array_max_ulp(nan1_f64, nan2_f64, 0) + def test_float32_pass(self): nulp = 5 x = np.linspace(-20, 20, 50, dtype=np.float32) @@ -971,6 +982,17 @@ class TestArrayAlmostEqualNulp: assert_raises(AssertionError, assert_array_almost_equal_nulp, x, y, nulp) + def test_float32_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint32(0xffff) + nan1_i32 = np.array(np.nan, dtype=np.float32).view(np.uint32) + nan2_i32 = nan1_i32 ^ offset # nan payload on MIPS is all ones. + nan1_f32 = nan1_i32.view(np.float32) + nan2_f32 = nan2_i32.view(np.float32) + assert_array_max_ulp(nan1_f32, nan2_f32, 0) + def test_float16_pass(self): nulp = 5 x = np.linspace(-4, 4, 10, dtype=np.float16) @@ -1001,6 +1023,17 @@ class TestArrayAlmostEqualNulp: assert_raises(AssertionError, assert_array_almost_equal_nulp, x, y, nulp) + def test_float16_ignore_nan(self): + # Ignore ULP differences between various NAN's + # Note that MIPS may reverse quiet and signaling nans + # so we use the builtin version as a base. + offset = np.uint16(0xff) + nan1_i16 = np.array(np.nan, dtype=np.float16).view(np.uint16) + nan2_i16 = nan1_i16 ^ offset # nan payload on MIPS is all ones. + nan1_f16 = nan1_i16.view(np.float16) + nan2_f16 = nan2_i16.view(np.float16) + assert_array_max_ulp(nan1_f16, nan2_f16, 0) + def test_complex128_pass(self): nulp = 5 x = np.linspace(-20, 20, 50, dtype=np.float64) diff --git a/numpy/tests/test_public_api.py b/numpy/tests/test_public_api.py index 7ce74bc43..cc4c5d8c5 100644 --- a/numpy/tests/test_public_api.py +++ b/numpy/tests/test_public_api.py @@ -54,18 +54,22 @@ def test_numpy_namespace(): 'show_config': 'numpy.__config__.show', 'who': 'numpy.lib.utils.who', } - # These built-in types are re-exported by numpy. 
- builtins = { - 'bool': 'builtins.bool', - 'complex': 'builtins.complex', - 'float': 'builtins.float', - 'int': 'builtins.int', - 'long': 'builtins.int', - 'object': 'builtins.object', - 'str': 'builtins.str', - 'unicode': 'builtins.str', - } - whitelist = dict(undocumented, **builtins) + if sys.version_info < (3, 7): + # These built-in types are re-exported by numpy. + builtins = { + 'bool': 'builtins.bool', + 'complex': 'builtins.complex', + 'float': 'builtins.float', + 'int': 'builtins.int', + 'long': 'builtins.int', + 'object': 'builtins.object', + 'str': 'builtins.str', + 'unicode': 'builtins.str', + } + whitelist = dict(undocumented, **builtins) + else: + # after 3.7, we override dir to not show these members + whitelist = undocumented bad_results = check_dir(np) # pytest gives better error messages with the builtin assert than with # assert_equal @@ -98,7 +102,7 @@ def test_dir_testing(): """Assert that output of dir has only one "testing/tester" attribute without duplicate""" assert len(dir(np)) == len(set(dir(np))) - + def test_numpy_linalg(): bad_results = check_dir(np.linalg) @@ -176,6 +180,7 @@ PUBLIC_MODULES = ['numpy.' + s for s in [ "polynomial.polyutils", "random", "testing", + "typing", "version", ]] @@ -209,6 +214,7 @@ PRIVATE_BUT_PRESENT_MODULES = ['numpy.' 
+ s for s in [ "core.umath", "core.umath_tests", "distutils.ccompiler", + 'distutils.ccompiler_opt', "distutils.command", "distutils.command.autodist", "distutils.command.bdist_rpm", diff --git a/numpy/tests/typing/fail/array_like.py b/numpy/tests/typing/fail/array_like.py index a5ef5795f..a97e72dc7 100644 --- a/numpy/tests/typing/fail/array_like.py +++ b/numpy/tests/typing/fail/array_like.py @@ -1,11 +1,5 @@ -from typing import Any, TYPE_CHECKING - import numpy as np - -if TYPE_CHECKING: - from numpy.typing import ArrayLike -else: - ArrayLike = Any +from numpy.typing import ArrayLike class A: diff --git a/numpy/tests/typing/fail/fromnumeric.py b/numpy/tests/typing/fail/fromnumeric.py index f158a1071..7455ce722 100644 --- a/numpy/tests/typing/fail/fromnumeric.py +++ b/numpy/tests/typing/fail/fromnumeric.py @@ -22,11 +22,9 @@ np.choose(A, mode="bob") # E: No overload variant of "choose" matches argument np.repeat(a, None) # E: Argument 2 to "repeat" has incompatible type np.repeat(A, 1, axis=1.0) # E: Argument "axis" to "repeat" has incompatible type -np.swapaxes(a, 0, 0) # E: Argument 1 to "swapaxes" has incompatible type np.swapaxes(A, None, 1) # E: Argument 2 to "swapaxes" has incompatible type np.swapaxes(A, 1, [0]) # E: Argument 3 to "swapaxes" has incompatible type -np.transpose(a, axes=1) # E: Argument "axes" to "transpose" has incompatible type np.transpose(A, axes=1.0) # E: Argument "axes" to "transpose" has incompatible type np.partition(a, None) # E: Argument 2 to "partition" has incompatible type @@ -53,25 +51,20 @@ np.argpartition( A, 0, order=range(5) # E: Argument "order" to "argpartition" has incompatible type ) -np.sort(a) # E: Argument 1 to "sort" has incompatible type np.sort(A, axis="bob") # E: Argument "axis" to "sort" has incompatible type np.sort(A, kind="bob") # E: Argument "kind" to "sort" has incompatible type np.sort(A, order=range(5)) # E: Argument "order" to "sort" has incompatible type -np.argsort(a) # E: Argument 1 to "argsort" has 
incompatible type np.argsort(A, axis="bob") # E: Argument "axis" to "argsort" has incompatible type np.argsort(A, kind="bob") # E: Argument "kind" to "argsort" has incompatible type np.argsort(A, order=range(5)) # E: Argument "order" to "argsort" has incompatible type -np.argmax(a) # E: No overload variant of "argmax" matches argument type np.argmax(A, axis="bob") # E: No overload variant of "argmax" matches argument type np.argmax(A, kind="bob") # E: No overload variant of "argmax" matches argument type -np.argmin(a) # E: No overload variant of "argmin" matches argument type np.argmin(A, axis="bob") # E: No overload variant of "argmin" matches argument type np.argmin(A, kind="bob") # E: No overload variant of "argmin" matches argument type -np.searchsorted(a, 0) # E: No overload variant of "searchsorted" matches argument type np.searchsorted( # E: No overload variant of "searchsorted" matches argument type A[0], 0, side="bob" ) @@ -83,19 +76,16 @@ np.resize(A, 1.0) # E: Argument 2 to "resize" has incompatible type np.squeeze(A, 1.0) # E: No overload variant of "squeeze" matches argument type -np.diagonal(a) # E: Argument 1 to "diagonal" has incompatible type np.diagonal(A, offset=None) # E: Argument "offset" to "diagonal" has incompatible type np.diagonal(A, axis1="bob") # E: Argument "axis1" to "diagonal" has incompatible type np.diagonal(A, axis2=[]) # E: Argument "axis2" to "diagonal" has incompatible type -np.trace(a) # E: Argument 1 to "trace" has incompatible type np.trace(A, offset=None) # E: Argument "offset" to "trace" has incompatible type np.trace(A, axis1="bob") # E: Argument "axis1" to "trace" has incompatible type np.trace(A, axis2=[]) # E: Argument "axis2" to "trace" has incompatible type np.ravel(a, order="bob") # E: Argument "order" to "ravel" has incompatible type -np.compress(True, A) # E: Argument 1 to "compress" has incompatible type np.compress( [True], A, axis=1.0 # E: Argument "axis" to "compress" has incompatible type ) diff --git 
a/numpy/tests/typing/fail/scalars.py b/numpy/tests/typing/fail/scalars.py index 0dfc55124..5d7221895 100644 --- a/numpy/tests/typing/fail/scalars.py +++ b/numpy/tests/typing/fail/scalars.py @@ -65,3 +65,17 @@ np.floating(1) # E: Cannot instantiate abstract class np.complexfloating(1) # E: Cannot instantiate abstract class np.character("test") # E: Cannot instantiate abstract class np.flexible(b"test") # E: Cannot instantiate abstract class + +np.float64(value=0.0) # E: Unexpected keyword argument +np.int64(value=0) # E: Unexpected keyword argument +np.uint64(value=0) # E: Unexpected keyword argument +np.complex128(value=0.0j) # E: Unexpected keyword argument +np.str_(value='bob') # E: No overload variant +np.bytes_(value=b'test') # E: No overload variant +np.void(value=b'test') # E: Unexpected keyword argument +np.bool_(value=True) # E: Unexpected keyword argument +np.datetime64(value="2019") # E: No overload variant +np.timedelta64(value=0) # E: Unexpected keyword argument + +np.bytes_(b"hello", encoding='utf-8') # E: No overload variant +np.str_("hello", encoding='utf-8') # E: No overload variant diff --git a/numpy/tests/typing/pass/array_like.py b/numpy/tests/typing/pass/array_like.py index 098149c4b..e668b4963 100644 --- a/numpy/tests/typing/pass/array_like.py +++ b/numpy/tests/typing/pass/array_like.py @@ -1,13 +1,7 @@ -from typing import Any, List, Optional, TYPE_CHECKING +from typing import Any, List, Optional import numpy as np - -if TYPE_CHECKING: - from numpy.typing import ArrayLike, DtypeLike, _SupportsArray -else: - ArrayLike = Any - DtypeLike = Any - _SupportsArray = Any +from numpy.typing import ArrayLike, DtypeLike, _SupportsArray x1: ArrayLike = True x2: ArrayLike = 5 diff --git a/numpy/tests/typing/pass/dtype.py b/numpy/tests/typing/pass/dtype.py new file mode 100644 index 000000000..f954fdd44 --- /dev/null +++ b/numpy/tests/typing/pass/dtype.py @@ -0,0 +1,3 @@ +import numpy as np + +np.dtype(dtype=np.int64) diff --git 
a/numpy/tests/typing/pass/scalars.py b/numpy/tests/typing/pass/scalars.py index bd055673b..7de182626 100644 --- a/numpy/tests/typing/pass/scalars.py +++ b/numpy/tests/typing/pass/scalars.py @@ -34,7 +34,11 @@ np.float32(16) np.float64(3.0) np.bytes_(b"hello") +np.bytes_("hello", 'utf-8') +np.bytes_("hello", encoding='utf-8') np.str_("hello") +np.str_(b"hello", 'utf-8') +np.str_(b"hello", encoding='utf-8') # Protocols float(np.int8(4)) diff --git a/numpy/typing/__init__.py b/numpy/typing/__init__.py new file mode 100644 index 000000000..f2000823f --- /dev/null +++ b/numpy/typing/__init__.py @@ -0,0 +1,81 @@ +""" +============================ +Typing (:mod:`numpy.typing`) +============================ + +.. warning:: + + Some of the types in this module rely on features only present in + the standard library in Python 3.8 and greater. If you want to use + these types in earlier versions of Python, you should install the + typing-extensions_ package. + +Large parts of the NumPy API have PEP-484-style type annotations. In +addition, the following type aliases are available for users. + +- ``typing.ArrayLike``: objects that can be converted to arrays +- ``typing.DtypeLike``: objects that can be converted to dtypes + +Roughly speaking, ``typing.ArrayLike`` is "objects that can be used as +inputs to ``np.array``" and ``typing.DtypeLike`` is "objects that can +be used as inputs to ``np.dtype``". + +.. _typing-extensions: https://pypi.org/project/typing-extensions/ + +Differences from the runtime NumPy API +-------------------------------------- + +NumPy is very flexible. Trying to describe the full range of +possibilities statically would result in types that are not very +helpful. For that reason, the typed NumPy API is often stricter than +the runtime NumPy API. This section describes some notable +differences. + +ArrayLike +~~~~~~~~~ + +The ``ArrayLike`` type tries to avoid creating object arrays. For +example, + +.. 
code-block:: python + + >>> np.array(x**2 for x in range(10)) + array(<generator object <genexpr> at 0x10c004cd0>, dtype=object) + +is valid NumPy code which will create a 0-dimensional object +array. Type checkers will complain about the above example when using +the NumPy types however. If you really intended to do the above, then +you can either use a ``# type: ignore`` comment: + +.. code-block:: python + + >>> np.array(x**2 for x in range(10)) # type: ignore + +or explicitly type the array-like object as ``Any``: + +.. code-block:: python + + >>> from typing import Any + >>> array_like: Any = (x**2 for x in range(10)) + >>> np.array(array_like) + array(<generator object <genexpr> at 0x1192741d0>, dtype=object) + +ndarray +~~~~~~~ + +It's possible to mutate the dtype of an array at runtime. For example, +the following code is valid: + +.. code-block:: python + + x = np.array([1, 2]) + x.dtype = np.bool_ + +This sort of mutation is not allowed by the types. Users who want to +write statically typed code should instead use the `numpy.ndarray.view` +method to create a view of the array with a different dtype. + +""" +from ._array_like import _SupportsArray, ArrayLike +from ._shape import _Shape, _ShapeLike +from ._dtype_like import DtypeLike diff --git a/numpy/typing/_array_like.py b/numpy/typing/_array_like.py new file mode 100644 index 000000000..76c0c839c --- /dev/null +++ b/numpy/typing/_array_like.py @@ -0,0 +1,34 @@ +import sys +from typing import Any, overload, Sequence, TYPE_CHECKING, Union + +from numpy import ndarray +from ._dtype_like import DtypeLike + +if sys.version_info >= (3, 8): + from typing import Protocol + HAVE_PROTOCOL = True +else: + try: + from typing_extensions import Protocol + except ImportError: + HAVE_PROTOCOL = False + else: + HAVE_PROTOCOL = True + +if TYPE_CHECKING or HAVE_PROTOCOL: + class _SupportsArray(Protocol): + @overload + def __array__(self, __dtype: DtypeLike = ...) -> ndarray: ...
+ @overload + def __array__(self, dtype: DtypeLike = ...) -> ndarray: ... +else: + _SupportsArray = Any + +# TODO: support buffer protocols once +# +# https://bugs.python.org/issue27501 +# +# is resolved. See also the mypy issue: +# +# https://github.com/python/typing/issues/593 +ArrayLike = Union[bool, int, float, complex, _SupportsArray, Sequence] diff --git a/numpy/typing.pyi b/numpy/typing/_dtype_like.py index f5705192a..b9df0af04 100644 --- a/numpy/typing.pyi +++ b/numpy/typing/_dtype_like.py @@ -1,17 +1,7 @@ -import sys -from typing import Any, Dict, List, overload, Sequence, Text, Tuple, Union +from typing import Any, Dict, List, Sequence, Tuple, Union -from numpy import dtype, ndarray - -if sys.version_info >= (3, 8): - from typing import Protocol -else: - from typing_extensions import Protocol - -_Shape = Tuple[int, ...] - -# Anything that can be coerced to a shape tuple -_ShapeLike = Union[int, Sequence[int]] +from numpy import dtype +from ._shape import _ShapeLike _DtypeLikeNested = Any # TODO: wait for support for recursive types @@ -45,7 +35,7 @@ DtypeLike = Union[ Sequence[str], # names Sequence[_DtypeLikeNested], # formats Sequence[int], # offsets - Sequence[Union[bytes, Text, None]], # titles + Sequence[Union[bytes, str, None]], # titles int, # itemsize ], ], @@ -54,11 +44,3 @@ DtypeLike = Union[ # (base_dtype, new_dtype) Tuple[_DtypeLikeNested, _DtypeLikeNested], ] - -class _SupportsArray(Protocol): - @overload - def __array__(self, __dtype: DtypeLike = ...) -> ndarray: ... - @overload - def __array__(self, dtype: DtypeLike = ...) -> ndarray: ... - -ArrayLike = Union[bool, int, float, complex, _SupportsArray, Sequence] diff --git a/numpy/typing/_shape.py b/numpy/typing/_shape.py new file mode 100644 index 000000000..4629046ea --- /dev/null +++ b/numpy/typing/_shape.py @@ -0,0 +1,6 @@ +from typing import Sequence, Tuple, Union + +_Shape = Tuple[int, ...] + +# Anything that can be coerced to a shape tuple +_ShapeLike = Union[int, Sequence[int]] |