diff options
Diffstat (limited to 'numpy/core')
44 files changed, 1534 insertions, 952 deletions
diff --git a/numpy/core/_add_newdocs.py b/numpy/core/_add_newdocs.py index ebe5f6b54..bc034c3e9 100644 --- a/numpy/core/_add_newdocs.py +++ b/numpy/core/_add_newdocs.py @@ -1072,6 +1072,43 @@ add_newdoc('numpy.core.multiarray', 'fromstring', """) +add_newdoc('numpy.core.multiarray', 'compare_chararrays', + """ + compare_chararrays(a, b, cmp_op, rstrip) + + Performs element-wise comparison of two string arrays using the + comparison operator specified by `cmp_op`. + + Parameters + ---------- + a, b : array_like + Arrays to be compared. + cmp_op : {"<", "<=", "==", ">=", ">", "!="} + Type of comparison. + rstrip : Boolean + If True, the spaces at the end of Strings are removed before the comparison. + + Returns + ------- + out : ndarray + The output array of type Boolean with the same shape as a and b. + + Raises + ------ + ValueError + If `cmp_op` is not valid. + TypeError + If at least one of `a` or `b` is a non-string array + + Examples + -------- + >>> a = np.array(["a", "b", "cde"]) + >>> b = np.array(["a", "a", "dec"]) + >>> np.compare_chararrays(a, b, ">", True) + array([False, True, False]) + + """) + add_newdoc('numpy.core.multiarray', 'fromiter', """ fromiter(iterable, dtype, count=-1) @@ -1320,6 +1357,12 @@ add_newdoc('numpy.core.multiarray', 'set_numeric_ops', Set numerical operators for array objects. + .. deprecated:: 1.16 + + For the general case, use :c:func:`PyUFunc_ReplaceLoopBySignature`. + For ndarray subclasses, define the ``__array_ufunc__`` method and + override the relevant ufunc. + Parameters ---------- op1, op2, ... : callable @@ -1597,7 +1640,7 @@ add_newdoc('numpy.core.multiarray', 'c_einsum', """ c_einsum(subscripts, *operands, out=None, dtype=None, order='K', casting='safe') - + *This documentation shadows that of the native python implementation of the `einsum` function, except all references and examples related to the `optimize` argument (v 0.12.0) have been removed.* @@ -2113,7 +2156,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('ctypes', ----- Below are the public attributes of this object which were documented in "Guide to NumPy" (we have omitted undocumented public attributes, - as well as documented private attributes): + as well as documented private attributes): .. autoattribute:: numpy.core._internal._ctypes.data @@ -2455,7 +2498,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('size', Notes ----- - `a.size` returns a standard arbitrary precision Python integer. This + `a.size` returns a standard arbitrary precision Python integer. This may not be the case with other methods of obtaining the same value (like the suggested ``np.prod(a.shape)``, which returns an instance of ``np.int_``), and may be relevant if the value is used further in @@ -4090,7 +4133,7 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('tofile', machines with different endianness. Some of these problems can be overcome by outputting the data as text files, at the expense of speed and file size. - + When fid is a file object, array contents are directly written to the file, bypassing the file object's ``write`` method. As a result, tofile cannot be used with files objects supporting compression (e.g., GzipFile) @@ -4546,184 +4589,6 @@ add_newdoc('numpy.core.umath', 'seterrobj', # ############################################################################## -add_newdoc('numpy.core.multiarray', 'bincount', - """ - bincount(x, weights=None, minlength=0) - - Count number of occurrences of each value in array of non-negative ints. - - The number of bins (of size 1) is one larger than the largest value in - `x`. If `minlength` is specified, there will be at least this number - of bins in the output array (though it will be longer if necessary, - depending on the contents of `x`). - Each bin gives the number of occurrences of its index value in `x`. - If `weights` is specified the input array is weighted by it, i.e. if a - value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead - of ``out[n] += 1``. - - Parameters - ---------- - x : array_like, 1 dimension, nonnegative ints - Input array. - weights : array_like, optional - Weights, array of the same shape as `x`. - minlength : int, optional - A minimum number of bins for the output array. - - .. versionadded:: 1.6.0 - - Returns - ------- - out : ndarray of ints - The result of binning the input array. - The length of `out` is equal to ``np.amax(x)+1``. - - Raises - ------ - ValueError - If the input is not 1-dimensional, or contains elements with negative - values, or if `minlength` is negative. - TypeError - If the type of the input is float or complex. - - See Also - -------- - histogram, digitize, unique - - Examples - -------- - >>> np.bincount(np.arange(5)) - array([1, 1, 1, 1, 1]) - >>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7])) - array([1, 3, 1, 1, 0, 0, 0, 1]) - - >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23]) - >>> np.bincount(x).size == np.amax(x)+1 - True - - The input array needs to be of integer dtype, otherwise a - TypeError is raised: - - >>> np.bincount(np.arange(5, dtype=float)) - Traceback (most recent call last): - File "<stdin>", line 1, in <module> - TypeError: array cannot be safely cast to required type - - A possible use of ``bincount`` is to perform sums over - variable-size chunks of an array, using the ``weights`` keyword. - - >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights - >>> x = np.array([0, 1, 1, 2, 2, 2]) - >>> np.bincount(x, weights=w) - array([ 0.3, 0.7, 1.1]) - - """) - -add_newdoc('numpy.core.multiarray', 'ravel_multi_index', - """ - ravel_multi_index(multi_index, dims, mode='raise', order='C') - - Converts a tuple of index arrays into an array of flat - indices, applying boundary modes to the multi-index. - - Parameters - ---------- - multi_index : tuple of array_like - A tuple of integer arrays, one array for each dimension. - dims : tuple of ints - The shape of array into which the indices from ``multi_index`` apply. - mode : {'raise', 'wrap', 'clip'}, optional - Specifies how out-of-bounds indices are handled. Can specify - either one mode or a tuple of modes, one mode per index. - - * 'raise' -- raise an error (default) - * 'wrap' -- wrap around - * 'clip' -- clip to the range - - In 'clip' mode, a negative index which would normally - wrap will clip to 0 instead. - order : {'C', 'F'}, optional - Determines whether the multi-index should be viewed as - indexing in row-major (C-style) or column-major - (Fortran-style) order. - - Returns - ------- - raveled_indices : ndarray - An array of indices into the flattened version of an array - of dimensions ``dims``. - - See Also - -------- - unravel_index - - Notes - ----- - .. versionadded:: 1.6.0 - - Examples - -------- - >>> arr = np.array([[3,6,6],[4,5,1]]) - >>> np.ravel_multi_index(arr, (7,6)) - array([22, 41, 37]) - >>> np.ravel_multi_index(arr, (7,6), order='F') - array([31, 41, 13]) - >>> np.ravel_multi_index(arr, (4,6), mode='clip') - array([22, 23, 19]) - >>> np.ravel_multi_index(arr, (4,4), mode=('clip','wrap')) - array([12, 13, 13]) - - >>> np.ravel_multi_index((3,1,4,1), (6,7,8,9)) - 1621 - """) - -add_newdoc('numpy.core.multiarray', 'unravel_index', - """ - unravel_index(indices, shape, order='C') - - Converts a flat index or array of flat indices into a tuple - of coordinate arrays. - - Parameters - ---------- - indices : array_like - An integer array whose elements are indices into the flattened - version of an array of dimensions ``shape``. Before version 1.6.0, - this function accepted just one index value. - shape : tuple of ints - The shape of the array to use for unraveling ``indices``. - - .. versionchanged:: 1.16.0 - Renamed from ``dims`` to ``shape``. - - order : {'C', 'F'}, optional - Determines whether the indices should be viewed as indexing in - row-major (C-style) or column-major (Fortran-style) order. - - .. versionadded:: 1.6.0 - - Returns - ------- - unraveled_coords : tuple of ndarray - Each array in the tuple has the same shape as the ``indices`` - array. - - See Also - -------- - ravel_multi_index - - Examples - -------- - >>> np.unravel_index([22, 41, 37], (7,6)) - (array([3, 6, 6]), array([4, 5, 1])) - >>> np.unravel_index([31, 41, 13], (7,6), order='F') - (array([3, 6, 6]), array([4, 5, 1])) - - >>> np.unravel_index(1621, (6,7,8,9)) - (3, 1, 4, 1) - - """) - add_newdoc('numpy.core.multiarray', 'add_docstring', """ add_docstring(obj, docstring) @@ -5200,7 +5065,7 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', to None - otherwise it defaults to ufunc.identity. If ``None`` is given, the first element of the reduction is used, and an error is thrown if the reduction is empty. - + .. versionadded:: 1.15.0 Returns @@ -5233,18 +5098,18 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', >>> np.add.reduce(X, 2) array([[ 1, 5], [ 9, 13]]) - + You can use the ``initial`` keyword argument to initialize the reduction with a different value. - + >>> np.add.reduce([10], initial=5) 15 >>> np.add.reduce(np.ones((2, 2, 2)), axis=(0, 2), initializer=10) array([14., 14.]) - + Allows reductions of empty arrays where they would normally fail, i.e. for ufuncs without an identity. - + >>> np.minimum.reduce([], initial=np.inf) inf >>> np.minimum.reduce([]) diff --git a/numpy/core/_dtype_ctypes.py b/numpy/core/_dtype_ctypes.py index f10b4e99f..ca365d2cb 100644 --- a/numpy/core/_dtype_ctypes.py +++ b/numpy/core/_dtype_ctypes.py @@ -33,17 +33,49 @@ def _from_ctypes_array(t): def _from_ctypes_structure(t): - # TODO: gh-10533, gh-10532 - fields = [] for item in t._fields_: if len(item) > 2: raise TypeError( "ctypes bitfields have no dtype equivalent") - fname, ftyp = item - fields.append((fname, dtype_from_ctypes_type(ftyp))) - # by default, ctypes structs are aligned - return np.dtype(fields, align=True) + if hasattr(t, "_pack_"): + formats = [] + offsets = [] + names = [] + current_offset = 0 + for fname, ftyp in t._fields_: + names.append(fname) + formats.append(dtype_from_ctypes_type(ftyp)) + # Each type has a default offset, this is platform dependent for some types. + effective_pack = min(t._pack_, ctypes.alignment(ftyp)) + current_offset = ((current_offset + effective_pack - 1) // effective_pack) * effective_pack + offsets.append(current_offset) + current_offset += ctypes.sizeof(ftyp) + + return np.dtype(dict( + formats=formats, + offsets=offsets, + names=names, + itemsize=ctypes.sizeof(t))) + else: + fields = [] + for fname, ftyp in t._fields_: + fields.append((fname, dtype_from_ctypes_type(ftyp))) + + # by default, ctypes structs are aligned + return np.dtype(fields, align=True) + + +def dtype_from_ctypes_scalar(t): + """ + Return the dtype type with endianness included if it's the case + """ + if t.__ctype_be__ is t: + return np.dtype('>' + t._type_) + elif t.__ctype_le__ is t: + return np.dtype('<' + t._type_) + else: + return np.dtype(t._type_) def dtype_from_ctypes_type(t): @@ -62,7 +94,7 @@ def dtype_from_ctypes_type(t): "conversion from ctypes.Union types like {} to dtype" .format(t.__name__)) elif isinstance(t._type_, str): - return np.dtype(t._type_) + return dtype_from_ctypes_scalar(t) else: raise NotImplementedError( "Unknown ctypes type {}".format(t.__name__)) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index 30069f0ca..59da60253 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -10,6 +10,7 @@ import re import sys from numpy.compat import unicode +from numpy.core.overrides import set_module from .multiarray import dtype, array, ndarray try: import ctypes @@ -482,6 +483,12 @@ _pep3118_standard_map = { } _pep3118_standard_typechars = ''.join(_pep3118_standard_map.keys()) +_pep3118_unsupported_map = { + 'u': 'UCS-2 strings', + '&': 'pointers', + 't': 'bitfields', + 'X': 'function pointers', +} class _Stream(object): def __init__(self, s): @@ -593,6 +600,11 @@ def __dtype_from_pep3118(stream, is_subdtype): stream.byteorder, stream.byteorder) value = dtype(numpy_byteorder + dtypechar) align = value.alignment + elif stream.next in _pep3118_unsupported_map: + desc = _pep3118_unsupported_map[stream.next] + raise NotImplementedError( + "Unrepresentable PEP 3118 data type {!r} ({})" + .format(stream.next, desc)) else: raise ValueError("Unknown PEP 3118 data type specifier %r" % stream.s) @@ -718,9 +730,11 @@ def _lcm(a, b): return a // _gcd(a, b) * b # Exception used in shares_memory() +@set_module('numpy') class TooHardError(RuntimeError): pass +@set_module('numpy') class AxisError(ValueError, IndexError): """ Axis supplied was invalid. """ def __init__(self, axis, ndim=None, msg_prefix=None): diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index ccc1468c4..075d75340 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -48,7 +48,7 @@ from .fromnumeric import ravel, any from .numeric import concatenate, asarray, errstate from .numerictypes import (longlong, intc, int_, float_, complex_, bool_, flexible) -from .overrides import array_function_dispatch +from .overrides import array_function_dispatch, set_module import warnings import contextlib @@ -89,6 +89,8 @@ def _make_options_dict(precision=None, threshold=None, edgeitems=None, return options + +@set_module('numpy') def set_printoptions(precision=None, threshold=None, edgeitems=None, linewidth=None, suppress=None, nanstr=None, infstr=None, formatter=None, sign=None, floatmode=None, **kwarg): @@ -250,6 +252,7 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None, set_legacy_print_mode(0) +@set_module('numpy') def get_printoptions(): """ Return the current print options. @@ -279,6 +282,7 @@ def get_printoptions(): return _format_options.copy() +@set_module('numpy') @contextlib.contextmanager def printoptions(*args, **kwargs): """Context manager for setting print options. @@ -976,6 +980,8 @@ class LongFloatFormat(FloatingFormat): DeprecationWarning, stacklevel=2) super(LongFloatFormat, self).__init__(*args, **kwargs) + +@set_module('numpy') def format_float_scientific(x, precision=None, unique=True, trim='k', sign=False, pad_left=None, exp_digits=None): """ @@ -1043,6 +1049,8 @@ def format_float_scientific(x, precision=None, unique=True, trim='k', trim=trim, sign=sign, pad_left=pad_left, exp_digits=exp_digits) + +@set_module('numpy') def format_float_positional(x, precision=None, unique=True, fractional=True, trim='k', sign=False, pad_left=None, pad_right=None): @@ -1547,10 +1555,12 @@ def array_str(a, max_line_width=None, precision=None, suppress_small=None): a, max_line_width, precision, suppress_small) +# needed if __array_function__ is disabled +_array2string_impl = getattr(array2string, '__wrapped__', array2string) _default_array_str = functools.partial(_array_str_implementation, - array2string=array2string.__wrapped__) + array2string=_array2string_impl) _default_array_repr = functools.partial(_array_repr_implementation, - array2string=array2string.__wrapped__) + array2string=_array2string_impl) def set_string_function(f, repr=True): diff --git a/numpy/core/code_generators/cversions.txt b/numpy/core/code_generators/cversions.txt index c8b998bfc..73cd8ddd6 100644 --- a/numpy/core/code_generators/cversions.txt +++ b/numpy/core/code_generators/cversions.txt @@ -39,9 +39,11 @@ 0x0000000b = edb1ba83730c650fd9bc5772a919cda7 # Version 12 (NumPy 1.14) Added PyArray_ResolveWritebackIfCopy, -# Version 12 (NumPy 1.15) No change. # PyArray_SetWritebackIfCopyBase and deprecated PyArray_SetUpdateIfCopyBase. +# Version 12 (NumPy 1.15) No change. 0x0000000c = a1bc756c5782853ec2e3616cf66869d8 -# Version 13 (Numpy 1.16) Added fields core_dim_flags and core_dim_sizes to PyUFuncObject +# Version 13 (NumPy 1.16) Deprecate PyArray_SetNumericOps and +# PyArray_GetNumericOps, Added fields core_dim_flags and core_dim_sizes +# to PyUFuncObject 0x0000000d = a1bc756c5782853ec2e3616cf66869d8 diff --git a/numpy/core/code_generators/generate_numpy_api.py b/numpy/core/code_generators/generate_numpy_api.py index 7f2541667..a883ee469 100644 --- a/numpy/core/code_generators/generate_numpy_api.py +++ b/numpy/core/code_generators/generate_numpy_api.py @@ -50,7 +50,6 @@ _import_array(void) PyObject *c_api = NULL; if (numpy == NULL) { - PyErr_SetString(PyExc_ImportError, "numpy.core._multiarray_umath failed to import"); return -1; } c_api = PyObject_GetAttrString(numpy, "_ARRAY_API"); diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 199ad831b..9d4e72c0e 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -10,11 +10,14 @@ sys.path.insert(0, os.path.dirname(__file__)) import ufunc_docstrings as docstrings sys.path.pop(0) -Zero = "PyUFunc_Zero" -One = "PyUFunc_One" -None_ = "PyUFunc_None" -AllOnes = "PyUFunc_MinusOne" -ReorderableNone = "PyUFunc_ReorderableNone" +Zero = "PyInt_FromLong(0)" +One = "PyInt_FromLong(1)" +True_ = "(Py_INCREF(Py_True), Py_True)" +False_ = "(Py_INCREF(Py_False), Py_False)" +None_ = object() +AllOnes = "PyInt_FromLong(-1)" +MinusInfinity = 'PyFloat_FromDouble(-NPY_INFINITY)' +ReorderableNone = "(Py_INCREF(Py_None), Py_None)" # Sentinel value to specify using the full type description in the # function name @@ -458,7 +461,7 @@ defdict = { [TypeDescription('O', FullTypeDescr, 'OO', 'O')], ), 'logical_and': - Ufunc(2, 1, One, + Ufunc(2, 1, True_, docstrings.get('numpy.core.umath.logical_and'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), @@ -472,14 +475,14 @@ defdict = { TD(O, f='npy_ObjectLogicalNot'), ), 'logical_or': - Ufunc(2, 1, Zero, + Ufunc(2, 1, False_, docstrings.get('numpy.core.umath.logical_or'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?', simd=[('avx2', ints)]), TD(O, f='npy_ObjectLogicalOr'), ), 'logical_xor': - Ufunc(2, 1, Zero, + Ufunc(2, 1, False_, docstrings.get('numpy.core.umath.logical_xor'), 'PyUFunc_SimpleBinaryComparisonTypeResolver', TD(nodatetime_or_obj, out='?'), @@ -514,7 +517,7 @@ defdict = { TD(O, f='npy_ObjectMin') ), 'logaddexp': - Ufunc(2, 1, None, + Ufunc(2, 1, MinusInfinity, docstrings.get('numpy.core.umath.logaddexp'), None, TD(flts, f="logaddexp", astype={'e':'f'}) @@ -1048,18 +1051,38 @@ def make_ufuncs(funcdict): # do not play well with \n docstring = '\\n\"\"'.join(docstring.split(r"\n")) fmt = textwrap.dedent("""\ - f = PyUFunc_FromFuncAndData( + identity = {identity_expr}; + if ({has_identity} && identity == NULL) {{ + return -1; + }} + f = PyUFunc_FromFuncAndDataAndSignatureAndIdentity( {name}_functions, {name}_data, {name}_signatures, {nloops}, {nin}, {nout}, {identity}, "{name}", - "{doc}", 0 + "{doc}", 0, NULL, identity ); + if ({has_identity}) {{ + Py_DECREF(identity); + }} if (f == NULL) {{ return -1; - }}""") - mlist.append(fmt.format( + }} + """) + args = dict( name=name, nloops=len(uf.type_descriptions), - nin=uf.nin, nout=uf.nout, identity=uf.identity, doc=docstring - )) + nin=uf.nin, nout=uf.nout, + has_identity='0' if uf.identity is None_ else '1', + identity='PyUFunc_IdentityValue', + identity_expr=uf.identity, + doc=docstring + ) + + # Only PyUFunc_None means don't reorder - we pass this using the old + # argument + if uf.identity is None_: + args['identity'] = 'PyUFunc_None' + args['identity_expr'] = 'NULL' + + mlist.append(fmt.format(**args)) if uf.typereso is not None: mlist.append( r"((PyUFuncObject *)f)->type_resolver = &%s;" % uf.typereso) @@ -1087,7 +1110,7 @@ def make_code(funcdict, filename): static int InitOperators(PyObject *dictionary) { - PyObject *f; + PyObject *f, *identity; %s %s diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py index d8a9ee6b4..fdf97ac00 100644 --- a/numpy/core/code_generators/numpy_api.py +++ b/numpy/core/code_generators/numpy_api.py @@ -402,6 +402,9 @@ ufunc_funcs_api = { # End 1.7 API 'PyUFunc_RegisterLoopForDescr': (41,), # End 1.8 API + 'PyUFunc_FromFuncAndDataAndSignatureAndIdentity': + (42,), + # End 1.16 API } # List of all the dicts which define the C API diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index e86086012..12ba3f02e 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -23,6 +23,7 @@ from .numerictypes import string_, unicode_, integer, object_, bool_, character from .numeric import ndarray, compare_chararrays from .numeric import array as narray from numpy.core.multiarray import _vec_string +from numpy.core.overrides import set_module from numpy.core import overrides from numpy.compat import asbytes, long import numpy @@ -1820,6 +1821,7 @@ def isdecimal(a): return _vec_string(a, bool_, 'isdecimal') +@set_module('numpy') class chararray(ndarray): """ chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0, diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py index 799b1418d..b3dd313cf 100644 --- a/numpy/core/function_base.py +++ b/numpy/core/function_base.py @@ -7,6 +7,7 @@ from . import numeric as _nx from .numeric import (result_type, NaN, shares_memory, MAY_SHARE_BOUNDS, TooHardError,asanyarray) from numpy.core.multiarray import add_docstring +from numpy.core.overrides import set_module __all__ = ['logspace', 'linspace', 'geomspace'] @@ -23,6 +24,7 @@ def _index_deprecate(i, stacklevel=2): return i +@set_module('numpy') def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None): """ Return evenly spaced numbers over a specified interval. @@ -154,6 +156,7 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None): return y.astype(dtype, copy=False) +@set_module('numpy') def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None): """ Return numbers spaced evenly on a log scale. @@ -238,6 +241,7 @@ def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None): return _nx.power(base, y).astype(dtype) +@set_module('numpy') def geomspace(start, stop, num=50, endpoint=True, dtype=None): """ Return numbers spaced evenly on a log scale (a geometric progression). diff --git a/numpy/core/getlimits.py b/numpy/core/getlimits.py index 389f16ff5..544b8b35f 100644 --- a/numpy/core/getlimits.py +++ b/numpy/core/getlimits.py @@ -8,6 +8,7 @@ __all__ = ['finfo', 'iinfo'] import warnings from .machar import MachAr +from .overrides import set_module from . import numeric from . import numerictypes as ntypes from .numeric import array, inf @@ -289,6 +290,7 @@ def _discovered_machar(ftype): params['title']) +@set_module('numpy') class finfo(object): """ finfo(dtype) @@ -439,6 +441,7 @@ class finfo(object): " max=%(_str_max)s, dtype=%(dtype)s)") % d) +@set_module('numpy') class iinfo(object): """ iinfo(type) diff --git a/numpy/core/include/numpy/npy_3kcompat.h b/numpy/core/include/numpy/npy_3kcompat.h index a3c69f44e..832bc0599 100644 --- a/numpy/core/include/numpy/npy_3kcompat.h +++ b/numpy/core/include/numpy/npy_3kcompat.h @@ -219,6 +219,7 @@ npy_PyFile_Dup2(PyObject *file, char *mode, npy_off_t *orig_pos) if (handle == NULL) { PyErr_SetString(PyExc_IOError, "Getting a FILE* from a Python file object failed"); + return NULL; } /* Record the original raw file handle position */ @@ -383,6 +384,36 @@ npy_PyFile_CloseFile(PyObject *file) } +/* This is a copy of _PyErr_ChainExceptions + */ +static NPY_INLINE void +npy_PyErr_ChainExceptions(PyObject *exc, PyObject *val, PyObject *tb) +{ + if (exc == NULL) + return; + + if (PyErr_Occurred()) { + /* only py3 supports this anyway */ + #ifdef NPY_PY3K + PyObject *exc2, *val2, *tb2; + PyErr_Fetch(&exc2, &val2, &tb2); + PyErr_NormalizeException(&exc, &val, &tb); + if (tb != NULL) { + PyException_SetTraceback(val, tb); + Py_DECREF(tb); + } + Py_DECREF(exc); + PyErr_NormalizeException(&exc2, &val2, &tb2); + PyException_SetContext(val2, val); + PyErr_Restore(exc2, val2, tb2); + #endif + } + else { + PyErr_Restore(exc, val, tb); + } +} + + /* This is a copy of _PyErr_ChainExceptions, with: * - a minimal implementation for python 2 * - __cause__ used instead of __context__ diff --git a/numpy/core/include/numpy/ufuncobject.h b/numpy/core/include/numpy/ufuncobject.h index 85f8a6c08..90d837a9b 100644 --- a/numpy/core/include/numpy/ufuncobject.h +++ b/numpy/core/include/numpy/ufuncobject.h @@ -223,7 +223,8 @@ typedef struct _tagPyUFuncObject { */ npy_uint32 *core_dim_flags; - + /* Identity for reduction, when identity == PyUFunc_IdentityValue */ + PyObject *identity_value; } PyUFuncObject; @@ -299,6 +300,12 @@ typedef struct _tagPyUFuncObject { * This case allows reduction with multiple axes at once. */ #define PyUFunc_ReorderableNone -2 +/* + * UFunc unit is in identity_value, and the order of operations can be reordered + * This case allows reduction with multiple axes at once. + */ +#define PyUFunc_IdentityValue -3 + #define UFUNC_REDUCE 0 #define UFUNC_ACCUMULATE 1 diff --git a/numpy/core/machar.py b/numpy/core/machar.py index 7578544fe..91fb4eda8 100644 --- a/numpy/core/machar.py +++ b/numpy/core/machar.py @@ -11,9 +11,11 @@ __all__ = ['MachAr'] from numpy.core.fromnumeric import any from numpy.core.numeric import errstate +from numpy.core.overrides import set_module # Need to speed this up...especially for longfloat +@set_module('numpy') class MachAr(object): """ Diagnosing machine parameters. diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py index f5cc68bb9..82bc4707c 100644 --- a/numpy/core/memmap.py +++ b/numpy/core/memmap.py @@ -5,6 +5,7 @@ from .numeric import uint8, ndarray, dtype from numpy.compat import ( long, basestring, os_fspath, contextlib_nullcontext, is_pathlib_path ) +from numpy.core.overrides import set_module __all__ = ['memmap'] @@ -19,6 +20,8 @@ mode_equivalents = { "write":"w+" } + +@set_module('numpy') class memmap(ndarray): """Create a memory-map to an array stored in a *binary* file on disk. diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py index 25debd2f8..78963b0aa 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -7,6 +7,7 @@ by importing from the extension module. """ import functools +import warnings from . import overrides from . import _multiarray_umath @@ -39,6 +40,26 @@ __all__ = [ 'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot', 'where', 'zeros'] + +arange.__module__ = 'numpy' +array.__module__ = 'numpy' +datetime_data.__module__ = 'numpy' +empty.__module__ = 'numpy' +frombuffer.__module__ = 'numpy' +fromfile.__module__ = 'numpy' +fromiter.__module__ = 'numpy' +frompyfunc.__module__ = 'numpy' +fromstring.__module__ = 'numpy' +geterrobj.__module__ = 'numpy' +matmul.__module__ = 'numpy' +may_share_memory.__module__ = 'numpy' +nested_iters.__module__ = 'numpy' +promote_types.__module__ = 'numpy' +set_numeric_ops.__module__ = 'numpy' +seterrobj.__module__ = 'numpy' +zeros.__module__ = 'numpy' + + array_function_dispatch = functools.partial( overrides.array_function_dispatch, module='numpy') @@ -832,6 +853,474 @@ def vdot(a, b): return _multiarray_umath.vdot(a, b) +def _bincount_dispatcher(x, weights=None, minlength=None): + return (x, weights) + + +@array_function_dispatch(_bincount_dispatcher) +def bincount(x, weights=None, minlength=0): + """ + Count number of occurrences of each value in array of non-negative ints. + + The number of bins (of size 1) is one larger than the largest value in + `x`. If `minlength` is specified, there will be at least this number + of bins in the output array (though it will be longer if necessary, + depending on the contents of `x`). + Each bin gives the number of occurrences of its index value in `x`. + If `weights` is specified the input array is weighted by it, i.e. if a + value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead + of ``out[n] += 1``. + + Parameters + ---------- + x : array_like, 1 dimension, nonnegative ints + Input array. + weights : array_like, optional + Weights, array of the same shape as `x`. + minlength : int, optional + A minimum number of bins for the output array. + + .. versionadded:: 1.6.0 + + Returns + ------- + out : ndarray of ints + The result of binning the input array. + The length of `out` is equal to ``np.amax(x)+1``. + + Raises + ------ + ValueError + If the input is not 1-dimensional, or contains elements with negative + values, or if `minlength` is negative. + TypeError + If the type of the input is float or complex. + + See Also + -------- + histogram, digitize, unique + + Examples + -------- + >>> np.bincount(np.arange(5)) + array([1, 1, 1, 1, 1]) + >>> np.bincount(np.array([0, 1, 1, 3, 2, 1, 7])) + array([1, 3, 1, 1, 0, 0, 0, 1]) + + >>> x = np.array([0, 1, 1, 3, 2, 1, 7, 23]) + >>> np.bincount(x).size == np.amax(x)+1 + True + + The input array needs to be of integer dtype, otherwise a + TypeError is raised: + + >>> np.bincount(np.arange(5, dtype=float)) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + TypeError: array cannot be safely cast to required type + + A possible use of ``bincount`` is to perform sums over + variable-size chunks of an array, using the ``weights`` keyword. + + >>> w = np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6]) # weights + >>> x = np.array([0, 1, 1, 2, 2, 2]) + >>> np.bincount(x, weights=w) + array([ 0.3, 0.7, 1.1]) + + """ + return _multiarray_umath.bincount(x, weights=weights, minlength=minlength) + + +def _ravel_multi_index_dispatcher(multi_index, dims, mode=None, order=None): + return multi_index + + +@array_function_dispatch(_ravel_multi_index_dispatcher) +def ravel_multi_index(multi_index, dims, mode='raise', order='C'): + """ + Converts a tuple of index arrays into an array of flat + indices, applying boundary modes to the multi-index. + + Parameters + ---------- + multi_index : tuple of array_like + A tuple of integer arrays, one array for each dimension. + dims : tuple of ints + The shape of array into which the indices from ``multi_index`` apply. + mode : {'raise', 'wrap', 'clip'}, optional + Specifies how out-of-bounds indices are handled. Can specify + either one mode or a tuple of modes, one mode per index. + + * 'raise' -- raise an error (default) + * 'wrap' -- wrap around + * 'clip' -- clip to the range + + In 'clip' mode, a negative index which would normally + wrap will clip to 0 instead. + order : {'C', 'F'}, optional + Determines whether the multi-index should be viewed as + indexing in row-major (C-style) or column-major + (Fortran-style) order. + + Returns + ------- + raveled_indices : ndarray + An array of indices into the flattened version of an array + of dimensions ``dims``. + + See Also + -------- + unravel_index + + Notes + ----- + .. versionadded:: 1.6.0 + + Examples + -------- + >>> arr = np.array([[3,6,6],[4,5,1]]) + >>> np.ravel_multi_index(arr, (7,6)) + array([22, 41, 37]) + >>> np.ravel_multi_index(arr, (7,6), order='F') + array([31, 41, 13]) + >>> np.ravel_multi_index(arr, (4,6), mode='clip') + array([22, 23, 19]) + >>> np.ravel_multi_index(arr, (4,4), mode=('clip','wrap')) + array([12, 13, 13]) + + >>> np.ravel_multi_index((3,1,4,1), (6,7,8,9)) + 1621 + """ + return _multiarray_umath.ravel_multi_index( + multi_index, dims, mode=mode, order=order) + + +def _deprecate_dims(shape, dims): + if dims is not None: + warnings.warn("'shape' argument should be used instead of 'dims'", + DeprecationWarning, stacklevel=3) + shape = dims + return shape + + +def _unravel_index_dispatcher(indices, shape=None, order=None, dims=None): + shape = _deprecate_dims(shape, dims) + return (indices,) + + +@array_function_dispatch(_unravel_index_dispatcher) +def unravel_index(indices, shape=None, order='C', dims=None): + """ + Converts a flat index or array of flat indices into a tuple + of coordinate arrays. + + Parameters + ---------- + indices : array_like + An integer array whose elements are indices into the flattened + version of an array of dimensions ``shape``. Before version 1.6.0, + this function accepted just one index value. + shape : tuple of ints + The shape of the array to use for unraveling ``indices``. + + .. versionchanged:: 1.16.0 + Renamed from ``dims`` to ``shape``. + + order : {'C', 'F'}, optional + Determines whether the indices should be viewed as indexing in + row-major (C-style) or column-major (Fortran-style) order. + + .. versionadded:: 1.6.0 + + Returns + ------- + unraveled_coords : tuple of ndarray + Each array in the tuple has the same shape as the ``indices`` + array. + + See Also + -------- + ravel_multi_index + + Examples + -------- + >>> np.unravel_index([22, 41, 37], (7,6)) + (array([3, 6, 6]), array([4, 5, 1])) + >>> np.unravel_index([31, 41, 13], (7,6), order='F') + (array([3, 6, 6]), array([4, 5, 1])) + + >>> np.unravel_index(1621, (6,7,8,9)) + (3, 1, 4, 1) + + """ + shape = _deprecate_dims(shape, dims) + return _multiarray_umath.unravel_index(indices, shape, order=order) + + +def _copyto_dispatcher(dst, src, casting=None, where=None): + return (dst, src, where) + + +@array_function_dispatch(_copyto_dispatcher) +def copyto(dst, src, casting='same_kind', where=True): + """ + Copies values from one array to another, broadcasting as necessary. + + Raises a TypeError if the `casting` rule is violated, and if + `where` is provided, it selects which elements to copy. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + dst : ndarray + The array into which values are copied. + src : array_like + The array from which values are copied. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur when copying. + + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + where : array_like of bool, optional + A boolean array which is broadcasted to match the dimensions + of `dst`, and selects elements to copy from `src` to `dst` + wherever it contains the value True. + """ + return _multiarray_umath.copyto(dst, src, casting=casting, where=where) + + +def _putmask_dispatcher(a, mask, values): + return (a, mask, values) + + +@array_function_dispatch(_putmask_dispatcher) +def putmask(a, mask, values): + """ + Changes elements of an array based on conditional and input values. + + Sets ``a.flat[n] = values[n]`` for each n where ``mask.flat[n]==True``. + + If `values` is not the same size as `a` and `mask` then it will repeat. + This gives behavior different from ``a[mask] = values``. + + Parameters + ---------- + a : array_like + Target array. + mask : array_like + Boolean mask array. It has to be the same shape as `a`. + values : array_like + Values to put into `a` where `mask` is True. If `values` is smaller + than `a` it will be repeated. + + See Also + -------- + place, put, take, copyto + + Examples + -------- + >>> x = np.arange(6).reshape(2, 3) + >>> np.putmask(x, x>2, x**2) + >>> x + array([[ 0, 1, 2], + [ 9, 16, 25]]) + + If `values` is smaller than `a` it is repeated: + + >>> x = np.arange(5) + >>> np.putmask(x, x>1, [-33, -44]) + >>> x + array([ 0, 1, -33, -44, -33]) + + """ + return _multiarray_umath.putmask(a, mask, values) + + +def _packbits_and_unpackbits_dispatcher(myarray, axis=None): + return (myarray,) + + +@array_function_dispatch(_packbits_and_unpackbits_dispatcher) +def packbits(myarray, axis=None): + """ + Packs the elements of a binary-valued array into bits in a uint8 array. + + The result is padded to full bytes by inserting zero bits at the end. + + Parameters + ---------- + myarray : array_like + An array of integers or booleans whose elements should be packed to + bits. + axis : int, optional + The dimension over which bit-packing is done. + ``None`` implies packing the flattened array. + + Returns + ------- + packed : ndarray + Array of type uint8 whose elements represent bits corresponding to the + logical (0 or nonzero) value of the input elements. The shape of + `packed` has the same number of dimensions as the input (unless `axis` + is None, in which case the output is 1-D). + + See Also + -------- + unpackbits: Unpacks elements of a uint8 array into a binary-valued output + array. + + Examples + -------- + >>> a = np.array([[[1,0,1], + ... [0,1,0]], + ... [[1,1,0], + ... [0,0,1]]]) + >>> b = np.packbits(a, axis=-1) + >>> b + array([[[160],[64]],[[192],[32]]], dtype=uint8) + + Note that in binary 160 = 1010 0000, 64 = 0100 0000, 192 = 1100 0000, + and 32 = 0010 0000. + + """ + return _multiarray_umath.packbits(myarray, axis) + + +@array_function_dispatch(_packbits_and_unpackbits_dispatcher) +def unpackbits(myarray, axis=None): + """ + Unpacks elements of a uint8 array into a binary-valued output array. + + Each element of `myarray` represents a bit-field that should be unpacked + into a binary-valued output array. The shape of the output array is either + 1-D (if `axis` is None) or the same shape as the input array with unpacking + done along the axis specified. + + Parameters + ---------- + myarray : ndarray, uint8 type + Input array. + axis : int, optional + The dimension over which bit-unpacking is done. + ``None`` implies unpacking the flattened array. + + Returns + ------- + unpacked : ndarray, uint8 type + The elements are binary-valued (0 or 1). + + See Also + -------- + packbits : Packs the elements of a binary-valued array into bits in a uint8 + array. + + Examples + -------- + >>> a = np.array([[2], [7], [23]], dtype=np.uint8) + >>> a + array([[ 2], + [ 7], + [23]], dtype=uint8) + >>> b = np.unpackbits(a, axis=1) + >>> b + array([[0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1], + [0, 0, 0, 1, 0, 1, 1, 1]], dtype=uint8) + + """ + return _multiarray_umath.unpackbits(myarray, axis) + + +def _shares_memory_dispatcher(a, b, max_work=None): + return (a, b) + + +@array_function_dispatch(_shares_memory_dispatcher) +def shares_memory(a, b, max_work=None): + """ + Determine if two arrays share memory + + Parameters + ---------- + a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem (maximum number + of candidate solutions to consider). The following special + values are recognized: + + max_work=MAY_SHARE_EXACT (default) + The problem is solved exactly. In this case, the function returns + True only if there is an element shared between the arrays. + max_work=MAY_SHARE_BOUNDS + Only the memory bounds of a and b are checked. + + Raises + ------ + numpy.TooHardError + Exceeded max_work. + + Returns + ------- + out : bool + + See Also + -------- + may_share_memory + + Examples + -------- + >>> np.may_share_memory(np.array([1,2]), np.array([5,8,9])) + False + + """ + return _multiarray_umath.shares_memory(a, b, max_work=max_work) + + +@array_function_dispatch(_shares_memory_dispatcher) +def may_share_memory(a, b, max_work=None): + """ + Determine if two arrays might share memory + + A return of True does not necessarily mean that the two arrays + share any element. It just means that they *might*. + + Only the memory bounds of a and b are checked by default. + + Parameters + ---------- + a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem. See + `shares_memory` for details. Default for ``may_share_memory`` + is to do a bounds check. + + Returns + ------- + out : bool + + See Also + -------- + shares_memory + + Examples + -------- + >>> np.may_share_memory(np.array([1,2]), np.array([5,8,9])) + False + >>> x = np.zeros([3, 4]) + >>> np.may_share_memory(x[:,0], x[:,1]) + True + + """ + return _multiarray_umath.may_share_memory(a, b, max_work=max_work) + + def _is_busday_dispatcher( dates, weekmask=None, holidays=None, busdaycal=None, out=None): return (dates, weekmask, holidays, out) @@ -1156,3 +1645,4 @@ def datetime_as_string(arr, unit=None, timezone='naive', casting='same_kind'): datetime with units 'm' according to the rule 'safe' """ return _multiarray_umath.datetime_as_string(arr, unit, timezone, casting) + diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 265c3636f..aa5be1af3 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -30,6 +30,7 @@ if sys.version_info[0] < 3: from . import overrides from . import umath +from .overrides import set_module from .umath import (multiply, invert, sin, UFUNC_BUFSIZE_DEFAULT, ERR_IGNORE, ERR_WARN, ERR_RAISE, ERR_CALL, ERR_PRINT, ERR_LOG, ERR_DEFAULT, PINF, NAN) @@ -92,6 +93,7 @@ if sys.version_info[0] < 3: __all__.extend(['getbuffer', 'newbuffer']) +@set_module('numpy') class ComplexWarning(RuntimeWarning): """ The warning raised when casting a complex dtype to a real dtype. @@ -170,6 +172,7 @@ def zeros_like(a, dtype=None, order='K', subok=True): return res +@set_module('numpy') def ones(shape, dtype=None, order='C'): """ Return a new array of given shape and type, filled with ones. @@ -287,6 +290,7 @@ def ones_like(a, dtype=None, order='K', subok=True): return res +@set_module('numpy') def full(shape, fill_value, dtype=None, order='C'): """ Return a new array of given shape and type, filled with `fill_value`. @@ -462,6 +466,7 @@ def count_nonzero(a, axis=None): return a_bool.sum(axis=axis, dtype=np.intp) +@set_module('numpy') def asarray(a, dtype=None, order=None): """Convert the input to an array. @@ -533,6 +538,7 @@ def asarray(a, dtype=None, order=None): return array(a, dtype, copy=False, order=order) +@set_module('numpy') def asanyarray(a, dtype=None, order=None): """Convert the input to an ndarray, but pass ndarray subclasses through. @@ -585,6 +591,7 @@ def asanyarray(a, dtype=None, order=None): return array(a, dtype, copy=False, order=order, subok=True) +@set_module('numpy') def ascontiguousarray(a, dtype=None): """ Return a contiguous array (ndim >= 1) in memory (C order). @@ -625,6 +632,7 @@ def ascontiguousarray(a, dtype=None): return array(a, dtype, copy=False, order='C', ndmin=1) +@set_module('numpy') def asfortranarray(a, dtype=None): """ Return an array (ndim >= 1) laid out in Fortran order in memory. @@ -665,6 +673,7 @@ def asfortranarray(a, dtype=None): return array(a, dtype, copy=False, order='F', ndmin=1) +@set_module('numpy') def require(a, dtype=None, requirements=None): """ Return an ndarray of the provided type that satisfies requirements. @@ -763,6 +772,7 @@ def require(a, dtype=None, requirements=None): return arr +@set_module('numpy') def isfortran(a): """ Returns True if the array is Fortran contiguous but *not* C contiguous. @@ -1889,6 +1899,7 @@ def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None): little_endian = (sys.byteorder == 'little') +@set_module('numpy') def indices(dimensions, dtype=int): """ Return an array representing the indices of a grid. @@ -1960,6 +1971,7 @@ def indices(dimensions, dtype=int): return res +@set_module('numpy') def fromfunction(function, shape, **kwargs): """ Construct an array by executing a function over each coordinate. @@ -2020,6 +2032,7 @@ def _frombuffer(buf, dtype, shape, order): return frombuffer(buf, dtype=dtype).reshape(shape, order=order) +@set_module('numpy') def isscalar(num): """ Returns True if the type of `num` is a scalar type. @@ -2096,6 +2109,7 @@ def isscalar(num): or isinstance(num, numbers.Number)) +@set_module('numpy') def binary_repr(num, width=None): """ Return the binary representation of the input number as a string. @@ -2206,6 +2220,7 @@ def binary_repr(num, width=None): return '1' * (outwidth - binwidth) + binary +@set_module('numpy') def base_repr(number, base=2, padding=0): """ Return a string representation of a number in the given base system. @@ -2300,6 +2315,7 @@ def _maketup(descr, val): return tuple(res) +@set_module('numpy') def identity(n, dtype=None): """ Return the identity array. @@ -2640,6 +2656,7 @@ for key in _errdict.keys(): del key +@set_module('numpy') def seterr(all=None, divide=None, over=None, under=None, invalid=None): """ Set how floating-point errors are handled. @@ -2741,6 +2758,7 @@ def seterr(all=None, divide=None, over=None, under=None, invalid=None): return old +@set_module('numpy') def geterr(): """ Get the current way of handling floating-point errors. @@ -2792,6 +2810,7 @@ def geterr(): return res +@set_module('numpy') def setbufsize(size): """ Set the size of the buffer used in ufuncs. @@ -2816,6 +2835,7 @@ def setbufsize(size): return old +@set_module('numpy') def getbufsize(): """ Return the size of the buffer used in ufuncs. @@ -2829,6 +2849,7 @@ def getbufsize(): return umath.geterrobj()[0] +@set_module('numpy') def seterrcall(func): """ Set the floating-point error callback function or log object. @@ -2921,6 +2942,7 @@ def seterrcall(func): return old +@set_module('numpy') def geterrcall(): """ Return the current callback function used on floating-point errors. @@ -2973,6 +2995,7 @@ class _unspecified(object): _Unspecified = _unspecified() +@set_module('numpy') class errstate(object): """ errstate(**kwargs) diff --git a/numpy/core/numerictypes.py b/numpy/core/numerictypes.py index 2fb841f7c..f00f92286 100644 --- a/numpy/core/numerictypes.py +++ b/numpy/core/numerictypes.py @@ -92,6 +92,7 @@ from numpy.core.multiarray import ( datetime_as_string, busday_offset, busday_count, is_busday, busdaycalendar ) +from numpy.core.overrides import set_module # we add more at the bottom __all__ = ['sctypeDict', 'sctypeNA', 'typeDict', 'typeNA', 'sctypes', @@ -187,6 +188,8 @@ def maximum_sctype(t): else: return t + +@set_module('numpy') def issctype(rep): """ Determines whether the given object represents a scalar data-type. @@ -231,6 +234,8 @@ def issctype(rep): except Exception: return False + +@set_module('numpy') def obj2sctype(rep, default=None): """ Return the scalar dtype or NumPy equivalent of Python type of an object. @@ -285,6 +290,7 @@ def obj2sctype(rep, default=None): return res.type +@set_module('numpy') def issubclass_(arg1, arg2): """ Determine if a class is a subclass of a second class. @@ -323,6 +329,8 @@ def issubclass_(arg1, arg2): except TypeError: return False + +@set_module('numpy') def issubsctype(arg1, arg2): """ Determine if the first argument is a subclass of the second argument. @@ -353,6 +361,8 @@ def issubsctype(arg1, arg2): """ return issubclass(obj2sctype(arg1), obj2sctype(arg2)) + +@set_module('numpy') def issubdtype(arg1, arg2): """ Returns True if first argument is a typecode lower/equal in type hierarchy. @@ -446,6 +456,8 @@ def _construct_lookups(): _construct_lookups() + +@set_module('numpy') def sctype2char(sctype): """ Return the string representation of a scalar dtype. @@ -586,6 +598,8 @@ def _register_types(): _register_types() + +@set_module('numpy') def find_common_type(array_types, scalar_types): """ Determine common type following standard coercion rules. diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py index 088c15e65..1cc1ff8d8 100644 --- a/numpy/core/overrides.py +++ b/numpy/core/overrides.py @@ -4,6 +4,7 @@ TODO: rewrite this in C for performance. """ import collections import functools +import os from numpy.core._multiarray_umath import ndarray from numpy.compat._inspect import getargspec @@ -12,6 +13,9 @@ from numpy.compat._inspect import getargspec _NDARRAY_ARRAY_FUNCTION = ndarray.__array_function__ _NDARRAY_ONLY = [ndarray] +ENABLE_ARRAY_FUNCTION = bool( + int(os.environ.get('NUMPY_EXPERIMENTAL_ARRAY_FUNCTION', 0))) + def get_overloaded_types_and_args(relevant_args): """Returns a list of arguments on which to call __array_function__. @@ -146,12 +150,57 @@ def verify_matching_signatures(implementation, dispatcher): 'default argument values') +def set_module(module): + """Decorator for overriding __module__ on a function or class. + + Example usage:: + + @set_module('numpy') + def example(): + pass + + assert example.__module__ == 'numpy' + """ + def decorator(func): + if module is not None: + func.__module__ = module + return func + return decorator + + def array_function_dispatch(dispatcher, module=None, verify=True): - """Decorator for adding dispatch with the __array_function__ protocol.""" + """Decorator for adding dispatch with the __array_function__ protocol. + + See NEP-18 for example usage. + + Parameters + ---------- + dispatcher : callable + Function that when called like ``dispatcher(*args, **kwargs)`` with + arguments from the NumPy function call returns an iterable of + array-like arguments to check for ``__array_function__``. + module : str, optional + __module__ attribute to set on new function, e.g., ``module='numpy'``. + By default, module is copied from the decorated function. + verify : bool, optional + If True, verify the that the signature of the dispatcher and decorated + function signatures match exactly: all required and optional arguments + should appear in order with the same names, but the default values for + all optional arguments should be ``None``. Only disable verification + if the dispatcher's signature needs to deviate for some particular + reason, e.g., because the function has a signature like + ``func(*args, **kwargs)``. + + Returns + ------- + Function suitable for decorating the implementation of a NumPy function. + """ + + if not ENABLE_ARRAY_FUNCTION: + # __array_function__ requires an explicit opt-in for now + return set_module(module) + def decorator(implementation): - # TODO: only do this check when the appropriate flag is enabled or for - # a dev install. We want this check for testing but don't want to - # slow down all numpy imports. if verify: verify_matching_signatures(implementation, dispatcher) diff --git a/numpy/core/records.py b/numpy/core/records.py index 1b596e4de..6fc282500 100644 --- a/numpy/core/records.py +++ b/numpy/core/records.py @@ -43,6 +43,7 @@ import warnings from . import numeric as sb from . import numerictypes as nt from numpy.compat import isfileobj, bytes, long, unicode, os_fspath +from numpy.core.overrides import set_module from .arrayprint import get_printoptions # All of the functions allow formats to be a dtype @@ -82,6 +83,8 @@ def find_duplicate(list): dup.append(list[i]) return dup + +@set_module('numpy') class format_parser(object): """ Class to convert formats, names, titles description to a dtype. diff --git a/numpy/core/setup.py b/numpy/core/setup.py index a4429cee2..efcacfb8e 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -677,7 +677,7 @@ def configuration(parent_package='',top_path=None): join('src', 'npymath', 'npy_math_complex.c.src'), join('src', 'npymath', 'halffloat.c') ] - + # Must be true for CRT compilers but not MinGW/cygwin. See gh-9977. is_msvc = platform.system() == 'Windows' config.add_installed_library('npymath', @@ -697,7 +697,8 @@ def configuration(parent_package='',top_path=None): ####################################################################### # This library is created for the build but it is not installed - npysort_sources = [join('src', 'npysort', 'quicksort.c.src'), + npysort_sources = [join('src', 'common', 'npy_sort.h.src'), + join('src', 'npysort', 'quicksort.c.src'), join('src', 'npysort', 'mergesort.c.src'), join('src', 'npysort', 'heapsort.c.src'), join('src', 'common', 'npy_partition.h.src'), @@ -903,14 +904,15 @@ def configuration(parent_package='',top_path=None): join('include', 'numpy', 'npy_math.h'), join('include', 'numpy', 'halffloat.h'), join('src', 'multiarray', 'common.h'), + join('src', 'multiarray', 'number.h'), join('src', 'common', 'templ_common.h.src'), join('src', 'umath', 'simd.inc.src'), join('src', 'umath', 'override.h'), join(codegen_dir, 'generate_ufunc_api.py'), - ] + ] config.add_extension('_multiarray_umath', - sources=multiarray_src + umath_src + + sources=multiarray_src + umath_src + npymath_sources + common_src + [generate_config_h, generate_numpyconfig_h, @@ -920,7 +922,7 @@ def configuration(parent_package='',top_path=None): generate_umath_c, generate_ufunc_api, ], - depends=deps + multiarray_deps + umath_deps + + depends=deps + multiarray_deps + umath_deps + common_deps, libraries=['npymath', 'npysort'], extra_info=extra_info) diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py index 3edf0824e..6d234e527 100644 --- a/numpy/core/shape_base.py +++ b/numpy/core/shape_base.py @@ -217,6 +217,11 @@ def _arrays_for_stack_dispatcher(arrays, stacklevel=4): return arrays +def _warn_for_nonsequence(arrays): + if not overrides.ENABLE_ARRAY_FUNCTION: + _arrays_for_stack_dispatcher(arrays, stacklevel=4) + + def _vhstack_dispatcher(tup): return _arrays_for_stack_dispatcher(tup) @@ -274,6 +279,7 @@ def vstack(tup): [4]]) """ + _warn_for_nonsequence(tup) return _nx.concatenate([atleast_2d(_m) for _m in tup], 0) @@ -325,6 +331,7 @@ def hstack(tup): [3, 4]]) """ + _warn_for_nonsequence(tup) arrs = [atleast_1d(_m) for _m in tup] # As a special case, dimension 0 of 1-dimensional arrays is "horizontal" if arrs and arrs[0].ndim == 1: @@ -398,6 +405,7 @@ def stack(arrays, axis=0, out=None): [3, 4]]) """ + _warn_for_nonsequence(arrays) arrays = [asanyarray(arr) for arr in arrays] if not arrays: raise ValueError('need at least one array to stack') diff --git a/numpy/core/src/common/npy_sort.h b/numpy/core/src/common/npy_sort.h deleted file mode 100644 index 8c6f05623..000000000 --- a/numpy/core/src/common/npy_sort.h +++ /dev/null @@ -1,204 +0,0 @@ -#ifndef __NPY_SORT_H__ -#define __NPY_SORT_H__ - -/* Python include is for future object sorts */ -#include <Python.h> -#include <numpy/npy_common.h> -#include <numpy/ndarraytypes.h> - -#define NPY_ENOMEM 1 -#define NPY_ECOMP 2 - -static NPY_INLINE int npy_get_msb(npy_uintp unum) -{ - int depth_limit = 0; - while (unum >>= 1) { - depth_limit++; - } - return depth_limit; -} - -int quicksort_bool(void *vec, npy_intp cnt, void *null); -int heapsort_bool(void *vec, npy_intp cnt, void *null); -int mergesort_bool(void *vec, npy_intp cnt, void *null); -int aquicksort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_bool(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_byte(void *vec, npy_intp cnt, void *null); -int heapsort_byte(void *vec, npy_intp cnt, void *null); -int mergesort_byte(void *vec, npy_intp cnt, void *null); -int aquicksort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_byte(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_ubyte(void *vec, npy_intp cnt, void *null); -int heapsort_ubyte(void *vec, npy_intp cnt, void *null); -int mergesort_ubyte(void *vec, npy_intp cnt, void *null); -int aquicksort_ubyte(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_ubyte(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_ubyte(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_short(void *vec, npy_intp cnt, void *null); -int heapsort_short(void *vec, npy_intp cnt, void *null); -int mergesort_short(void *vec, npy_intp cnt, void *null); -int aquicksort_short(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_short(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_short(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_ushort(void *vec, npy_intp cnt, void *null); -int heapsort_ushort(void *vec, npy_intp cnt, void *null); -int mergesort_ushort(void *vec, npy_intp cnt, void *null); -int aquicksort_ushort(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_ushort(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_ushort(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_int(void *vec, npy_intp cnt, void *null); -int heapsort_int(void *vec, npy_intp cnt, void *null); -int mergesort_int(void *vec, npy_intp cnt, void *null); -int aquicksort_int(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_int(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_int(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_uint(void *vec, npy_intp cnt, void *null); -int heapsort_uint(void *vec, npy_intp cnt, void *null); -int mergesort_uint(void *vec, npy_intp cnt, void *null); -int aquicksort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_uint(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_long(void *vec, npy_intp cnt, void *null); -int heapsort_long(void *vec, npy_intp cnt, void *null); -int mergesort_long(void *vec, npy_intp cnt, void *null); -int aquicksort_long(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_long(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_long(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_ulong(void *vec, npy_intp cnt, void *null); -int heapsort_ulong(void *vec, npy_intp cnt, void *null); -int mergesort_ulong(void *vec, npy_intp cnt, void *null); -int aquicksort_ulong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_ulong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_ulong(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_longlong(void *vec, npy_intp cnt, void *null); -int heapsort_longlong(void *vec, npy_intp cnt, void *null); -int mergesort_longlong(void *vec, npy_intp cnt, void *null); -int aquicksort_longlong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_longlong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_longlong(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_ulonglong(void *vec, npy_intp cnt, void *null); -int heapsort_ulonglong(void *vec, npy_intp cnt, void *null); -int mergesort_ulonglong(void *vec, npy_intp cnt, void *null); -int aquicksort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_ulonglong(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_half(void *vec, npy_intp cnt, void *null); -int heapsort_half(void *vec, npy_intp cnt, void *null); -int mergesort_half(void *vec, npy_intp cnt, void *null); -int aquicksort_half(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_half(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_half(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_float(void *vec, npy_intp cnt, void *null); -int heapsort_float(void *vec, npy_intp cnt, void *null); -int mergesort_float(void *vec, npy_intp cnt, void *null); -int aquicksort_float(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_float(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_float(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_double(void *vec, npy_intp cnt, void *null); -int heapsort_double(void *vec, npy_intp cnt, void *null); -int mergesort_double(void *vec, npy_intp cnt, void *null); -int aquicksort_double(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_double(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_double(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_longdouble(void *vec, npy_intp cnt, void *null); -int heapsort_longdouble(void *vec, npy_intp cnt, void *null); -int mergesort_longdouble(void *vec, npy_intp cnt, void *null); -int aquicksort_longdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_longdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_longdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_cfloat(void *vec, npy_intp cnt, void *null); -int heapsort_cfloat(void *vec, npy_intp cnt, void *null); -int mergesort_cfloat(void *vec, npy_intp cnt, void *null); -int aquicksort_cfloat(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_cfloat(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_cfloat(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_cdouble(void *vec, npy_intp cnt, void *null); -int heapsort_cdouble(void *vec, npy_intp cnt, void *null); -int mergesort_cdouble(void *vec, npy_intp cnt, void *null); -int aquicksort_cdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_cdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_cdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_clongdouble(void *vec, npy_intp cnt, void *null); -int heapsort_clongdouble(void *vec, npy_intp cnt, void *null); -int mergesort_clongdouble(void *vec, npy_intp cnt, void *null); -int aquicksort_clongdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_clongdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_clongdouble(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_string(void *vec, npy_intp cnt, void *arr); -int heapsort_string(void *vec, npy_intp cnt, void *arr); -int mergesort_string(void *vec, npy_intp cnt, void *arr); -int aquicksort_string(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int aheapsort_string(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int amergesort_string(void *vec, npy_intp *ind, npy_intp cnt, void *arr); - - -int quicksort_unicode(void *vec, npy_intp cnt, void *arr); -int heapsort_unicode(void *vec, npy_intp cnt, void *arr); -int mergesort_unicode(void *vec, npy_intp cnt, void *arr); -int aquicksort_unicode(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int aheapsort_unicode(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int amergesort_unicode(void *vec, npy_intp *ind, npy_intp cnt, void *arr); - - -int quicksort_datetime(void *vec, npy_intp cnt, void *null); -int heapsort_datetime(void *vec, npy_intp cnt, void *null); -int mergesort_datetime(void *vec, npy_intp cnt, void *null); -int aquicksort_datetime(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_datetime(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_datetime(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int quicksort_timedelta(void *vec, npy_intp cnt, void *null); -int heapsort_timedelta(void *vec, npy_intp cnt, void *null); -int mergesort_timedelta(void *vec, npy_intp cnt, void *null); -int aquicksort_timedelta(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int aheapsort_timedelta(void *vec, npy_intp *ind, npy_intp cnt, void *null); -int amergesort_timedelta(void *vec, npy_intp *ind, npy_intp cnt, void *null); - - -int npy_quicksort(void *vec, npy_intp cnt, void *arr); -int npy_heapsort(void *vec, npy_intp cnt, void *arr); -int npy_mergesort(void *vec, npy_intp cnt, void *arr); -int npy_aquicksort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int npy_aheapsort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); -int npy_amergesort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); - -#endif diff --git a/numpy/core/src/common/npy_sort.h.src b/numpy/core/src/common/npy_sort.h.src new file mode 100644 index 000000000..c31a82764 --- /dev/null +++ b/numpy/core/src/common/npy_sort.h.src @@ -0,0 +1,83 @@ +#ifndef __NPY_SORT_H__ +#define __NPY_SORT_H__ + +/* Python include is for future object sorts */ +#include <Python.h> +#include <numpy/npy_common.h> +#include <numpy/ndarraytypes.h> + +#define NPY_ENOMEM 1 +#define NPY_ECOMP 2 + +static NPY_INLINE int npy_get_msb(npy_uintp unum) +{ + int depth_limit = 0; + while (unum >>= 1) { + depth_limit++; + } + return depth_limit; +} + + +/* + ***************************************************************************** + ** NUMERIC SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, + * longlong, ulonglong, half, float, double, longdouble, + * cfloat, cdouble, clongdouble, datetime, timedelta# + */ + +int quicksort_@suff@(void *vec, npy_intp cnt, void *null); +int heapsort_@suff@(void *vec, npy_intp cnt, void *null); +int mergesort_@suff@(void *vec, npy_intp cnt, void *null); +int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); +int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); +int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *null); + +/**end repeat**/ + + + +/* + ***************************************************************************** + ** STRING SORTS ** + ***************************************************************************** + */ + + +/**begin repeat + * + * #suff = string, unicode# + */ + +int quicksort_@suff@(void *vec, npy_intp cnt, void *arr); +int heapsort_@suff@(void *vec, npy_intp cnt, void *arr); +int mergesort_@suff@(void *vec, npy_intp cnt, void *arr); +int aquicksort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int aheapsort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int amergesort_@suff@(void *vec, npy_intp *ind, npy_intp cnt, void *arr); + +/**end repeat**/ + + +/* + ***************************************************************************** + ** GENERIC SORT ** + ***************************************************************************** + */ + + +int npy_quicksort(void *vec, npy_intp cnt, void *arr); +int npy_heapsort(void *vec, npy_intp cnt, void *arr); +int npy_mergesort(void *vec, npy_intp cnt, void *arr); +int npy_aquicksort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int npy_aheapsort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); +int npy_amergesort(void *vec, npy_intp *ind, npy_intp cnt, void *arr); + +#endif diff --git a/numpy/core/src/common/ufunc_override.c b/numpy/core/src/common/ufunc_override.c index 33b54c665..b67422132 100644 --- a/numpy/core/src/common/ufunc_override.c +++ b/numpy/core/src/common/ufunc_override.c @@ -1,10 +1,9 @@ #define NPY_NO_DEPRECATED_API NPY_API_VERSION -#define NO_IMPORT_ARRAY +#define _MULTIARRAYMODULE #include "npy_pycompat.h" #include "get_attr_string.h" #include "npy_import.h" - #include "ufunc_override.h" /* @@ -12,45 +11,39 @@ * is not the default, i.e., the object is not an ndarray, and its * __array_ufunc__ is not the same as that of ndarray. * - * Returns a new reference, the value of type(obj).__array_ufunc__ - * - * If the __array_ufunc__ matches that of ndarray, or does not exist, return - * NULL. - * - * Note that since this module is used with both multiarray and umath, we do - * not have access to PyArray_Type and therewith neither to PyArray_CheckExact - * nor to the default __array_ufunc__ method, so instead we import locally. - * TODO: Can this really not be done more smartly? + * Returns a new reference, the value of type(obj).__array_ufunc__ if it + * exists and is different from that of ndarray, and NULL otherwise. */ NPY_NO_EXPORT PyObject * -get_non_default_array_ufunc(PyObject *obj) +PyUFuncOverride_GetNonDefaultArrayUfunc(PyObject *obj) { - static PyObject *ndarray = NULL; static PyObject *ndarray_array_ufunc = NULL; PyObject *cls_array_ufunc; - /* on first entry, import and cache ndarray and its __array_ufunc__ */ - if (ndarray == NULL) { - npy_cache_import("numpy.core.multiarray", "ndarray", &ndarray); - ndarray_array_ufunc = PyObject_GetAttrString(ndarray, + /* On first entry, cache ndarray's __array_ufunc__ */ + if (ndarray_array_ufunc == NULL) { + ndarray_array_ufunc = PyObject_GetAttrString((PyObject *)&PyArray_Type, "__array_ufunc__"); } /* Fast return for ndarray */ - if ((PyObject *)Py_TYPE(obj) == ndarray) { + if (PyArray_CheckExact(obj)) { return NULL; } - /* does the class define __array_ufunc__? */ + /* + * Does the class define __array_ufunc__? (Note that LookupSpecial has fast + * return for basic python types, so no need to worry about those here) + */ cls_array_ufunc = PyArray_LookupSpecial(obj, "__array_ufunc__"); if (cls_array_ufunc == NULL) { return NULL; } - /* is it different from ndarray.__array_ufunc__? */ - if (cls_array_ufunc != ndarray_array_ufunc) { - return cls_array_ufunc; + /* Ignore if the same as ndarray.__array_ufunc__ */ + if (cls_array_ufunc == ndarray_array_ufunc) { + Py_DECREF(cls_array_ufunc); + return NULL; } - Py_DECREF(cls_array_ufunc); - return NULL; + return cls_array_ufunc; } /* @@ -62,9 +55,9 @@ get_non_default_array_ufunc(PyObject *obj) */ NPY_NO_EXPORT int -has_non_default_array_ufunc(PyObject * obj) +PyUFunc_HasOverride(PyObject * obj) { - PyObject *method = get_non_default_array_ufunc(obj); + PyObject *method = PyUFuncOverride_GetNonDefaultArrayUfunc(obj); if (method) { Py_DECREF(method); return 1; @@ -80,17 +73,17 @@ has_non_default_array_ufunc(PyObject * obj) * The out argument itself is returned in out_kwd_obj, and the outputs * in the out_obj array (all as borrowed references). * - * Returns -1 if kwds is not a dict, 0 if no outputs found. + * Returns 0 if no outputs found, -1 if kwds is not a dict (with an error set). */ -static int -get_out_objects(PyObject *kwds, PyObject **out_kwd_obj, PyObject ***out_objs) +NPY_NO_EXPORT int +PyUFuncOverride_GetOutObjects(PyObject *kwds, PyObject **out_kwd_obj, PyObject ***out_objs) { if (kwds == NULL) { return 0; } if (!PyDict_CheckExact(kwds)) { PyErr_SetString(PyExc_TypeError, - "Internal Numpy error: call to PyUFunc_WithOverride " + "Internal Numpy error: call to PyUFuncOverride_GetOutObjects " "with non-dict kwds"); return -1; } @@ -108,134 +101,3 @@ get_out_objects(PyObject *kwds, PyObject **out_kwd_obj, PyObject ***out_objs) return 1; } } - -/* - * For each positional argument and each argument in a possible "out" - * keyword, look for overrides of the standard ufunc behaviour, i.e., - * non-default __array_ufunc__ methods. - * - * Returns the number of overrides, setting corresponding objects - * in PyObject array ``with_override`` and the corresponding - * __array_ufunc__ methods in ``methods`` (both using new references). - * - * Only the first override for a given class is returned. - * - * returns -1 on failure. - */ -NPY_NO_EXPORT int -PyUFunc_WithOverride(PyObject *args, PyObject *kwds, - PyObject **with_override, PyObject **methods) -{ - int i; - int num_override_args = 0; - int narg, nout = 0; - PyObject *out_kwd_obj; - PyObject **arg_objs, **out_objs; - - narg = PyTuple_Size(args); - if (narg < 0) { - return -1; - } - arg_objs = PySequence_Fast_ITEMS(args); - - nout = get_out_objects(kwds, &out_kwd_obj, &out_objs); - if (nout < 0) { - return -1; - } - - for (i = 0; i < narg + nout; ++i) { - PyObject *obj; - int j; - int new_class = 1; - - if (i < narg) { - obj = arg_objs[i]; - } - else { - obj = out_objs[i - narg]; - } - /* - * Have we seen this class before? If so, ignore. - */ - for (j = 0; j < num_override_args; j++) { - new_class = (Py_TYPE(obj) != Py_TYPE(with_override[j])); - if (!new_class) { - break; - } - } - if (new_class) { - /* - * Now see if the object provides an __array_ufunc__. However, we should - * ignore the base ndarray.__ufunc__, so we skip any ndarray as well as - * any ndarray subclass instances that did not override __array_ufunc__. - */ - PyObject *method = get_non_default_array_ufunc(obj); - if (method == NULL) { - continue; - } - if (method == Py_None) { - PyErr_Format(PyExc_TypeError, - "operand '%.200s' does not support ufuncs " - "(__array_ufunc__=None)", - obj->ob_type->tp_name); - Py_DECREF(method); - goto fail; - } - Py_INCREF(obj); - with_override[num_override_args] = obj; - methods[num_override_args] = method; - ++num_override_args; - } - } - return num_override_args; - -fail: - for (i = 0; i < num_override_args; i++) { - Py_DECREF(with_override[i]); - Py_DECREF(methods[i]); - } - return -1; -} - -/* - * Check whether any of a set of input and output args have a non-default - * __array_ufunc__ method. Return 1 if so, 0 if not. - * - * This function primarily exists to help ndarray.__array_ufunc__ determine - * whether it can support a ufunc (which is the case only if none of the - * operands have an override). Thus, unlike in PyUFunc_CheckOverride, the - * actual overrides are not needed and one can stop looking once one is found. - * - * TODO: move this function and has_non_default_array_ufunc closer to ndarray. - */ -NPY_NO_EXPORT int -PyUFunc_HasOverride(PyObject *args, PyObject *kwds) -{ - int i; - int nin, nout; - PyObject *out_kwd_obj; - PyObject **in_objs, **out_objs; - - /* check inputs */ - nin = PyTuple_Size(args); - if (nin < 0) { - return -1; - } - in_objs = PySequence_Fast_ITEMS(args); - for (i = 0; i < nin; ++i) { - if (has_non_default_array_ufunc(in_objs[i])) { - return 1; - } - } - /* check outputs, if any */ - nout = get_out_objects(kwds, &out_kwd_obj, &out_objs); - if (nout < 0) { - return -1; - } - for (i = 0; i < nout; i++) { - if (has_non_default_array_ufunc(out_objs[i])) { - return 1; - } - } - return 0; -} diff --git a/numpy/core/src/common/ufunc_override.h b/numpy/core/src/common/ufunc_override.h index 5b269d270..cc39166b3 100644 --- a/numpy/core/src/common/ufunc_override.h +++ b/numpy/core/src/common/ufunc_override.h @@ -8,18 +8,11 @@ * is not the default, i.e., the object is not an ndarray, and its * __array_ufunc__ is not the same as that of ndarray. * - * Returns a new reference, the value of type(obj).__array_ufunc__ - * - * If the __array_ufunc__ matches that of ndarray, or does not exist, return - * NULL. - * - * Note that since this module is used with both multiarray and umath, we do - * not have access to PyArray_Type and therewith neither to PyArray_CheckExact - * nor to the default __array_ufunc__ method, so instead we import locally. - * TODO: Can this really not be done more smartly? + * Returns a new reference, the value of type(obj).__array_ufunc__ if it + * exists and is different from that of ndarray, and NULL otherwise. */ NPY_NO_EXPORT PyObject * -get_non_default_array_ufunc(PyObject *obj); +PyUFuncOverride_GetNonDefaultArrayUfunc(PyObject *obj); /* * Check whether an object has __array_ufunc__ defined on its class and it @@ -29,18 +22,16 @@ get_non_default_array_ufunc(PyObject *obj); * Returns 1 if this is the case, 0 if not. */ NPY_NO_EXPORT int -has_non_default_array_ufunc(PyObject * obj); +PyUFunc_HasOverride(PyObject *obj); /* - * Check whether a set of input and output args have a non-default - * `__array_ufunc__` method. Returns the number of overrides, setting - * corresponding objects in PyObject array with_override (if not NULL). - * returns -1 on failure. + * Get possible out argument from kwds, and returns the number of outputs + * contained within it: if a tuple, the number of elements in it, 1 otherwise. + * The out argument itself is returned in out_kwd_obj, and the outputs + * in the out_obj array (all as borrowed references). + * + * Returns 0 if no outputs found, -1 if kwds is not a dict (with an error set). */ NPY_NO_EXPORT int -PyUFunc_WithOverride(PyObject *args, PyObject *kwds, - PyObject **with_override, PyObject **methods); - -NPY_NO_EXPORT int -PyUFunc_HasOverride(PyObject *args, PyObject *kwds); +PyUFuncOverride_GetOutObjects(PyObject *kwds, PyObject **out_kwd_obj, PyObject ***out_objs); #endif diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 6c4d49bd1..b98c3afb3 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -1855,6 +1855,16 @@ printf_float_g(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) return PrintFloat_Printf_g(obj, precision); } +static PyObject * +getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args)) +{ + PyObject * ops = PyArray_GetNumericOps(); + if (ops == NULL) { + return NULL; + } + return PyLong_FromLong(PyArray_SetNumericOps(ops)); +} + static PyMethodDef Multiarray_TestsMethods[] = { {"IsPythonScalar", IsPythonScalar, @@ -1963,6 +1973,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"get_fpu_mode", get_fpu_mode, METH_VARARGS, get_fpu_mode_doc}, + {"getset_numericops", + getset_numericops, + METH_NOARGS, NULL}, /**begin repeat * #name = cabs, carg# */ diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index e8380e3bc..17d8baf7b 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1158,26 +1158,6 @@ arr_unravel_index(PyObject *self, PyObject *args, PyObject *kwds) char *kwlist[] = {"indices", "shape", "order", NULL}; - /* Continue to support the older "dims" argument in place - * of the "shape" argument. Issue an appropriate warning - * if "dims" is detected in keywords, then replace it with - * the new "shape" argument and continue processing as usual */ - - - if (kwds) { - PyObject *dims_item, *shape_item; - dims_item = PyDict_GetItemString(kwds, "dims"); - shape_item = PyDict_GetItemString(kwds, "shape"); - if (dims_item != NULL && shape_item == NULL) { - if (DEPRECATE("'shape' argument should be" - " used instead of 'dims'") < 0) { - return NULL; - } - PyDict_SetItemString(kwds, "shape", dims_item); - PyDict_DelItemString(kwds, "dims"); - } - } - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&|O&:unravel_index", kwlist, &indices0, diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c index 23b0bfd24..231bd86dc 100644 --- a/numpy/core/src/multiarray/methods.c +++ b/numpy/core/src/multiarray/methods.c @@ -988,8 +988,49 @@ array_getarray(PyArrayObject *self, PyObject *args) } } +/* + * Check whether any of a set of input and output args have a non-default + * __array_ufunc__ method. Return 1 if so, 0 if not, and -1 on error. + * + * This function primarily exists to help ndarray.__array_ufunc__ determine + * whether it can support a ufunc (which is the case only if none of the + * operands have an override). Thus, unlike in umath/override.c, the + * actual overrides are not needed and one can stop looking once one is found. + */ +static int +any_array_ufunc_overrides(PyObject *args, PyObject *kwds) +{ + int i; + int nin, nout; + PyObject *out_kwd_obj; + PyObject **in_objs, **out_objs; -static PyObject * + /* check inputs */ + nin = PyTuple_Size(args); + if (nin < 0) { + return -1; + } + in_objs = PySequence_Fast_ITEMS(args); + for (i = 0; i < nin; ++i) { + if (PyUFunc_HasOverride(in_objs[i])) { + return 1; + } + } + /* check outputs, if any */ + nout = PyUFuncOverride_GetOutObjects(kwds, &out_kwd_obj, &out_objs); + if (nout < 0) { + return -1; + } + for (i = 0; i < nout; i++) { + if (PyUFunc_HasOverride(out_objs[i])) { + return 1; + } + } + return 0; +} + + +NPY_NO_EXPORT PyObject * array_ufunc(PyArrayObject *self, PyObject *args, PyObject *kwds) { PyObject *ufunc, *method_name, *normal_args, *ufunc_method; @@ -1009,7 +1050,7 @@ array_ufunc(PyArrayObject *self, PyObject *args, PyObject *kwds) return NULL; } /* ndarray cannot handle overrides itself */ - has_override = PyUFunc_HasOverride(normal_args, kwds); + has_override = any_array_ufunc_overrides(normal_args, kwds); if (has_override < 0) { goto cleanup; } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 8f782cff6..9e42c115c 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -2044,6 +2044,7 @@ static PyObject * array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) { PyObject *file = NULL, *ret; + PyObject *err_type = NULL, *err_value = NULL, *err_traceback = NULL; char *sep = ""; Py_ssize_t nin = -1; static char *kwlist[] = {"file", "dtype", "count", "sep", NULL}; @@ -2079,18 +2080,26 @@ array_fromfile(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *keywds) } ret = PyArray_FromFile(fp, type, (npy_intp) nin, sep); + /* If an exception is thrown in the call to PyArray_FromFile + * we need to clear it, and restore it later to ensure that + * we can cleanup the duplicated file descriptor properly. + */ + PyErr_Fetch(&err_type, &err_value, &err_traceback); if (npy_PyFile_DupClose2(file, fp, orig_pos) < 0) { + npy_PyErr_ChainExceptions(err_type, err_value, err_traceback); goto fail; } if (own && npy_PyFile_CloseFile(file) < 0) { + npy_PyErr_ChainExceptions(err_type, err_value, err_traceback); goto fail; } + PyErr_Restore(err_type, err_value, err_traceback); Py_DECREF(file); return ret; fail: Py_DECREF(file); - Py_DECREF(ret); + Py_XDECREF(ret); return NULL; } @@ -2990,7 +2999,7 @@ array_set_ops_function(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args), { PyObject *oldops = NULL; - if ((oldops = PyArray_GetNumericOps()) == NULL) { + if ((oldops = _PyArray_GetNumericOps()) == NULL) { return NULL; } /* @@ -3000,8 +3009,10 @@ array_set_ops_function(PyObject *NPY_UNUSED(self), PyObject *NPY_UNUSED(args), */ if (kwds && PyArray_SetNumericOps(kwds) == -1) { Py_DECREF(oldops); - PyErr_SetString(PyExc_ValueError, + if (PyErr_Occurred() == NULL) { + PyErr_SetString(PyExc_ValueError, "one or more objects not callable"); + } return NULL; } return oldops; diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c index dabbae064..5ee536d4f 100644 --- a/numpy/core/src/multiarray/number.c +++ b/numpy/core/src/multiarray/number.c @@ -71,12 +71,8 @@ array_inplace_power(PyArrayObject *a1, PyObject *o2, PyObject *NPY_UNUSED(modulo n_ops.op = temp; \ } - -/*NUMPY_API - *Set internal structure with number functions that all arrays will use - */ NPY_NO_EXPORT int -PyArray_SetNumericOps(PyObject *dict) +_PyArray_SetNumericOps(PyObject *dict) { PyObject *temp = NULL; SET(add); @@ -119,16 +115,28 @@ PyArray_SetNumericOps(PyObject *dict) return 0; } +/*NUMPY_API + *Set internal structure with number functions that all arrays will use + */ +NPY_NO_EXPORT int +PyArray_SetNumericOps(PyObject *dict) +{ + /* 2018-09-09, 1.16 */ + if (DEPRECATE("PyArray_SetNumericOps is deprecated. Use " + "PyUFunc_ReplaceLoopBySignature to replace ufunc inner loop functions " + "instead.") < 0) { + return -1; + } + return _PyArray_SetNumericOps(dict); +} + /* Note - macro contains goto */ #define GET(op) if (n_ops.op && \ (PyDict_SetItemString(dict, #op, n_ops.op)==-1)) \ goto fail; -/*NUMPY_API - Get dictionary showing number functions that all arrays will use -*/ NPY_NO_EXPORT PyObject * -PyArray_GetNumericOps(void) +_PyArray_GetNumericOps(void) { PyObject *dict; if ((dict = PyDict_New())==NULL) @@ -176,6 +184,19 @@ PyArray_GetNumericOps(void) return NULL; } +/*NUMPY_API + Get dictionary showing number functions that all arrays will use +*/ +NPY_NO_EXPORT PyObject * +PyArray_GetNumericOps(void) +{ + /* 2018-09-09, 1.16 */ + if (DEPRECATE("PyArray_GetNumericOps is deprecated.") < 0) { + return NULL; + } + return _PyArray_GetNumericOps(); +} + static PyObject * _get_keywords(int rtype, PyArrayObject *out) { @@ -578,7 +599,7 @@ array_positive(PyArrayObject *m1) */ PyObject *exc, *val, *tb; PyErr_Fetch(&exc, &val, &tb); - if (has_non_default_array_ufunc((PyObject *)m1)) { + if (PyUFunc_HasOverride((PyObject *)m1)) { PyErr_Restore(exc, val, tb); return NULL; } diff --git a/numpy/core/src/multiarray/number.h b/numpy/core/src/multiarray/number.h index 99a2a722b..fbdfe6f94 100644 --- a/numpy/core/src/multiarray/number.h +++ b/numpy/core/src/multiarray/number.h @@ -48,10 +48,10 @@ NPY_NO_EXPORT PyObject * array_int(PyArrayObject *v); NPY_NO_EXPORT int -PyArray_SetNumericOps(PyObject *dict); +_PyArray_SetNumericOps(PyObject *dict); NPY_NO_EXPORT PyObject * -PyArray_GetNumericOps(void); +_PyArray_GetNumericOps(void); NPY_NO_EXPORT PyObject * PyArray_GenericBinaryFunction(PyArrayObject *m1, PyObject *m2, PyObject *op); diff --git a/numpy/core/src/multiarray/temp_elide.c b/numpy/core/src/multiarray/temp_elide.c index 3d2f976f2..09b948218 100644 --- a/numpy/core/src/multiarray/temp_elide.c +++ b/numpy/core/src/multiarray/temp_elide.c @@ -166,7 +166,7 @@ check_callers(int * cannot) return 0; } /* get multiarray base address */ - if (dladdr(&PyArray_SetNumericOps, &info)) { + if (dladdr(&PyArray_INCREF, &info)) { pos_ma_start = info.dli_fbase; pos_ma_end = info.dli_fbase; } diff --git a/numpy/core/src/umath/override.c b/numpy/core/src/umath/override.c index 4a381ba12..c56f43fa2 100644 --- a/numpy/core/src/umath/override.c +++ b/numpy/core/src/umath/override.c @@ -5,8 +5,96 @@ #include "numpy/ufuncobject.h" #include "npy_import.h" -#include "ufunc_override.h" #include "override.h" +#include "ufunc_override.h" + +/* + * For each positional argument and each argument in a possible "out" + * keyword, look for overrides of the standard ufunc behaviour, i.e., + * non-default __array_ufunc__ methods. + * + * Returns the number of overrides, setting corresponding objects + * in PyObject array ``with_override`` and the corresponding + * __array_ufunc__ methods in ``methods`` (both using new references). + * + * Only the first override for a given class is returned. + * + * Returns -1 on failure. + */ +static int +get_array_ufunc_overrides(PyObject *args, PyObject *kwds, + PyObject **with_override, PyObject **methods) +{ + int i; + int num_override_args = 0; + int narg, nout = 0; + PyObject *out_kwd_obj; + PyObject **arg_objs, **out_objs; + + narg = PyTuple_Size(args); + if (narg < 0) { + return -1; + } + arg_objs = PySequence_Fast_ITEMS(args); + + nout = PyUFuncOverride_GetOutObjects(kwds, &out_kwd_obj, &out_objs); + if (nout < 0) { + return -1; + } + + for (i = 0; i < narg + nout; ++i) { + PyObject *obj; + int j; + int new_class = 1; + + if (i < narg) { + obj = arg_objs[i]; + } + else { + obj = out_objs[i - narg]; + } + /* + * Have we seen this class before? If so, ignore. + */ + for (j = 0; j < num_override_args; j++) { + new_class = (Py_TYPE(obj) != Py_TYPE(with_override[j])); + if (!new_class) { + break; + } + } + if (new_class) { + /* + * Now see if the object provides an __array_ufunc__. However, we should + * ignore the base ndarray.__ufunc__, so we skip any ndarray as well as + * any ndarray subclass instances that did not override __array_ufunc__. + */ + PyObject *method = PyUFuncOverride_GetNonDefaultArrayUfunc(obj); + if (method == NULL) { + continue; + } + if (method == Py_None) { + PyErr_Format(PyExc_TypeError, + "operand '%.200s' does not support ufuncs " + "(__array_ufunc__=None)", + obj->ob_type->tp_name); + Py_DECREF(method); + goto fail; + } + Py_INCREF(obj); + with_override[num_override_args] = obj; + methods[num_override_args] = method; + ++num_override_args; + } + } + return num_override_args; + +fail: + for (i = 0; i < num_override_args; i++) { + Py_DECREF(with_override[i]); + Py_DECREF(methods[i]); + } + return -1; +} /* * The following functions normalize ufunc arguments. The work done is similar @@ -359,7 +447,7 @@ PyUFunc_CheckOverride(PyUFuncObject *ufunc, char *method, /* * Check inputs for overrides */ - num_override_args = PyUFunc_WithOverride( + num_override_args = get_array_ufunc_overrides( args, kwds, with_override, array_ufunc_methods); if (num_override_args == -1) { goto fail; diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index da0713b2b..a3e00b5c1 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -32,6 +32,14 @@ #include <float.h> #include <string.h> /* for memcpy */ +#if defined __AVX512F__ +#define VECTOR_SIZE_BYTES 64 +#elif defined __AVX2__ +#define VECTOR_SIZE_BYTES 32 +#else +#define VECTOR_SIZE_BYTES 16 +#endif + static NPY_INLINE npy_uintp abs_ptrdiff(char *a, char *b) { @@ -144,7 +152,7 @@ static NPY_INLINE int run_@name@_simd_@func@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps) { #if @vector@ && defined NPY_HAVE_SSE2_INTRINSICS - if (@check@(sizeof(@type@), 16)) { + if (@check@(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_@func@_@TYPE@((@type@*)args[1], (@type@*)args[0], dimensions[0]); return 1; } @@ -183,16 +191,16 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps @type@ * op = (@type@ *)args[2]; npy_intp n = dimensions[0]; /* argument one scalar */ - if (IS_BLOCKABLE_BINARY_SCALAR1(sizeof(@type@), 16)) { + if (IS_BLOCKABLE_BINARY_SCALAR1(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_scalar1_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } /* argument two scalar */ - else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), 16)) { + else if (IS_BLOCKABLE_BINARY_SCALAR2(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_scalar2_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } - else if (IS_BLOCKABLE_BINARY(sizeof(@type@), 16)) { + else if (IS_BLOCKABLE_BINARY(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } @@ -232,16 +240,16 @@ run_binary_simd_@kind@_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps npy_bool * op = (npy_bool *)args[2]; npy_intp n = dimensions[0]; /* argument one scalar */ - if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), 16)) { + if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_scalar1_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } /* argument two scalar */ - else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), 16)) { + else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_scalar2_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } - else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), 16)) { + else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), VECTOR_SIZE_BYTES)) { sse2_binary_@kind@_@TYPE@(op, ip1, ip2, n); return 1; } @@ -302,7 +310,8 @@ static NPY_INLINE int run_binary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { #if defined NPY_HAVE_SSE2_INTRINSICS - if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_BINARY(sizeof(npy_bool), 16)) { + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_BINARY(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { sse2_binary_@kind@_BOOL((npy_bool*)args[2], (npy_bool*)args[0], (npy_bool*)args[1], dimensions[0]); return 1; @@ -316,7 +325,8 @@ static NPY_INLINE int run_reduce_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { #if defined NPY_HAVE_SSE2_INTRINSICS - if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_REDUCE(sizeof(npy_bool), 16)) { + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_REDUCE(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { sse2_reduce_@kind@_BOOL((npy_bool*)args[0], (npy_bool*)args[1], dimensions[0]); return 1; @@ -340,7 +350,8 @@ static NPY_INLINE int run_unary_simd_@kind@_BOOL(char **args, npy_intp *dimensions, npy_intp *steps) { #if defined NPY_HAVE_SSE2_INTRINSICS - if (sizeof(npy_bool) == 1 && IS_BLOCKABLE_UNARY(sizeof(npy_bool), 16)) { + if (sizeof(npy_bool) == 1 && + IS_BLOCKABLE_UNARY(sizeof(npy_bool), VECTOR_SIZE_BYTES)) { sse2_@kind@_BOOL((npy_bool*)args[1], (npy_bool*)args[0], dimensions[0]); return 1; } @@ -416,19 +427,19 @@ static void sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { #ifdef __AVX512F__ - LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[i]; /* lots of specializations, to squeeze out max performance */ - if (npy_is_aligned(&ip1[i], 64) && npy_is_aligned(&ip2[i], 64)) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES) && npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); @vpre512@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @@ -436,16 +447,16 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } } - else if (npy_is_aligned(&ip1[i], 64)) { - LOOP_BLOCKED(@type@, 64) { + else if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @vpre512@_store_@vsuf@(&op[i], c); } } - else if (npy_is_aligned(&ip2[i], 64)) { - LOOP_BLOCKED(@type@, 64) { + else if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @@ -454,14 +465,14 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } else { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); @vpre512@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @@ -470,19 +481,20 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } #elif __AVX2__ - LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[i]; /* lots of specializations, to squeeze out max performance */ - if (npy_is_aligned(&ip1[i], 32) && npy_is_aligned(&ip2[i], 32)) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES) && + npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); @vpre256@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @@ -490,16 +502,16 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } } - else if (npy_is_aligned(&ip1[i], 32)) { - LOOP_BLOCKED(@type@, 32) { + else if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @vpre256@_store_@vsuf@(&op[i], c); } } - else if (npy_is_aligned(&ip2[i], 32)) { - LOOP_BLOCKED(@type@, 32) { + else if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @@ -508,14 +520,14 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } else { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); @vpre256@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @@ -524,19 +536,20 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } #else - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[i]; /* lots of specializations, to squeeze out max performance */ - if (npy_is_aligned(&ip1[i], 16) && npy_is_aligned(&ip2[i], 16)) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES) && + npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a); @vpre@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @@ -544,16 +557,16 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } } - else if (npy_is_aligned(&ip1[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + else if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @vpre@_store_@vsuf@(&op[i], c); } } - else if (npy_is_aligned(&ip2[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + else if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @@ -562,14 +575,14 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } else { if (ip1 == ip2) { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, a); @vpre@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @@ -589,17 +602,17 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i { #ifdef __AVX512F__ const @vtype512@ a = @vpre512@_set1_@vsuf@(ip1[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[0] @OP@ ip2[i]; - if (npy_is_aligned(&ip2[i], 64)) { - LOOP_BLOCKED(@type@, 64) { + if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @vpre512@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @vpre512@_store_@vsuf@(&op[i], c); @@ -609,17 +622,17 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i #elif __AVX2__ const @vtype256@ a = @vpre256@_set1_@vsuf@(ip1[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[0] @OP@ ip2[i]; - if (npy_is_aligned(&ip2[i], 32)) { - LOOP_BLOCKED(@type@, 32) { + if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @vpre256@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @vpre256@_store_@vsuf@(&op[i], c); @@ -627,17 +640,17 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i } #else const @vtype@ a = @vpre@_set1_@vsuf@(ip1[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[0] @OP@ ip2[i]; - if (npy_is_aligned(&ip2[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + if (npy_is_aligned(&ip2[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ b = @vpre@_load_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @vpre@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ b = @vpre@_loadu_@vsuf@(&ip2[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @vpre@_store_@vsuf@(&op[i], c); @@ -655,17 +668,17 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i { #ifdef __AVX512F__ const @vtype512@ b = @vpre512@_set1_@vsuf@(ip2[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[0]; - if (npy_is_aligned(&ip1[i], 64)) { - LOOP_BLOCKED(@type@, 64) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @vpre512@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 64) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); @vpre512@_store_@vsuf@(&op[i], c); @@ -674,17 +687,17 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i #elif __AVX2__ const @vtype256@ b = @vpre256@_set1_@vsuf@(ip2[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[0]; - if (npy_is_aligned(&ip1[i], 32)) { - LOOP_BLOCKED(@type@, 32) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @vpre256@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); @vpre256@_store_@vsuf@(&op[i], c); @@ -692,17 +705,17 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i } #else const @vtype@ b = @vpre@_set1_@vsuf@(ip2[0]); - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @OP@ ip2[0]; - if (npy_is_aligned(&ip1[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + if (npy_is_aligned(&ip1[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @vpre@_store_@vsuf@(&op[i], c); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_loadu_@vsuf@(&ip1[i]); @vtype@ c = @vpre@_@VOP@_@vsuf@(a, b); @vpre@_store_@vsuf@(&op[i], c); @@ -742,10 +755,10 @@ sse2_compress4_to_byte_@TYPE@(@vtype@ r1, @vtype@ r2, @vtype@ r3, @vtype@ * r4, static void sse2_signbit_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n) { - LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) { + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { op[i] = npy_signbit(ip1[i]) != 0; } - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip1[i]); int r = @vpre@_movemask_@vsuf@(a); if (sizeof(@type@) == 8) { @@ -783,14 +796,14 @@ sse2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, npy_intp n) const @vtype@ fltmax = @vpre@_set1_@vsuf@(FLT_MAX); #endif #endif - LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) { + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { op[i] = npy_@kind@(ip1[i]) != 0; } - LOOP_BLOCKED(@type@, 64) { - @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]); - @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]); - @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]); - @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]); + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); @vtype@ r1, r2, r3, r4; #if @var@ != 0 /* isinf/isfinite */ /* fabs via masking of sign bit */ @@ -853,18 +866,18 @@ sse2_ordered_cmp_@kind@_@TYPE@(const @type@ a, const @type@ b) static void sse2_binary_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) { - LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) { + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[i]); } - LOOP_BLOCKED(@type@, 64) { - @vtype@ a1 = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]); - @vtype@ b1 = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]); - @vtype@ c1 = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]); - @vtype@ d1 = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]); - @vtype@ a2 = @vpre@_loadu_@vsuf@(&ip2[i + 0 * 16 / sizeof(@type@)]); - @vtype@ b2 = @vpre@_loadu_@vsuf@(&ip2[i + 1 * 16 / sizeof(@type@)]); - @vtype@ c2 = @vpre@_loadu_@vsuf@(&ip2[i + 2 * 16 / sizeof(@type@)]); - @vtype@ d2 = @vpre@_loadu_@vsuf@(&ip2[i + 3 * 16 / sizeof(@type@)]); + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a1 = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b1 = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c1 = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d1 = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ a2 = @vpre@_loadu_@vsuf@(&ip2[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b2 = @vpre@_loadu_@vsuf@(&ip2[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c2 = @vpre@_loadu_@vsuf@(&ip2[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d2 = @vpre@_loadu_@vsuf@(&ip2[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a1, a2); @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b1, b2); @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c1, c2); @@ -881,14 +894,14 @@ static void sse2_binary_scalar1_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) { @vtype@ s = @vpre@_set1_@vsuf@(ip1[0]); - LOOP_BLOCK_ALIGN_VAR(ip2, @type@, 16) { + LOOP_BLOCK_ALIGN_VAR(ip2, @type@, VECTOR_SIZE_BYTES) { op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[0], ip2[i]); } - LOOP_BLOCKED(@type@, 64) { - @vtype@ a = @vpre@_load_@vsuf@(&ip2[i + 0 * 16 / sizeof(@type@)]); - @vtype@ b = @vpre@_load_@vsuf@(&ip2[i + 1 * 16 / sizeof(@type@)]); - @vtype@ c = @vpre@_load_@vsuf@(&ip2[i + 2 * 16 / sizeof(@type@)]); - @vtype@ d = @vpre@_load_@vsuf@(&ip2[i + 3 * 16 / sizeof(@type@)]); + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip2[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip2[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip2[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip2[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); @vtype@ r1 = @vpre@_@VOP@_@vsuf@(s, a); @vtype@ r2 = @vpre@_@VOP@_@vsuf@(s, b); @vtype@ r3 = @vpre@_@VOP@_@vsuf@(s, c); @@ -905,14 +918,14 @@ static void sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy_intp n) { @vtype@ s = @vpre@_set1_@vsuf@(ip2[0]); - LOOP_BLOCK_ALIGN_VAR(ip1, @type@, 16) { + LOOP_BLOCK_ALIGN_VAR(ip1, @type@, VECTOR_SIZE_BYTES) { op[i] = sse2_ordered_cmp_@kind@_@TYPE@(ip1[i], ip2[0]); } - LOOP_BLOCKED(@type@, 64) { - @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * 16 / sizeof(@type@)]); - @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * 16 / sizeof(@type@)]); - @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * 16 / sizeof(@type@)]); - @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * 16 / sizeof(@type@)]); + LOOP_BLOCKED(@type@, 4 * VECTOR_SIZE_BYTES) { + @vtype@ a = @vpre@_load_@vsuf@(&ip1[i + 0 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ b = @vpre@_load_@vsuf@(&ip1[i + 1 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ c = @vpre@_load_@vsuf@(&ip1[i + 2 * VECTOR_SIZE_BYTES / sizeof(@type@)]); + @vtype@ d = @vpre@_load_@vsuf@(&ip1[i + 3 * VECTOR_SIZE_BYTES / sizeof(@type@)]); @vtype@ r1 = @vpre@_@VOP@_@vsuf@(a, s); @vtype@ r2 = @vpre@_@VOP@_@vsuf@(b, s); @vtype@ r3 = @vpre@_@VOP@_@vsuf@(c, s); @@ -928,19 +941,20 @@ sse2_binary_scalar2_@kind@_@TYPE@(npy_bool * op, @type@ * ip1, @type@ * ip2, npy static void sse2_sqrt_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) { - /* align output to 16 bytes */ - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) { + /* align output to VECTOR_SIZE_BYTES bytes */ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) { op[i] = @scalarf@(ip[i]); } - assert(n < (16 / sizeof(@type@)) || npy_is_aligned(&op[i], 16)); - if (npy_is_aligned(&ip[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) || + npy_is_aligned(&op[i], VECTOR_SIZE_BYTES)); + if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ d = @vpre@_load_@vsuf@(&ip[i]); @vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d)); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ d = @vpre@_loadu_@vsuf@(&ip[i]); @vpre@_store_@vsuf@(&op[i], @vpre@_sqrt_@vsuf@(d)); } @@ -979,19 +993,20 @@ sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) */ const @vtype@ mask = @vpre@_set1_@vsuf@(-0.@c@); - /* align output to 16 bytes */ - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) { + /* align output to VECTOR_SIZE_BYTES bytes */ + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) { op[i] = @scalar@_@type@(ip[i]); } - assert(n < (16 / sizeof(@type@)) || npy_is_aligned(&op[i], 16)); - if (npy_is_aligned(&ip[i], 16)) { - LOOP_BLOCKED(@type@, 16) { + assert(n < (VECTOR_SIZE_BYTES / sizeof(@type@)) || + npy_is_aligned(&op[i], VECTOR_SIZE_BYTES)); + if (npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_load_@vsuf@(&ip[i]); @vpre@_store_@vsuf@(&op[i], @vpre@_@VOP@_@vsuf@(mask, a)); } } else { - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vpre@_loadu_@vsuf@(&ip[i]); @vpre@_store_@vsuf@(&op[i], @vpre@_@VOP@_@vsuf@(mask, a)); } @@ -1012,11 +1027,11 @@ sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) static void sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) { - const npy_intp stride = 16 / (npy_intp)sizeof(@type@); - LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) { + const npy_intp stride = VECTOR_SIZE_BYTES / (npy_intp)sizeof(@type@); + LOOP_BLOCK_ALIGN_VAR(ip, @type@, VECTOR_SIZE_BYTES) { *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i]; } - assert(n < (stride) || npy_is_aligned(&ip[i], 16)); + assert(n < (stride) || npy_is_aligned(&ip[i], VECTOR_SIZE_BYTES)); if (i + 3 * stride <= n) { /* load the first elements */ @vtype@ c1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); @@ -1025,7 +1040,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) /* minps/minpd will set invalid flag if nan is encountered */ npy_clear_floatstatus_barrier((char*)&c1); - LOOP_BLOCKED(@type@, 32) { + LOOP_BLOCKED(@type@, 2 * VECTOR_SIZE_BYTES) { @vtype@ v1 = @vpre@_load_@vsuf@((@type@*)&ip[i]); @vtype@ v2 = @vpre@_load_@vsuf@((@type@*)&ip[i + stride]); c1 = @vpre@_@VOP@_@vsuf@(c1, v1); @@ -1090,9 +1105,9 @@ static NPY_INLINE @vtype@ byte_to_true(@vtype@ v) static void sse2_binary_@kind@_BOOL(npy_bool * op, npy_bool * ip1, npy_bool * ip2, npy_intp n) { - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = ip1[i] @op@ ip2[i]; - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vloadu@((@vtype@*)&ip1[i]); @vtype@ b = @vloadu@((@vtype@*)&ip2[i]); #if @and@ @@ -1117,16 +1132,16 @@ static void sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, const npy_intp n) { const @vtype@ zero = @vpre@_setzero_@vsuf@(); - LOOP_BLOCK_ALIGN_VAR(ip, npy_bool, 16) { + LOOP_BLOCK_ALIGN_VAR(ip, npy_bool, VECTOR_SIZE_BYTES) { *op = *op @op@ ip[i]; if (*op @sc@ 0) { return; } } /* unrolled once to replace a slow movmsk with a fast pmaxb */ - LOOP_BLOCKED(npy_bool, 32) { + LOOP_BLOCKED(npy_bool, 2 * VECTOR_SIZE_BYTES) { @vtype@ v = @vload@((@vtype@*)&ip[i]); - @vtype@ v2 = @vload@((@vtype@*)&ip[i + 16]); + @vtype@ v2 = @vload@((@vtype@*)&ip[i + VECTOR_SIZE_BYTES]); v = @vpre@_cmpeq_epi8(v, zero); v2 = @vpre@_cmpeq_epi8(v2, zero); #if @and@ @@ -1164,9 +1179,9 @@ sse2_reduce_@kind@_BOOL(npy_bool * op, npy_bool * ip, const npy_intp n) static void sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n) { - LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) + LOOP_BLOCK_ALIGN_VAR(op, @type@, VECTOR_SIZE_BYTES) op[i] = (ip[i] @op@ 0); - LOOP_BLOCKED(@type@, 16) { + LOOP_BLOCKED(@type@, VECTOR_SIZE_BYTES) { @vtype@ a = @vloadu@((@vtype@*)&ip[i]); #if @not@ const @vtype@ zero = @vpre@_setzero_@vsuf@(); @@ -1187,6 +1202,8 @@ sse2_@kind@_BOOL(@type@ * op, @type@ * ip, const npy_intp n) /**end repeat**/ +#undef VECTOR_SIZE_BYTES + #endif /* NPY_HAVE_SSE2_INTRINSICS */ #endif diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 8fb731fb7..1fe8745a0 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -308,6 +308,49 @@ _find_array_prepare(ufunc_full_args args, return; } +#define NPY_UFUNC_DEFAULT_INPUT_FLAGS \ + NPY_ITER_READONLY | \ + NPY_ITER_ALIGNED | \ + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE + +#define NPY_UFUNC_DEFAULT_OUTPUT_FLAGS \ + NPY_ITER_ALIGNED | \ + NPY_ITER_ALLOCATE | \ + NPY_ITER_NO_BROADCAST | \ + NPY_ITER_NO_SUBTYPE | \ + NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE +/* + * Set per-operand flags according to desired input or output flags. + * op_flags[i] for i in input (as determined by ufunc->nin) will be + * merged with op_in_flags, perhaps overriding per-operand flags set + * in previous stages. + * op_flags[i] for i in output will be set to op_out_flags only if previously + * unset. + * The input flag behavior preserves backward compatibility, while the + * output flag behaviour is the "correct" one for maximum flexibility. + */ +NPY_NO_EXPORT void +_ufunc_setup_flags(PyUFuncObject *ufunc, npy_uint32 op_in_flags, + npy_uint32 op_out_flags, npy_uint32 *op_flags) +{ + int nin = ufunc->nin; + int nout = ufunc->nout; + int nop = nin + nout, i; + /* Set up the flags */ + for (i = 0; i < nin; ++i) { + op_flags[i] = ufunc->op_flags[i] | op_in_flags; + /* + * If READWRITE flag has been set for this operand, + * then clear default READONLY flag + */ + if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) { + op_flags[i] &= ~NPY_ITER_READONLY; + } + } + for (i = nin; i < nop; ++i) { + op_flags[i] = ufunc->op_flags[i] ? ufunc->op_flags[i] : op_out_flags; + } +} /* * This function analyzes the input arguments @@ -1394,11 +1437,11 @@ iterator_loop(PyUFuncObject *ufunc, PyObject **arr_prep, ufunc_full_args full_args, PyUFuncGenericFunction innerloop, - void *innerloopdata) + void *innerloopdata, + npy_uint32 *op_flags) { npy_intp i, nin = ufunc->nin, nout = ufunc->nout; npy_intp nop = nin + nout; - npy_uint32 op_flags[NPY_MAXARGS]; NpyIter *iter; char *baseptrs[NPY_MAXARGS]; @@ -1412,29 +1455,6 @@ iterator_loop(PyUFuncObject *ufunc, NPY_BEGIN_THREADS_DEF; - /* Set up the flags */ - for (i = 0; i < nin; ++i) { - op_flags[i] = NPY_ITER_READONLY | - NPY_ITER_ALIGNED | - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - /* - * If READWRITE flag has been set for this operand, - * then clear default READONLY flag - */ - op_flags[i] |= ufunc->op_flags[i]; - if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) { - op_flags[i] &= ~NPY_ITER_READONLY; - } - } - for (i = nin; i < nop; ++i) { - op_flags[i] = NPY_ITER_WRITEONLY | - NPY_ITER_ALIGNED | - NPY_ITER_ALLOCATE | - NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE | - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - } - iter_flags = ufunc->iter_flags | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK | @@ -1538,15 +1558,15 @@ iterator_loop(PyUFuncObject *ufunc, } /* + * ufunc - the ufunc to call * trivial_loop_ok - 1 if no alignment, data conversion, etc required - * nin - number of inputs - * nout - number of outputs - * op - the operands (nin + nout of them) + * op - the operands (ufunc->nin + ufunc->nout of them) + * dtypes - the dtype of each operand * order - the loop execution order/output memory order * buffersize - how big of a buffer to use * arr_prep - the __array_prepare__ functions for the outputs - * innerloop - the inner loop function - * innerloopdata - data to pass to the inner loop + * full_args - the original input, output PyObject * + * op_flags - per-operand flags, a combination of NPY_ITER_* constants */ static int execute_legacy_ufunc_loop(PyUFuncObject *ufunc, @@ -1556,7 +1576,8 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, NPY_ORDER order, npy_intp buffersize, PyObject **arr_prep, - ufunc_full_args full_args) + ufunc_full_args full_args, + npy_uint32 *op_flags) { npy_intp nin = ufunc->nin, nout = ufunc->nout; PyUFuncGenericFunction innerloop; @@ -1691,7 +1712,7 @@ execute_legacy_ufunc_loop(PyUFuncObject *ufunc, NPY_UF_DBG_PRINT("iterator loop\n"); if (iterator_loop(ufunc, op, dtypes, order, buffersize, arr_prep, full_args, - innerloop, innerloopdata) < 0) { + innerloop, innerloopdata, op_flags) < 0) { return -1; } @@ -1717,14 +1738,13 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, NPY_ORDER order, npy_intp buffersize, PyObject **arr_prep, - ufunc_full_args full_args) + ufunc_full_args full_args, + npy_uint32 *op_flags) { int i, nin = ufunc->nin, nout = ufunc->nout; int nop = nin + nout; - npy_uint32 op_flags[NPY_MAXARGS]; NpyIter *iter; int needs_api; - npy_intp default_op_in_flags = 0, default_op_out_flags = 0; NpyIter_IterNextFunc *iternext; char **dataptr; @@ -1734,48 +1754,10 @@ execute_fancy_ufunc_loop(PyUFuncObject *ufunc, PyArrayObject **op_it; npy_uint32 iter_flags; - if (wheremask != NULL) { - if (nop + 1 > NPY_MAXARGS) { - PyErr_SetString(PyExc_ValueError, - "Too many operands when including where= parameter"); - return -1; - } - op[nop] = wheremask; - dtypes[nop] = NULL; - default_op_out_flags |= NPY_ITER_WRITEMASKED; - } - - /* Set up the flags */ - for (i = 0; i < nin; ++i) { - op_flags[i] = default_op_in_flags | - NPY_ITER_READONLY | - NPY_ITER_ALIGNED | - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - /* - * If READWRITE flag has been set for this operand, - * then clear default READONLY flag - */ - op_flags[i] |= ufunc->op_flags[i]; - if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) { - op_flags[i] &= ~NPY_ITER_READONLY; - } - } for (i = nin; i < nop; ++i) { - /* - * We don't write to all elements, and the iterator may make - * UPDATEIFCOPY temporary copies. The output arrays (unless they are - * allocated by the iterator itself) must be considered READWRITE by the - * iterator, so that the elements we don't write to are copied to the - * possible temporary array. - */ - op_flags[i] = default_op_out_flags | - (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY) | - NPY_ITER_ALIGNED | - NPY_ITER_ALLOCATE | - NPY_ITER_NO_BROADCAST | - NPY_ITER_NO_SUBTYPE | - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; + op_flags[i] |= (op[i] != NULL ? NPY_ITER_READWRITE : NPY_ITER_WRITEONLY); } + if (wheremask != NULL) { op_flags[nop] = NPY_ITER_READONLY | NPY_ITER_ARRAYMASK; } @@ -2471,6 +2453,11 @@ _get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) { *reorderable = 0; Py_RETURN_NONE; + case PyUFunc_IdentityValue: + *reorderable = 1; + Py_INCREF(ufunc->identity_value); + return ufunc->identity_value; + default: PyErr_Format(PyExc_ValueError, "ufunc %s has an invalid identity", ufunc_get_name_cstr(ufunc)); @@ -2785,6 +2772,18 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, if (retval < 0) { goto fail; } + /* + * We don't write to all elements, and the iterator may make + * UPDATEIFCOPY temporary copies. The output arrays (unless they are + * allocated by the iterator itself) must be considered READWRITE by the + * iterator, so that the elements we don't write to are copied to the + * possible temporary array. + */ + _ufunc_setup_flags(ufunc, NPY_ITER_COPY | NPY_UFUNC_DEFAULT_INPUT_FLAGS, + NPY_ITER_UPDATEIFCOPY | + NPY_ITER_READWRITE | + NPY_UFUNC_DEFAULT_OUTPUT_FLAGS, + op_flags); /* For the generalized ufunc, we get the loop right away too */ retval = ufunc->legacy_inner_loop_selector(ufunc, dtypes, &innerloop, &innerloopdata, &needs_api); @@ -2827,28 +2826,6 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, * Set up the iterator per-op flags. For generalized ufuncs, we * can't do buffering, so must COPY or UPDATEIFCOPY. */ - for (i = 0; i < nin; ++i) { - op_flags[i] = NPY_ITER_READONLY | - NPY_ITER_COPY | - NPY_ITER_ALIGNED | - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - /* - * If READWRITE flag has been set for this operand, - * then clear default READONLY flag - */ - op_flags[i] |= ufunc->op_flags[i]; - if (op_flags[i] & (NPY_ITER_READWRITE | NPY_ITER_WRITEONLY)) { - op_flags[i] &= ~NPY_ITER_READONLY; - } - } - for (i = nin; i < nop; ++i) { - op_flags[i] = NPY_ITER_READWRITE| - NPY_ITER_UPDATEIFCOPY| - NPY_ITER_ALIGNED| - NPY_ITER_ALLOCATE| - NPY_ITER_NO_BROADCAST| - NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE; - } iter_flags = ufunc->iter_flags | NPY_ITER_MULTI_INDEX | @@ -3097,7 +3074,8 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, int i, nop; const char *ufunc_name; int retval = -1, subok = 1; - int need_fancy = 0; + npy_uint32 op_flags[NPY_MAXARGS]; + npy_intp default_op_out_flags; PyArray_Descr *dtypes[NPY_MAXARGS]; @@ -3156,13 +3134,6 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, return retval; } - /* - * Use the masked loop if a wheremask was specified. - */ - if (wheremask != NULL) { - need_fancy = 1; - } - /* Get the buffersize and errormask */ if (_get_bufsize_errmask(extobj, ufunc_name, &buffersize, &errormask) < 0) { retval = -1; @@ -3177,16 +3148,20 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, goto fail; } - /* Only do the trivial loop check for the unmasked version. */ - if (!need_fancy) { - /* - * This checks whether a trivial loop is ok, making copies of - * scalar and one dimensional operands if that will help. - */ - trivial_loop_ok = check_for_trivial_loop(ufunc, op, dtypes, buffersize); - if (trivial_loop_ok < 0) { - goto fail; - } + if (wheremask != NULL) { + /* Set up the flags. */ + default_op_out_flags = NPY_ITER_NO_SUBTYPE | + NPY_ITER_WRITEMASKED | + NPY_UFUNC_DEFAULT_OUTPUT_FLAGS; + _ufunc_setup_flags(ufunc, NPY_UFUNC_DEFAULT_INPUT_FLAGS, + default_op_out_flags, op_flags); + } + else { + /* Set up the flags. */ + default_op_out_flags = NPY_ITER_WRITEONLY | + NPY_UFUNC_DEFAULT_OUTPUT_FLAGS; + _ufunc_setup_flags(ufunc, NPY_UFUNC_DEFAULT_INPUT_FLAGS, + default_op_out_flags, op_flags); } #if NPY_UF_DBG_TRACING @@ -3214,23 +3189,46 @@ PyUFunc_GenericFunction(PyUFuncObject *ufunc, _find_array_prepare(full_args, arr_prep, nin, nout); } - /* Start with the floating-point exception flags cleared */ - npy_clear_floatstatus_barrier((char*)&ufunc); /* Do the ufunc loop */ - if (need_fancy) { + if (wheremask != NULL) { NPY_UF_DBG_PRINT("Executing fancy inner loop\n"); + if (nop + 1 > NPY_MAXARGS) { + PyErr_SetString(PyExc_ValueError, + "Too many operands when including where= parameter"); + return -1; + } + op[nop] = wheremask; + dtypes[nop] = NULL; + + /* Set up the flags */ + + npy_clear_floatstatus_barrier((char*)&ufunc); retval = execute_fancy_ufunc_loop(ufunc, wheremask, op, dtypes, order, - buffersize, arr_prep, full_args); + buffersize, arr_prep, full_args, op_flags); } else { NPY_UF_DBG_PRINT("Executing legacy inner loop\n"); + /* + * This checks whether a trivial loop is ok, making copies of + * scalar and one dimensional operands if that will help. + * Since it requires dtypes, it can only be called after + * ufunc->type_resolver + */ + trivial_loop_ok = check_for_trivial_loop(ufunc, op, dtypes, buffersize); + if (trivial_loop_ok < 0) { + goto fail; + } + + /* check_for_trivial_loop on half-floats can overflow */ + npy_clear_floatstatus_barrier((char*)&ufunc); + retval = execute_legacy_ufunc_loop(ufunc, trivial_loop_ok, op, dtypes, order, - buffersize, arr_prep, full_args); + buffersize, arr_prep, full_args, op_flags); } if (retval < 0) { goto fail; @@ -4840,6 +4838,20 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, const char *name, const char *doc, int unused, const char *signature) { + return PyUFunc_FromFuncAndDataAndSignatureAndIdentity( + func, data, types, ntypes, nin, nout, identity, name, doc, + unused, signature, NULL); +} + +/*UFUNC_API*/ +NPY_NO_EXPORT PyObject * +PyUFunc_FromFuncAndDataAndSignatureAndIdentity(PyUFuncGenericFunction *func, void **data, + char *types, int ntypes, + int nin, int nout, int identity, + const char *name, const char *doc, + int unused, const char *signature, + PyObject *identity_value) +{ PyUFuncObject *ufunc; if (nin + nout > NPY_MAXARGS) { PyErr_Format(PyExc_ValueError, @@ -4860,6 +4872,10 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, ufunc->nout = nout; ufunc->nargs = nin+nout; ufunc->identity = identity; + if (ufunc->identity == PyUFunc_IdentityValue) { + Py_INCREF(identity_value); + } + ufunc->identity_value = identity_value; ufunc->functions = func; ufunc->data = data; @@ -4881,6 +4897,7 @@ PyUFunc_FromFuncAndDataAndSignature(PyUFuncGenericFunction *func, void **data, ufunc->op_flags = PyArray_malloc(sizeof(npy_uint32)*ufunc->nargs); if (ufunc->op_flags == NULL) { + Py_DECREF(ufunc); return PyErr_NoMemory(); } memset(ufunc->op_flags, 0, sizeof(npy_uint32)*ufunc->nargs); @@ -5237,6 +5254,9 @@ ufunc_dealloc(PyUFuncObject *ufunc) PyArray_free(ufunc->op_flags); Py_XDECREF(ufunc->userloops); Py_XDECREF(ufunc->obj); + if (ufunc->identity == PyUFunc_IdentityValue) { + Py_DECREF(ufunc->identity_value); + } PyArray_free(ufunc); } diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c index 20bd2b0a8..8277ad6cc 100644 --- a/numpy/core/src/umath/umathmodule.c +++ b/numpy/core/src/umath/umathmodule.c @@ -29,6 +29,7 @@ #include "abstract.h" #include "numpy/npy_math.h" +#include "number.h" static PyUFuncGenericFunction pyfunc_functions[] = {PyUFunc_On_Om}; @@ -325,7 +326,7 @@ int initumath(PyObject *m) s2 = PyDict_GetItemString(d, "remainder"); /* Setup the array object's numerical structures with appropriate ufuncs in d*/ - PyArray_SetNumericOps(d); + _PyArray_SetNumericOps(d); PyDict_SetItemString(d, "conj", s); PyDict_SetItemString(d, "mod", s2); diff --git a/numpy/core/tests/test_deprecations.py b/numpy/core/tests/test_deprecations.py index 10ef16800..7e6e256fe 100644 --- a/numpy/core/tests/test_deprecations.py +++ b/numpy/core/tests/test_deprecations.py @@ -523,3 +523,14 @@ class TestFromstring(_DeprecationTestCase): # 2017-10-19, 1.14 def test_fromstring(self): self.assert_deprecated(np.fromstring, args=('\x00'*80,)) + +class Test_GetSet_NumericOps(_DeprecationTestCase): + # 2018-09-20, 1.16.0 + def test_get_numeric_ops(self): + from numpy.core._multiarray_tests import getset_numericops + self.assert_deprecated(getset_numericops, num=2) + + # empty kwargs prevents any state actually changing which would break + # other tests. + self.assert_deprecated(np.set_numeric_ops, kwargs={}) + assert_raises(ValueError, np.set_numeric_ops, add='abc') diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py index ecb51f72d..f2e7f8f50 100644 --- a/numpy/core/tests/test_dtype.py +++ b/numpy/core/tests/test_dtype.py @@ -824,7 +824,6 @@ class TestFromCTypes(object): )) self.check(Union, expected) - @pytest.mark.xfail(reason="_pack_ is ignored - see gh-11651") def test_packed_structure(self): class PackedStructure(ctypes.Structure): _pack_ = 1 @@ -838,8 +837,45 @@ class TestFromCTypes(object): ]) self.check(PackedStructure, expected) - @pytest.mark.xfail(sys.byteorder != 'little', - reason="non-native endianness does not work - see gh-10533") + def test_large_packed_structure(self): + class PackedStructure(ctypes.Structure): + _pack_ = 2 + _fields_ = [ + ('a', ctypes.c_uint8), + ('b', ctypes.c_uint16), + ('c', ctypes.c_uint8), + ('d', ctypes.c_uint16), + ('e', ctypes.c_uint32), + ('f', ctypes.c_uint32), + ('g', ctypes.c_uint8) + ] + expected = np.dtype(dict( + formats=[np.uint8, np.uint16, np.uint8, np.uint16, np.uint32, np.uint32, np.uint8 ], + offsets=[0, 2, 4, 6, 8, 12, 16], + names=['a', 'b', 'c', 'd', 'e', 'f', 'g'], + itemsize=18)) + self.check(PackedStructure, expected) + + def test_big_endian_structure_packed(self): + class BigEndStruct(ctypes.BigEndianStructure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + expected = np.dtype([('one', 'u1'), ('two', '>u4')]) + self.check(BigEndStruct, expected) + + def test_little_endian_structure_packed(self): + class LittleEndStruct(ctypes.LittleEndianStructure): + _fields_ = [ + ('one', ctypes.c_uint8), + ('two', ctypes.c_uint32) + ] + _pack_ = 1 + expected = np.dtype([('one', 'u1'), ('two', '<u4')]) + self.check(LittleEndStruct, expected) + def test_little_endian_structure(self): class PaddedStruct(ctypes.LittleEndianStructure): _fields_ = [ @@ -852,8 +888,6 @@ class TestFromCTypes(object): ], align=True) self.check(PaddedStruct, expected) - @pytest.mark.xfail(sys.byteorder != 'big', - reason="non-native endianness does not work - see gh-10533") def test_big_endian_structure(self): class PaddedStruct(ctypes.BigEndianStructure): _fields_ = [ @@ -865,3 +899,9 @@ class TestFromCTypes(object): ('b', '>H') ], align=True) self.check(PaddedStruct, expected) + + def test_simple_endian_types(self): + self.check(ctypes.c_uint16.__ctype_le__, np.dtype('<u2')) + self.check(ctypes.c_uint16.__ctype_be__, np.dtype('>u2')) + self.check(ctypes.c_uint8.__ctype_le__, np.dtype('u1')) + self.check(ctypes.c_uint8.__ctype_be__, np.dtype('u1')) diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 4b2a38990..d1b306ef0 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4541,6 +4541,19 @@ class TestIO(object): f.close() assert_equal(pos, 10, err_msg=err_msg) + def test_load_object_array_fromfile(self): + # gh-12300 + with open(self.filename, 'w') as f: + # Ensure we have a file with consistent contents + pass + + with open(self.filename, 'rb') as f: + assert_raises_regex(ValueError, "Cannot read into object array", + np.fromfile, f, dtype=object) + + assert_raises_regex(ValueError, "Cannot read into object array", + np.fromfile, self.filename, dtype=object) + def _check_from(self, s, value, **kw): if 'sep' not in kw: y = np.frombuffer(s, **kw) @@ -6775,7 +6788,7 @@ class TestNewBufferProtocol(object): ValueError, "format string", np.array, m) - def test_error_message(self): + def test_error_message_unsupported(self): # wchar has no corresponding numpy type - if this changes in future, we # need a better way to construct an invalid memoryview format. t = ctypes.c_wchar * 4 @@ -6784,7 +6797,10 @@ class TestNewBufferProtocol(object): exc = cm.exception if sys.version_info.major > 2: - with assert_raises_regex(ValueError, "Unknown .* specifier 'u'"): + with assert_raises_regex( + NotImplementedError, + r"Unrepresentable .* 'u' \(UCS-2 strings\)" + ): raise exc.__cause__ def test_ctypes_integer_via_memoryview(self): diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py index ee6d5da4a..7db551801 100644 --- a/numpy/core/tests/test_overrides.py +++ b/numpy/core/tests/test_overrides.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import inspect import sys import numpy as np @@ -7,8 +8,14 @@ from numpy.testing import ( assert_, assert_equal, assert_raises, assert_raises_regex) from numpy.core.overrides import ( get_overloaded_types_and_args, array_function_dispatch, - verify_matching_signatures) + verify_matching_signatures, ENABLE_ARRAY_FUNCTION) from numpy.core.numeric import pickle +import pytest + + +requires_array_function = pytest.mark.skipif( + not ENABLE_ARRAY_FUNCTION, + reason="__array_function__ dispatch not enabled.") def _get_overloaded_args(relevant_args): @@ -165,6 +172,7 @@ def dispatched_one_arg(array): return 'original' +@requires_array_function class TestArrayFunctionDispatch(object): def test_pickle(self): @@ -204,6 +212,7 @@ class TestArrayFunctionDispatch(object): dispatched_one_arg(array) +@requires_array_function class TestVerifyMatchingSignatures(object): def test_verify_matching_signatures(self): @@ -256,6 +265,7 @@ def _new_duck_type_and_implements(): return (MyArray, implements) +@requires_array_function class TestArrayFunctionImplementation(object): def test_one_arg(self): @@ -302,6 +312,7 @@ class TestArrayFunctionImplementation(object): array = np.array(1) assert_(func(array) is array) + assert_equal(func.__module__, 'my') with assert_raises_regex( TypeError, "no implementation found for 'my.func'"): @@ -322,15 +333,21 @@ class TestNDArrayMethods(object): assert_equal(repr(array), 'MyArray(1)') assert_equal(str(array), '1') - + class TestNumPyFunctions(object): - def test_module(self): + def test_set_module(self): assert_equal(np.sum.__module__, 'numpy') assert_equal(np.char.equal.__module__, 'numpy.char') assert_equal(np.fft.fft.__module__, 'numpy.fft') assert_equal(np.linalg.solve.__module__, 'numpy.linalg') + @pytest.mark.skipif(sys.version_info[0] < 3, reason="Python 3 only") + def test_inspect_sum(self): + signature = inspect.signature(np.sum) + assert_('axis' in signature.parameters) + + @requires_array_function def test_override_sum(self): MyArray, implements = _new_duck_type_and_implements() diff --git a/numpy/core/tests/test_records.py b/numpy/core/tests/test_records.py index af6c86b9e..08d8865a0 100644 --- a/numpy/core/tests/test_records.py +++ b/numpy/core/tests/test_records.py @@ -332,15 +332,14 @@ class TestPathUsage(object): def test_tofile_fromfile(self): with temppath(suffix='.bin') as path: path = Path(path) - a = np.empty(10, dtype='f8,i4,a5') + np.random.seed(123) + a = np.random.rand(10).astype('f8,i4,a5') a[5] = (0.5,10,'abcde') - a.newbyteorder('<') with path.open("wb") as fd: a.tofile(fd) x = np.core.records.fromfile(path, formats='f8,i4,a5', - shape=10, - byteorder='<') + shape=10) assert_array_equal(x, a) diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py index b2c610da6..416bd18db 100644 --- a/numpy/core/tests/test_shape_base.py +++ b/numpy/core/tests/test_shape_base.py @@ -1,5 +1,6 @@ from __future__ import division, absolute_import, print_function +import pytest import warnings import sys import numpy as np @@ -391,39 +392,32 @@ def test_stack(): assert_array_equal(result, np.array([0, 1, 2])) -# See for more information on how to parametrize a whole class -# https://docs.pytest.org/en/latest/example/parametrize.html#parametrizing-test-methods-through-per-class-configuration -def pytest_generate_tests(metafunc): - # called once per each test function - if hasattr(metafunc.cls, 'params'): - arglist = metafunc.cls.params - argnames = sorted(arglist[0]) - metafunc.parametrize(argnames, - [[funcargs[name] for name in argnames] - for funcargs in arglist]) - - -# blocking small arrays and large arrays go through different paths. -# the algorithm is triggered depending on the number of element -# copies required. -# We define a test fixture that forces most tests to go through -# both code paths. -# Ultimately, this should be removed if a single algorithm is found -# to be faster for both small and large arrays.s -def _block_force_concatenate(arrays): - arrays, list_ndim, result_ndim, _ = _block_setup(arrays) - return _block_concatenate(arrays, list_ndim, result_ndim) - - -def _block_force_slicing(arrays): - arrays, list_ndim, result_ndim, _ = _block_setup(arrays) - return _block_slicing(arrays, list_ndim, result_ndim) - - class TestBlock(object): - params = [dict(block=block), - dict(block=_block_force_concatenate), - dict(block=_block_force_slicing)] + @pytest.fixture(params=['block', 'force_concatenate', 'force_slicing']) + def block(self, request): + # blocking small arrays and large arrays go through different paths. + # the algorithm is triggered depending on the number of element + # copies required. + # We define a test fixture that forces most tests to go through + # both code paths. + # Ultimately, this should be removed if a single algorithm is found + # to be faster for both small and large arrays. + def _block_force_concatenate(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_concatenate(arrays, list_ndim, result_ndim) + + def _block_force_slicing(arrays): + arrays, list_ndim, result_ndim, _ = _block_setup(arrays) + return _block_slicing(arrays, list_ndim, result_ndim) + + if request.param == 'force_concatenate': + return _block_force_concatenate + elif request.param == 'force_slicing': + return _block_force_slicing + elif request.param == 'block': + return block + else: + raise ValueError('Unknown blocking request. There is a typo in the tests.') def test_returns_copy(self, block): a = np.eye(3) diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index bd7985dfb..fe23c922b 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -685,6 +685,10 @@ class TestLogAddExp(_FilterInvalids): assert_(np.isnan(np.logaddexp(0, np.nan))) assert_(np.isnan(np.logaddexp(np.nan, np.nan))) + def test_reduce(self): + assert_equal(np.logaddexp.identity, -np.inf) + assert_equal(np.logaddexp.reduce([]), -np.inf) + class TestLog1p(object): def test_log1p(self): @@ -2444,11 +2448,6 @@ class TestRationalFunctions(object): assert_equal(np.gcd(2**100, 3**100), 1) -def is_longdouble_finfo_bogus(): - info = np.finfo(np.longcomplex) - return not np.isfinite(np.log10(info.tiny/info.eps)) - - class TestComplexFunctions(object): funcs = [np.arcsin, np.arccos, np.arctan, np.arcsinh, np.arccosh, np.arctanh, np.sin, np.cos, np.tan, np.exp, @@ -2544,7 +2543,8 @@ class TestComplexFunctions(object): b = cfunc(p) assert_(abs(a - b) < atol, "%s %s: %s; cmath: %s" % (fname, p, a, b)) - def check_loss_of_precision(self, dtype): + @pytest.mark.parametrize('dtype', [np.complex64, np.complex_, np.longcomplex]) + def test_loss_of_precision(self, dtype): """Check loss of precision in complex arc* functions""" # Check against known-good functions @@ -2586,10 +2586,11 @@ class TestComplexFunctions(object): # It's not guaranteed that the system-provided arc functions # are accurate down to a few epsilons. (Eg. on Linux 64-bit) # So, give more leeway for long complex tests here: - check(x_series, 50*eps) + # Can use 2.1 for > Ubuntu LTS Trusty (2014), glibc = 2.19. + check(x_series, 50.0*eps) else: check(x_series, 2.1*eps) - check(x_basic, 2*eps/1e-3) + check(x_basic, 2.0*eps/1e-3) # Check a few points @@ -2629,15 +2630,6 @@ class TestComplexFunctions(object): check(func, pts, 1j) check(func, pts, 1+1j) - def test_loss_of_precision(self): - for dtype in [np.complex64, np.complex_]: - self.check_loss_of_precision(dtype) - - @pytest.mark.skipif(is_longdouble_finfo_bogus(), - reason="Bogus long double finfo") - def test_loss_of_precision_longcomplex(self): - self.check_loss_of_precision(np.longcomplex) - class TestAttributes(object): def test_attributes(self): |