-rw-r--r--  doc/source/f2py/getting-started.rst |  9
-rw-r--r--  numpy/core/fromnumeric.py           |  4
-rw-r--r--  numpy/core/src/umath/simd.inc.src   | 54
-rw-r--r--  numpy/core/tests/test_numeric.py    |  5
-rw-r--r--  numpy/core/tests/test_umath.py      |  8
-rw-r--r--  numpy/fft/_pocketfft.py             |  3
-rw-r--r--  numpy/lib/shape_base.py             |  4

7 files changed, 63 insertions(+), 24 deletions(-)
diff --git a/doc/source/f2py/getting-started.rst b/doc/source/f2py/getting-started.rst
index c600eee01..27ddbb005 100644
--- a/doc/source/f2py/getting-started.rst
+++ b/doc/source/f2py/getting-started.rst
@@ -29,13 +29,12 @@ either by just in one command or step-by-step, some steps can be omitted or
 combined with others.
 
 Below I'll describe three typical approaches of using F2PY.
-The following `example Fortran 77 code`__ will be used for
-illustration:
+The following example Fortran 77 code will be used for
+illustration, save it as fib1.f:
 
 .. include:: fib1.f
    :literal:
 
-__ fib1.f
 
 The quick way
 ==============
@@ -242,14 +241,12 @@ directive defines special comment lines (starting with ``Cf2py``, for example)
 which are ignored by Fortran compilers but F2PY interprets them as
 normal lines.
 
-Here is shown a `modified version of the example Fortran code`__, saved
+Here is shown a modified version of the previous Fortran code, save it
 as ``fib3.f``:
 
 .. include:: fib3.f
    :literal:
 
-__ fib3.f
-
 Building the extension module can be now carried out in one command::
 
   python -m numpy.f2py -c -m fib3 fib3.f
diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index ab45ddfe8..acd2d2bea 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -2039,8 +2039,8 @@ def clip(a, a_min, a_max, out=None, **kwargs):
     is specified, values smaller than 0 become 0, and values larger
     than 1 become 1.
 
-    Equivalent to but faster than ``np.maximum(a_min, np.minimum(a, a_max))``
-    assuming ``a_min < a_max``.
+    Equivalent to but faster than ``np.minimum(a_max, np.maximum(a, a_min))``.
+    No check is performed to ensure ``a_min < a_max``.
 
     Parameters
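A quick sketch (not part of the patch) of why the corrected operand order in the
``clip`` docstring matters: because ``np.minimum`` with ``a_max`` is applied last,
the upper bound silently wins whenever ``a_min > a_max``::

    import numpy as np

    a = np.arange(5)      # array([0, 1, 2, 3, 4])
    np.clip(a, 1, 3)      # array([1, 1, 2, 3, 3])
    # No a_min < a_max check is performed; the upper bound is applied
    # last, so it wins:
    np.clip(a, 3, 1)      # array([1, 1, 1, 1, 1])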
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 7ec90f9c8..4265476b5 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -55,6 +55,37 @@ abs_ptrdiff(char *a, char *b)
     return (a > b) ? (a - b) : (b - a);
 }
 
+/*
+ * nomemoverlap - returns true if two strided arrays have no overlapping
+ * region in memory. ip_size/op_size = size of the arrays which can be
+ * negative indicating negative steps.
+ */
+static NPY_INLINE npy_bool
+nomemoverlap(char *ip,
+             npy_intp ip_size,
+             char *op,
+             npy_intp op_size)
+{
+    char *ip_start, *ip_end, *op_start, *op_end;
+    if (ip_size < 0) {
+        ip_start = ip + ip_size;
+        ip_end = ip;
+    }
+    else {
+        ip_start = ip;
+        ip_end = ip + ip_size;
+    }
+    if (op_size < 0) {
+        op_start = op + op_size;
+        op_end = op;
+    }
+    else {
+        op_start = op;
+        op_end = op + op_size;
+    }
+    return (ip_start > op_end) | (op_start > ip_end);
+}
+
 #define IS_BINARY_STRIDE_ONE(esize, vsize) \
     ((steps[0] == esize) && \
      (steps[1] == esize) && \
@@ -83,22 +114,25 @@ abs_ptrdiff(char *a, char *b)
  * cross page boundaries.
  *
  * We instead rely on i32gather/scatter_ps instructions which use a 32-bit index
- * element. The index needs to be < INT_MAX to avoid overflow. MAX_STEP_SIZE ensures this.
+ * element. The index needs to be < INT_MAX to avoid overflow. MAX_STEP_SIZE
+ * ensures this. The condition also requires that the input and output arrays
+ * should have no overlap in memory.
  */
-#define IS_BINARY_SMALL_STEPS \
+#define IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP \
     ((abs(steps[0]) < MAX_STEP_SIZE) && \
      (abs(steps[1]) < MAX_STEP_SIZE) && \
-     (abs(steps[2]) < MAX_STEP_SIZE))
+     (abs(steps[2]) < MAX_STEP_SIZE) && \
+     (nomemoverlap(args[0], steps[0] * dimensions[0], args[2], steps[2] * dimensions[0])) && \
+     (nomemoverlap(args[1], steps[1] * dimensions[0], args[2], steps[2] * dimensions[0])))
 
 /*
- * output should be contiguous, can handle strided input data
- * Input step should be smaller than MAX_STEP_SIZE for performance
+ * 1) Output should be contiguous, can handle strided input data
+ * 2) Input step should be smaller than MAX_STEP_SIZE for performance
+ * 3) Input and output arrays should have no overlap in memory
  */
 #define IS_OUTPUT_BLOCKABLE_UNARY(esize, vsize) \
     (steps[1] == (esize) && abs(steps[0]) < MAX_STEP_SIZE && \
-     (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \
-     ((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \
-      ((abs_ptrdiff(args[1], args[0]) == 0))))
+     (nomemoverlap(args[1], steps[1] * dimensions[0], args[0], steps[0] * dimensions[0])))
 
 #define IS_BLOCKABLE_REDUCE(esize, vsize) \
     (steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \
@@ -253,7 +287,7 @@ static NPY_INLINE int
 run_binary_avx512f_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *steps)
 {
 #if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS && @EXISTS@
-    if (IS_BINARY_SMALL_STEPS) {
+    if (IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP) {
         AVX512F_@func@_@TYPE@(args, dimensions, steps);
         return 1;
     }
@@ -1943,7 +1977,7 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s
     /*
      * Note: while generally indices are npy_intp, we ensure that our maximum index
      * will fit in an int32 as a precondition for this function via
-     * IS_BINARY_SMALL_STEPS
+     * IS_BINARY_SMALL_STEPS_AND_NOMEMOVERLAP
      */
     npy_int32 index_ip1[@num_lanes@], index_ip2[@num_lanes@], index_op[@num_lanes@];
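The interval test in ``nomemoverlap`` is easy to misread because a negative size
folds into the start of the range; a rough Python translation (illustrative only,
with byte addresses modeled as plain integers) makes the logic explicit::

    def nomemoverlap(ip, ip_size, op, op_size):
        # ip/op: address of the first element of each strided array;
        # ip_size/op_size: step * element count, negative for negative steps.
        ip_start, ip_end = (ip + ip_size, ip) if ip_size < 0 else (ip, ip + ip_size)
        op_start, op_end = (op + op_size, op) if op_size < 0 else (op, op + op_size)
        # The ranges are disjoint iff one starts past the other's end.
        return ip_start > op_end or op_start > ip_end

    # Two 5-element float64 arrays whose starts are 16 bytes apart overlap:
    assert not nomemoverlap(1000, 5 * 8, 1016, 5 * 8)
    # Starts 64 bytes apart: no overlap.
    assert nomemoverlap(1000, 5 * 8, 1064, 5 * 8)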
diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py
index 135acc51d..05f59d9dc 100644
--- a/numpy/core/tests/test_numeric.py
+++ b/numpy/core/tests/test_numeric.py
@@ -2000,12 +2000,12 @@ class TestClip:
                  np.array(np.nan),
                  np.zeros(10, dtype=np.int32)),
     ])
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_clip_scalar_nan_propagation(self, arr, amin, amax):
         # enforcement of scalar nan propagation for comparisons
         # called through clip()
         expected = np.minimum(np.maximum(arr, amin), amax)
-        with assert_warns(DeprecationWarning):
-            actual = np.clip(arr, amin, amax)
+        actual = np.clip(arr, amin, amax)
         assert_equal(actual, expected)
 
     @pytest.mark.xfail(reason="propagation doesn't match spec")
@@ -2014,6 +2014,7 @@ class TestClip:
                  np.timedelta64('NaT'),
                  np.zeros(10, dtype=np.int32)),
     ])
+    @pytest.mark.filterwarnings("ignore::DeprecationWarning")
     def test_NaT_propagation(self, arr, amin, amax):
         # NOTE: the expected function spec doesn't
         # propagate NaT, but clip() now does
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index d1d4467d6..233a0b1d6 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -3157,6 +3157,14 @@ def test_rint_big_int():
         # Rint should not change the value
         assert_equal(val, np.rint(val))
 
+@pytest.mark.parametrize('ftype', [np.float32, np.float64])
+def test_memoverlap_accumulate(ftype):
+    # Reproduces bug https://github.com/numpy/numpy/issues/15597
+    arr = np.array([0.61, 0.60, 0.77, 0.41, 0.19], dtype=ftype)
+    out_max = np.array([0.61, 0.61, 0.77, 0.77, 0.77], dtype=ftype)
+    out_min = np.array([0.61, 0.60, 0.60, 0.41, 0.19], dtype=ftype)
+    assert_equal(np.maximum.accumulate(arr), out_max)
+    assert_equal(np.minimum.accumulate(arr), out_min)
 
 def test_signaling_nan_exceptions():
     with assert_no_warnings():
diff --git a/numpy/fft/_pocketfft.py b/numpy/fft/_pocketfft.py
index f2510a6c2..3eab242e5 100644
--- a/numpy/fft/_pocketfft.py
+++ b/numpy/fft/_pocketfft.py
@@ -59,12 +59,11 @@ def _raw_fft(a, n, axis, is_real, is_forward, inv_norm):
 
     if a.shape[axis] != n:
         s = list(a.shape)
+        index = [slice(None)]*len(s)
         if s[axis] > n:
-            index = [slice(None)]*len(s)
             index[axis] = slice(0, n)
             a = a[tuple(index)]
         else:
-            index = [slice(None)]*len(s)
             index[axis] = slice(0, s[axis])
             s[axis] = n
             z = zeros(s, a.dtype.char)
diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py
index 7634af010..b7f1f16f2 100644
--- a/numpy/lib/shape_base.py
+++ b/numpy/lib/shape_base.py
@@ -269,8 +269,8 @@ def apply_along_axis(func1d, axis, arr, *args, **kwargs):
     """
     Apply a function to 1-D slices along the given axis.
 
-    Execute `func1d(a, *args)` where `func1d` operates on 1-D arrays and `a`
-    is a 1-D slice of `arr` along `axis`.
+    Execute `func1d(a, *args, **kwargs)` where `func1d` operates on 1-D arrays
+    and `a` is a 1-D slice of `arr` along `axis`.
 
     This is equivalent to (but faster than) the following use of `ndindex` and
     `s_`, which sets each of ``ii``, ``jj``, and ``kk`` to a tuple of indices::
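The new ``test_memoverlap_accumulate`` exercises exactly the case that broke:
``accumulate`` feeds its own running output back in as an operand
(``out[i] = op(out[i-1], in[i])``), so the binary inner loop inherently sees
overlapping input and output buffers. The aliasing is visible even at the
Python level (illustrative, not part of the patch)::

    import numpy as np

    arr = np.array([0.61, 0.60, 0.77, 0.41, 0.19])
    # Accumulating in place makes the overlap explicit: each step reads
    # the element the previous step just wrote.
    np.maximum.accumulate(arr, out=arr)
    print(arr)   # [0.61 0.61 0.77 0.77 0.77]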
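The ``_pocketfft.py`` hunk is a pure refactor: ``index`` is needed in both
branches, so it is built once before them. For context, a standalone sketch of
the crop-or-pad step that ``_raw_fft`` performs (hypothetical helper name, and
the copy into the zero-padded buffer is completed beyond the hunk's context)::

    import numpy as np

    def crop_or_pad(a, n, axis):
        # Truncate the given axis to length n, or zero-pad it up to n.
        s = list(a.shape)
        index = [slice(None)] * len(s)
        if s[axis] > n:
            index[axis] = slice(0, n)
            return a[tuple(index)]
        index[axis] = slice(0, s[axis])
        s[axis] = n
        z = np.zeros(s, a.dtype.char)
        z[tuple(index)] = a
        return z

    a = np.arange(6.0)
    crop_or_pad(a, 4, 0)   # array([0., 1., 2., 3.])
    crop_or_pad(a, 8, 0)   # array([0., 1., 2., 3., 4., 5., 0., 0.])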
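The ``shape_base.py`` change only corrects the docstring, but the ``**kwargs``
forwarding it now documents is easy to demonstrate (illustrative)::

    import numpy as np

    b = np.array([[8, 1, 7],
                  [4, 3, 9],
                  [5, 2, 6]])

    # Keyword arguments are forwarded to func1d along with *args:
    np.apply_along_axis(np.sort, 1, b, kind='mergesort')
    # array([[1, 7, 8],
    #        [3, 4, 9],
    #        [2, 5, 6]])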