summary refs log tree commit diff
path: root/numpy/core
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/umath/loops.c.src 26
-rw-r--r-- numpy/core/src/umath/simd.inc.src 20
-rw-r--r-- numpy/core/tests/test_ufunc.py 25
3 files changed, 57 insertions, 14 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 89eeb0c47..a2649ed93 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1621,21 +1621,23 @@ FLOAT_@func@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSE
NPY_NO_EXPORT NPY_GCC_OPT_3 void
FLOAT_@func@_@isa@(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
{
+ if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
+ UNARY_LOOP {
+ /*
+ * We use the AVX function to compute exp/log for scalar elements as well.
+ * This is needed to ensure the output of strided and non-strided
+ * cases match. But this worsens the performance of strided arrays.
+ * There is a plan to fix this in a subsequent patch by using gather
+ * instructions for strided arrays in the AVX function.
+ */
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
- @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+ @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1);
#else
- /*
- * This is the path it would take if ISA was runtime detected, but not
- * compiled for. It fixes the error on clang6.0 which fails to compile
- * AVX512F version. Not sure if I like this idea, if during runtime it
- * detects AXV512F, it will end up running the scalar version instead
- * of AVX2.
- */
- UNARY_LOOP {
- const npy_float in1 = *(npy_float *)ip1;
- *(npy_float *)op1 = @scalarf@(in1);
- }
+ const npy_float in1 = *(npy_float *)ip1;
+ *(npy_float *)op1 = @scalarf@(in1);
#endif
+ }
+ }
}
/**end repeat1**/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 72493e308..1c6ac4426 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -122,20 +122,36 @@ abs_ptrdiff(char *a, char *b)
/**begin repeat
* #ISA = AVX2, AVX512F#
+ * #isa = avx2, avx512f#
+ * #REGISTER_SIZE = 32, 64#
*/
/* prototypes */
-#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
/**begin repeat1
* #func = exp, log#
*/
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
static NPY_INLINE void
@ISA@_@func@_FLOAT(npy_float *, npy_float *, const npy_intp n);
+#endif
-/**end repeat1**/
+static NPY_INLINE int
+run_unary_@isa@_@func@_FLOAT(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+#if defined HAVE_ATTRIBUTE_TARGET_@ISA@_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
+ if (IS_BLOCKABLE_UNARY(sizeof(npy_float), @REGISTER_SIZE@)) {
+ @ISA@_@func@_FLOAT((npy_float*)args[1], (npy_float*)args[0], dimensions[0]);
+ return 1;
+ }
+ else
+ return 0;
#endif
+ return 0;
+}
+
+/**end repeat1**/
/**end repeat**/
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 4b26c2208..caeea39f4 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -1964,3 +1964,28 @@ def test_ufunc_types(ufunc):
assert r.dtype == np.dtype(t)
else:
assert res.dtype == np.dtype(out)
+
+@pytest.mark.parametrize('ufunc', [getattr(np, x) for x in dir(np)
+ if isinstance(getattr(np, x), np.ufunc)])
+def test_ufunc_noncontiguous(ufunc):
+ '''
+ Check that contiguous and non-contiguous calls to ufuncs
+ have the same results for values in range(1, 7)
+ '''
+ for typ in ufunc.types:
+ # types is a list of strings like ii->i
+ if any(set('O?mM') & set(typ)):
+ # bool, object, datetime, timedelta are too irregular for this simple test
+ continue
+ inp, out = typ.split('->')
+ args_c = [np.empty(6, t) for t in inp]
+ args_n = [np.empty(18, t)[::3] for t in inp]
+ for a in args_c:
+ a.flat = range(1,7)
+ for a in args_n:
+ a.flat = range(1,7)
+ with warnings.catch_warnings(record=True):
+ warnings.filterwarnings("always")
+ res_c = ufunc(*args_c)
+ res_n = ufunc(*args_n)
+ assert_equal(res_c, res_n)