summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sayed Adel <seiko@imavr.com> 2021-11-03 22:08:09 +0200
committer: Sayed Adel <seiko@imavr.com> 2021-11-06 15:52:48 +0200
commit: 035ecde53bf595c2b94849a248c6cf21221e2c6a (patch)
tree: cc2ba8315aff490717f090d9c53eca15bef76011
parent: cb340848f0c4df96c93980251f240347cccc4ff8 (diff)
download: numpy-035ecde53bf595c2b94849a248c6cf21221e2c6a.tar.gz
SIMD: replace raw SIMD of ceil with universal intrinsics
-rw-r--r-- numpy/core/code_generators/generate_umath.py 2
-rw-r--r-- numpy/core/src/umath/loops.c.src 8
-rw-r--r-- numpy/core/src/umath/loops.h.src 6
-rw-r--r-- numpy/core/src/umath/loops_unary_fp.dispatch.c.src 21
-rw-r--r-- numpy/core/src/umath/simd.inc.src 22
5 files changed, 26 insertions, 33 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index 3a27a34cd..292d9e0d3 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -827,7 +827,7 @@ defdict = {
docstrings.get('numpy.core.umath.ceil'),
None,
TD('e', f='ceil', astype={'e': 'f'}),
- TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]),
+ TD(inexactvec, dispatch=[('loops_unary_fp', 'fd')]),
TD('fdg', f='ceil'),
TD(O, f='npy_ObjectCeil'),
),
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 7c0710819..aaa694f34 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1532,8 +1532,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const *
*/
/**begin repeat
- * #func = rint, ceil, floor, trunc#
- * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ * #func = rint, floor, trunc#
+ * #scalarf = npy_rint, npy_floor, npy_trunc#
*/
/**begin repeat1
@@ -1568,8 +1568,8 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void
*/
/**begin repeat2
- * #func = rint, ceil, floor, trunc#
- * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
+ * #func = rint, floor, trunc#
+ * #scalarf = npy_rint, npy_floor, npy_trunc#
*/
NPY_NO_EXPORT NPY_GCC_OPT_3 void
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 0938cd050..081ca9957 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -187,7 +187,7 @@ NPY_NO_EXPORT void
* #TYPE = FLOAT, DOUBLE#
*/
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@,
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)))
@@ -228,7 +228,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@,
/**end repeat**/
/**begin repeat
- * #func = sin, cos#
+ * #func = sin, cos#
*/
NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@,
@@ -275,7 +275,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (
/**end repeat**/
/**begin repeat
- * #func = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
*/
/**begin repeat1
diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
index 2d5917282..789733fb6 100644
--- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
+++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src
@@ -1,6 +1,8 @@
/*@targets
** $maxopt baseline
- ** sse2 vsx2 neon
+ ** sse2 sse41
+ ** vsx2
+ ** neon asimd
**/
/**
* Force use SSE only on x86, even if AVX2 or AVX512F are enabled
@@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a)
#define c_sqrt_f64 npy_sqrt
#endif
+#define c_ceil_f32 npy_ceilf
+#define c_ceil_f64 npy_ceil
+
/********************************************************************************
** Defining the SIMD kernels
********************************************************************************/
@@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a)
*/
#if @VCHK@
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #repl_0w1 = 0, 0, 0, 1#
- * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #repl_0w1 = 0, 0, 0, 0, 1#
+ * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG#
*/
/**begin repeat2
* #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG#
@@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@
* #VCHK = NPY_SIMD, NPY_SIMD_F64#
*/
/**begin repeat1
- * #kind = sqrt, absolute, square, reciprocal#
- * #intr = sqrt, abs, square, recip#
- * #clear = 0, 1, 0, 0#
+ * #kind = ceil, sqrt, absolute, square, reciprocal#
+ * #intr = ceil, sqrt, abs, square, recip#
+ * #clear = 0, 0, 1, 0, 0#
*/
NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@)
(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index d47be9a30..0e2c1ab8b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -169,7 +169,7 @@ run_@func@_avx512_skx_@TYPE@(char **args, npy_intp const *dimensions, npy_intp c
*/
/**begin repeat2
- * #func = rint, floor, ceil, trunc#
+ * #func = rint, floor, trunc#
*/
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
@@ -850,12 +850,6 @@ fma_floor_@vsub@(@vtype@ x)
}
NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
-fma_ceil_@vsub@(@vtype@ x)
-{
- return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@
fma_trunc_@vsub@(@vtype@ x)
{
return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO);
@@ -988,12 +982,6 @@ avx512_floor_@vsub@(@vtype@ x)
}
NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
-avx512_ceil_@vsub@(@vtype@ x)
-{
- return _mm512_roundscale_@vsub@(x, 0x0A);
-}
-
-NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@
avx512_trunc_@vsub@(@vtype@ x)
{
return _mm512_roundscale_@vsub@(x, 0x0B);
@@ -1327,8 +1315,8 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s
*/
/**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
*/
#if defined @CHK@
@@ -1398,8 +1386,8 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
*/
/**begin repeat1
- * #func = rint, ceil, floor, trunc#
- * #vectorf = rint, ceil, floor, trunc#
+ * #func = rint, floor, trunc#
+ * #vectorf = rint, floor, trunc#
*/
#if defined @CHK@