diff options
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/core/code_generators/generate_umath.py | 2 | ||||
| -rw-r--r-- | numpy/core/src/umath/loops.c.src | 8 | ||||
| -rw-r--r-- | numpy/core/src/umath/loops.h.src | 6 | ||||
| -rw-r--r-- | numpy/core/src/umath/loops_unary_fp.dispatch.c.src | 21 | ||||
| -rw-r--r-- | numpy/core/src/umath/simd.inc.src | 22 |
5 files changed, 26 insertions, 33 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 3a27a34cd..292d9e0d3 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -827,7 +827,7 @@ defdict = { docstrings.get('numpy.core.umath.ceil'), None, TD('e', f='ceil', astype={'e': 'f'}), - TD(inexactvec, simd=[('fma', 'fd'), ('avx512f', 'fd')]), + TD(inexactvec, dispatch=[('loops_unary_fp', 'fd')]), TD('fdg', f='ceil'), TD(O, f='npy_ObjectCeil'), ), diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 7c0710819..aaa694f34 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1532,8 +1532,8 @@ TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const * */ /**begin repeat - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ /**begin repeat1 @@ -1568,8 +1568,8 @@ NPY_NO_EXPORT NPY_GCC_OPT_3 void */ /**begin repeat2 - * #func = rint, ceil, floor, trunc# - * #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc# + * #func = rint, floor, trunc# + * #scalarf = npy_rint, npy_floor, npy_trunc# */ NPY_NO_EXPORT NPY_GCC_OPT_3 void diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index 0938cd050..081ca9957 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -187,7 +187,7 @@ NPY_NO_EXPORT void * #TYPE = FLOAT, DOUBLE# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# + * #kind = ceil, sqrt, absolute, square, reciprocal# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))) @@ -228,7 +228,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@func@, /**end repeat**/ /**begin repeat - * #func = sin, cos# + * #func = sin, cos# */ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void DOUBLE_@func@, @@ -275,7 +275,7 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT void @TYPE@_@kind@, ( /**end repeat**/ /**begin repeat - * #func = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# */ /**begin repeat1 diff --git a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src index 2d5917282..789733fb6 100644 --- a/numpy/core/src/umath/loops_unary_fp.dispatch.c.src +++ b/numpy/core/src/umath/loops_unary_fp.dispatch.c.src @@ -1,6 +1,8 @@ /*@targets ** $maxopt baseline - ** sse2 vsx2 neon + ** sse2 sse41 + ** vsx2 + ** neon asimd **/ /** * Force use SSE only on x86, even if AVX2 or AVX512F are enabled @@ -65,6 +67,9 @@ NPY_FINLINE double c_square_f64(double a) #define c_sqrt_f64 npy_sqrt #endif +#define c_ceil_f32 npy_ceilf +#define c_ceil_f64 npy_ceil + /******************************************************************************** ** Defining the SIMD kernels ********************************************************************************/ @@ -134,10 +139,10 @@ NPY_FINLINE double c_square_f64(double a) */ #if @VCHK@ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #repl_0w1 = 0, 0, 0, 1# - * #RECIP_WORKAROUND = 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #repl_0w1 = 0, 0, 0, 0, 1# + * #RECIP_WORKAROUND = 0, 0, 0, 0, WORKAROUND_CLANG_RECIPROCAL_BUG# */ /**begin repeat2 * #STYPE = CONTIG, NCONTIG, CONTIG, NCONTIG# @@ -245,9 +250,9 @@ static void simd_@TYPE@_@kind@_@STYPE@_@DTYPE@ * #VCHK = NPY_SIMD, NPY_SIMD_F64# */ /**begin repeat1 - * #kind = sqrt, absolute, square, reciprocal# - * #intr = sqrt, abs, square, recip# - * #clear = 0, 1, 0, 0# + * #kind = ceil, sqrt, absolute, square, reciprocal# + * #intr = ceil, sqrt, abs, square, recip# + * #clear = 0, 0, 1, 0, 0# */ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_@kind@) (char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index d47be9a30..0e2c1ab8b 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -169,7 +169,7 @@ run_@func@_avx512_skx_@TYPE@(char **args, npy_intp const *dimensions, npy_intp c */ /**begin repeat2 - * #func = rint, floor, ceil, trunc# + * #func = rint, floor, trunc# */ #if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS @@ -850,12 +850,6 @@ fma_floor_@vsub@(@vtype@ x) } NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@ -fma_ceil_@vsub@(@vtype@ x) -{ - return _mm256_round_@vsub@(x, _MM_FROUND_TO_POS_INF); -} - -NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_FMA @vtype@ fma_trunc_@vsub@(@vtype@ x) { return _mm256_round_@vsub@(x, _MM_FROUND_TO_ZERO); @@ -988,12 +982,6 @@ avx512_floor_@vsub@(@vtype@ x) } NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@ -avx512_ceil_@vsub@(@vtype@ x) -{ - return _mm512_roundscale_@vsub@(x, 0x0A); -} - -NPY_FINLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F @vtype@ avx512_trunc_@vsub@(@vtype@ x) { return _mm512_roundscale_@vsub@(x, 0x0B); @@ -1327,8 +1315,8 @@ AVX512F_@func@_@TYPE@(char **args, npy_intp const *dimensions, npy_intp const *s */ /**begin repeat1 - * #func = rint, ceil, floor, trunc# - * #vectorf = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# + * #vectorf = rint, floor, trunc# */ #if defined @CHK@ @@ -1398,8 +1386,8 @@ static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void */ /**begin repeat1 - * #func = rint, ceil, floor, trunc# - * #vectorf = rint, ceil, floor, trunc# + * #func = rint, floor, trunc# + * #vectorf = rint, floor, trunc# */ #if defined @CHK@ |
