diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-06-07 19:21:03 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-06-08 20:44:05 +0200 |
commit | abad5e3a753a2d0f5bbd7bdf4e8769cf9a4ef02d (patch) | |
tree | e59f0f70b88e513b366811b2814c95a800c0f613 | |
parent | ac8fad529af6bee86cace5ea56490c0ab007b93d (diff) | |
download | numpy-abad5e3a753a2d0f5bbd7bdf4e8769cf9a4ef02d.tar.gz |
MAINT: move vectorized sqrt to own static function
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 53 |
1 file changed, 34 insertions, 19 deletions
```diff
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 687c62987..de78904bb 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1305,30 +1305,45 @@ TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *
  * #vtype = __m128, __m128d#
  * #vsuf = ps, pd#
  */
+
+#ifdef HAVE_EMMINTRIN_H
+
+#define NPY_HAVE_SIMD_@TYPE@
+
+static void
+sse2_sqrt_@TYPE@(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+    UNARY_LOOP_BLOCK_ALIGN_OUT(@type@, 16) {
+        op1[i] = @scalarf@(ip1[i]);
+    }
+    assert(npy_is_aligned(&op1[i], 16));
+    if (npy_is_aligned(&ip1[i], 16)) {
+        UNARY_LOOP_BLOCKED(@type@, 16) {
+            @vtype@ d = _mm_load_@vsuf@(&ip1[i]);
+            _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d));
+        }
+    }
+    else {
+        UNARY_LOOP_BLOCKED(@type@, 16) {
+            @vtype@ d = _mm_loadu_@vsuf@(&ip1[i]);
+            _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d));
+        }
+    }
+    UNARY_LOOP_BLOCKED_END {
+        op1[i] = @scalarf@(ip1[i]);
+    }
+}
+
+
+#endif
+
+
 NPY_NO_EXPORT void
 @TYPE@_sqrt(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
 {
 #ifdef HAVE_EMMINTRIN_H
     if (IS_BLOCKABLE_UNARY(sizeof(@type@), 16)) {
-        UNARY_LOOP_BLOCK_ALIGN_OUT(@type@, 16) {
-            op1[i] = @scalarf@(ip1[i]);
-        }
-        assert(npy_is_aligned(&op1[i], 16));
-        if (npy_is_aligned(&ip1[i], 16)) {
-            UNARY_LOOP_BLOCKED(@type@, 16) {
-                @vtype@ d = _mm_load_@vsuf@(&ip1[i]);
-                _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d));
-            }
-        }
-        else {
-            UNARY_LOOP_BLOCKED(@type@, 16) {
-                @vtype@ d = _mm_loadu_@vsuf@(&ip1[i]);
-                _mm_store_@vsuf@(&op1[i], _mm_sqrt_@vsuf@(d));
-            }
-        }
-        UNARY_LOOP_BLOCKED_END {
-            op1[i] = @scalarf@(ip1[i]);
-        }
+        sse2_sqrt_@TYPE@(args, dimensions, steps);
     }
     else
 #endif
```