diff options
| author | mattip <matti.picus@gmail.com> | 2018-10-25 13:36:40 +0300 |
|---|---|---|
| committer | mattip <matti.picus@gmail.com> | 2018-10-25 19:04:21 +0300 |
| commit | 24c9faac737321933022104843a435ccb329cc0b (patch) | |
| tree | 5beeaecae72ae119b633a5eb7747f30669e856d8 | |
| parent | da3d162650abbf5cba096e6dacd5596397c1d23f (diff) | |
| download | numpy-24c9faac737321933022104843a435ccb329cc0b.tar.gz | |
MAINT: rearrange logic to not set FP invalid register where possible
| -rw-r--r-- | numpy/core/src/umath/loops.c.src | 15 | ||||
| -rw-r--r-- | numpy/core/src/umath/simd.inc.src | 7 |
2 files changed, 10 insertions, 12 deletions
```diff
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 643cad7c8..6393e7402 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1833,7 +1833,7 @@ NPY_NO_EXPORT void
         if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
             BINARY_REDUCE_LOOP(@type@) {
                 const @type@ in2 = *(@type@ *)ip2;
-                io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
+                io1 = (npy_isnan(io1) || io1 @OP@ in2) ? io1 : in2;
             }
             *((@type@ *)iop1) = io1;
         }
@@ -1842,11 +1842,10 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
+            in1 = (npy_isnan(in1) || in1 @OP@ in2) ? in1 : in2;
             *((@type@ *)op1) = in1;
         }
     }
-    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 /**end repeat1**/
@@ -1861,7 +1860,7 @@ NPY_NO_EXPORT void
     if (IS_BINARY_REDUCE) {
         BINARY_REDUCE_LOOP(@type@) {
             const @type@ in2 = *(@type@ *)ip2;
-            io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
+            io1 = (npy_isnan(in2) || io1 @OP@ in2) ? io1 : in2;
         }
         *((@type@ *)iop1) = io1;
     }
@@ -1869,10 +1868,9 @@ NPY_NO_EXPORT void
         BINARY_LOOP {
             const @type@ in1 = *(@type@ *)ip1;
             const @type@ in2 = *(@type@ *)ip2;
-            *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
+            *((@type@ *)op1) = (npy_isnan(in2) || in1 @OP@ in2) ? in1 : in2;
         }
     }
-    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 /**end repeat1**/
@@ -2753,14 +2751,13 @@ NPY_NO_EXPORT void
         @ftype@ in1i = ((@ftype@ *)ip1)[1];
         const @ftype@ in2r = ((@ftype@ *)ip2)[0];
         const @ftype@ in2i = ((@ftype@ *)ip2)[1];
-        if ( !(@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in1r) || npy_isnan(in1i))) {
+        if ( !(npy_isnan(in1r) || npy_isnan(in1i) || @OP@(in1r, in1i, in2r, in2i))) {
             in1r = in2r;
             in1i = in2i;
         }
         ((@ftype@ *)op1)[0] = in1r;
         ((@ftype@ *)op1)[1] = in1i;
     }
-    npy_clear_floatstatus_barrier((char*)dimensions);
 }
 
 /**end repeat1**/
@@ -2776,7 +2773,7 @@ NPY_NO_EXPORT void
         const @ftype@ in1i = ((@ftype@ *)ip1)[1];
         const @ftype@ in2r = ((@ftype@ *)ip2)[0];
         const @ftype@ in2i = ((@ftype@ *)ip2)[1];
-        if (@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in2r) || npy_isnan(in2i)) {
+        if (npy_isnan(in2r) || npy_isnan(in2i) || @OP@(in1r, in1i, in2r, in2i)) {
             ((@ftype@ *)op1)[0] = in1r;
             ((@ftype@ *)op1)[1] = in1i;
         }
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 1d2b87f64..da0713b2b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1014,7 +1014,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
 {
     const npy_intp stride = 16 / (npy_intp)sizeof(@type@);
     LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
-        *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+        *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
     }
     assert(n < (stride) || npy_is_aligned(&ip[i], 16));
     if (i + 3 * stride <= n) {
@@ -1038,12 +1038,13 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
         }
         else {
             @type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1);
-            *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
+            *op = (npy_isnan(*op) || *op @OP@ tmp) ? *op : tmp;
         }
     }
     LOOP_BLOCKED_END {
-        *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+        *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
     }
+    npy_clear_floatstatus_barrier((char*)op);
 }
 
 /**end repeat1**/
```
