summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: mattip <matti.picus@gmail.com> 2018-10-25 13:36:40 +0300
committer: mattip <matti.picus@gmail.com> 2018-10-25 19:04:21 +0300
commit: 24c9faac737321933022104843a435ccb329cc0b (patch)
tree: 5beeaecae72ae119b633a5eb7747f30669e856d8
parent: da3d162650abbf5cba096e6dacd5596397c1d23f (diff)
download: numpy-24c9faac737321933022104843a435ccb329cc0b.tar.gz
MAINT: rearrange logic to not set FP invalid register where possible
-rw-r--r--  numpy/core/src/umath/loops.c.src   15
-rw-r--r--  numpy/core/src/umath/simd.inc.src   7
2 files changed, 10 insertions, 12 deletions
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 643cad7c8..6393e7402 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1833,7 +1833,7 @@ NPY_NO_EXPORT void
if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
- io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
+ io1 = (npy_isnan(io1) || io1 @OP@ in2) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
@@ -1842,11 +1842,10 @@ NPY_NO_EXPORT void
BINARY_LOOP {
@type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
- in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
+ in1 = (npy_isnan(in1) || in1 @OP@ in2) ? in1 : in2;
*((@type@ *)op1) = in1;
}
}
- npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -1861,7 +1860,7 @@ NPY_NO_EXPORT void
if (IS_BINARY_REDUCE) {
BINARY_REDUCE_LOOP(@type@) {
const @type@ in2 = *(@type@ *)ip2;
- io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
+ io1 = (npy_isnan(in2) || io1 @OP@ in2) ? io1 : in2;
}
*((@type@ *)iop1) = io1;
}
@@ -1869,10 +1868,9 @@ NPY_NO_EXPORT void
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
- *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
+ *((@type@ *)op1) = (npy_isnan(in2) || in1 @OP@ in2) ? in1 : in2;
}
}
- npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -2753,14 +2751,13 @@ NPY_NO_EXPORT void
@ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
- if ( !(@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in1r) || npy_isnan(in1i))) {
+ if ( !(npy_isnan(in1r) || npy_isnan(in1i) || @OP@(in1r, in1i, in2r, in2i))) {
in1r = in2r;
in1i = in2i;
}
((@ftype@ *)op1)[0] = in1r;
((@ftype@ *)op1)[1] = in1i;
}
- npy_clear_floatstatus_barrier((char*)dimensions);
}
/**end repeat1**/
@@ -2776,7 +2773,7 @@ NPY_NO_EXPORT void
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
const @ftype@ in2r = ((@ftype@ *)ip2)[0];
const @ftype@ in2i = ((@ftype@ *)ip2)[1];
- if (@OP@(in1r, in1i, in2r, in2i) || npy_isnan(in2r) || npy_isnan(in2i)) {
+ if (npy_isnan(in2r) || npy_isnan(in2i) || @OP@(in1r, in1i, in2r, in2i)) {
((@ftype@ *)op1)[0] = in1r;
((@ftype@ *)op1)[1] = in1i;
}
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 1d2b87f64..da0713b2b 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -1014,7 +1014,7 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
{
const npy_intp stride = 16 / (npy_intp)sizeof(@type@);
LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) {
- *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+ *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
}
assert(n < (stride) || npy_is_aligned(&ip[i], 16));
if (i + 3 * stride <= n) {
@@ -1038,12 +1038,13 @@ sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n)
}
else {
@type@ tmp = sse2_horizontal_@VOP@_@vtype@(c1);
- *op = (*op @OP@ tmp || npy_isnan(*op)) ? *op : tmp;
+ *op = (npy_isnan(*op) || *op @OP@ tmp) ? *op : tmp;
}
}
LOOP_BLOCKED_END {
- *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i];
+ *op = (npy_isnan(*op) || *op @OP@ ip[i]) ? *op : ip[i];
}
+ npy_clear_floatstatus_barrier((char*)op);
}
/**end repeat1**/