summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> 2022-12-07 22:26:35 -0800
committer: Developer-Ecosystem-Engineering <65677710+Developer-Ecosystem-Engineering@users.noreply.github.com> 2022-12-07 22:32:52 -0800
commit: 0609a34aec0d2f4b0e404c84352dfb979baea3de (patch)
tree: 53eff007a185824849ea7a14245b1fb9bb10949b
parent: 2de88654b6d992fe949b7e9a68793fdb74364458 (diff)
download: numpy-0609a34aec0d2f4b0e404c84352dfb979baea3de.tar.gz
Use mask_to_true() for logical_not to save a few ops
-rw-r--r-- numpy/core/src/umath/loops_logical.dispatch.c.src | 24
1 file changed, 16 insertions, 8 deletions
diff --git a/numpy/core/src/umath/loops_logical.dispatch.c.src b/numpy/core/src/umath/loops_logical.dispatch.c.src
index 94b0c4594..793a2af19 100644
--- a/numpy/core/src/umath/loops_logical.dispatch.c.src
+++ b/numpy/core/src/umath/loops_logical.dispatch.c.src
@@ -35,6 +35,16 @@ NPY_FINLINE npyv_u8 byte_to_true(npyv_u8 v)
// tmp is filled with 0xff/0x00, negate and mask to boolean true
return npyv_andc_u8(truemask, tmp);
}
+/*
+ * Convert a mask vector (0xff/0x00) to boolean true/false (0x01/0x00).
+ * Similar to byte_to_true(), but the input is already a mask, so the negation step is skipped.
+ */
+NPY_FINLINE npyv_u8 mask_to_true(npyv_b8 v)
+{
+ const npyv_u8 truemask = npyv_setall_u8(1 == 1);
+ return npyv_and_u8(truemask, npyv_cvt_u8_b8(v));
+}
+
/**begin repeat
* #kind = logical_and, logical_or#
@@ -165,12 +175,11 @@ simd_@kind@_BOOL(npy_bool * op, npy_bool * ip, npy_intp len)
#if UNROLL > @unroll@
npyv_u8 v@unroll@ = npyv_load_u8(ip + vstep * @unroll@);
#if @not@
- npyv_u8 r@unroll@ = npyv_cvt_u8_b8(npyv_cmpeq_u8(v@unroll@, zero));
+ npyv_u8 r@unroll@ = mask_to_true(npyv_cmpeq_u8(v@unroll@, zero));
#else
- // abs is kind of pointless but maybe its used for byte_to_true below
- npyv_u8 r@unroll@ = v@unroll@;
+ npyv_u8 r@unroll@ = byte_to_true(v@unroll@);
#endif
- npyv_store_u8(op + vstep * @unroll@, byte_to_true(r@unroll@));
+ npyv_store_u8(op + vstep * @unroll@, r@unroll@);
#endif
/**end repeat1**/
}
@@ -180,12 +189,11 @@ simd_@kind@_BOOL(npy_bool * op, npy_bool * ip, npy_intp len)
for (; len >= vstep; len -= vstep, ip += vstep, op += vstep) {
npyv_u8 v = npyv_load_u8(ip);
#if @not@
- npyv_u8 r = npyv_cvt_u8_b8(npyv_cmpeq_u8(v, zero));
+ npyv_u8 r = mask_to_true(npyv_cmpeq_u8(v, zero));
#else
- // abs is kind of pointless but maybe its used for byte_to_true below
- npyv_u8 r = v;
+ npyv_u8 r = byte_to_true(v);
#endif
- npyv_store_u8(op, byte_to_true(r));
+ npyv_store_u8(op, r);
}
// Scalar loop to finish off