-rw-r--r--  benchmarks/benchmarks/bench_ufunc.py  |  39
-rw-r--r--  numpy/core/src/umath/loops.c.src      |  75
-rw-r--r--  numpy/core/tests/test_scalarmath.py   |  20
-rw-r--r--  numpy/core/tests/test_umath.py        |   3
-rw-r--r--  numpy/testing/utils.py                |   9
5 files changed, 103 insertions(+), 43 deletions(-)
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index 0140718e0..1baee1340 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -77,6 +77,45 @@ class Custom(Benchmark):
(self.b | self.b)
+class CustomInplace(Benchmark):
+ def setup(self):
+ self.c = np.ones(500000, dtype=np.int8)
+ self.i = np.ones(150000, dtype=np.int32)
+ self.f = np.zeros(150000, dtype=np.float32)
+ self.d = np.zeros(75000, dtype=np.float64)
+ # fault in the memory pages before timing
+ self.f *= 1.
+ self.d *= 1.
+
+ def time_char_or(self):
+ np.bitwise_or(self.c, 0, out=self.c)
+ np.bitwise_or(0, self.c, out=self.c)
+
+ def time_char_or_temp(self):
+ 0 | self.c | 0
+
+ def time_int_or(self):
+ np.bitwise_or(self.i, 0, out=self.i)
+ np.bitwise_or(0, self.i, out=self.i)
+
+ def time_int_or_temp(self):
+ 0 | self.i | 0
+
+ def time_float_add(self):
+ np.add(self.f, 1., out=self.f)
+ np.add(1., self.f, out=self.f)
+
+ def time_float_add_temp(self):
+ 1. + self.f + 1.
+
+ def time_double_add(self):
+ np.add(self.d, 1., out=self.d)
+ np.add(1., self.d, out=self.d)
+
+ def time_double_add_temp(self):
+ 1. + self.d + 1.
+
+
class CustomScalar(Benchmark):
params = [np.float32, np.float64]
param_names = ['dtype']
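The CustomInplace benchmarks above time the ufunc-with-out= form, where the output buffer aliases an input, against the expression form that materializes temporaries. A minimal standalone sketch of the same measurement outside the asv harness (the array size and repeat count are illustrative, not part of the suite):

    import timeit
    import numpy as np

    f = np.zeros(150000, dtype=np.float32)
    f *= 1.  # fault in the memory pages first, as the benchmark setup does

    # in place: out= aliases the input, so no temporary is allocated
    t_inplace = timeit.timeit(lambda: np.add(f, 1., out=f), number=1000)
    # expression form: each binary op allocates a temporary result array
    t_temp = timeit.timeit(lambda: 1. + f + 1., number=1000)
    print(t_inplace, t_temp)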
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 157b30e70..2720c361f 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -87,22 +87,25 @@
* combine with NPY_GCC_OPT_3 to allow autovectorization
* should only be used where it's worthwhile to avoid code bloat
*/
+#define BASE_UNARY_LOOP(tin, tout, op) \
+ UNARY_LOOP { \
+ const tin in = *(tin *)ip1; \
+ tout * out = (tout *)op1; \
+ op; \
+ }
#define UNARY_LOOP_FAST(tin, tout, op) \
do { \
/* condition allows compiler to optimize the generic macro */ \
if (IS_UNARY_CONT(tin, tout)) { \
- UNARY_LOOP { \
- const tin in = *(tin *)ip1; \
- tout * out = (tout *)op1; \
- op; \
+ if (args[0] == args[1]) { \
+ BASE_UNARY_LOOP(tin, tout, op) \
+ } \
+ else { \
+ BASE_UNARY_LOOP(tin, tout, op) \
} \
} \
else { \
- UNARY_LOOP { \
- const tin in = *(tin *)ip1; \
- tout * out = (tout *)op1; \
- op; \
- } \
+ BASE_UNARY_LOOP(tin, tout, op) \
} \
} \
while (0)
@@ -128,40 +131,52 @@
* combine with NPY_GCC_OPT_3 to allow autovectorization
* should only be used where it's worthwhile to avoid code bloat
*/
+#define BASE_BINARY_LOOP(tin, tout, op) \
+ BINARY_LOOP { \
+ const tin in1 = *(tin *)ip1; \
+ const tin in2 = *(tin *)ip2; \
+ tout * out = (tout *)op1; \
+ op; \
+ }
+#define BASE_BINARY_LOOP_S(tin, tout, cin, cinp, vin, vinp, op) \
+ const tin cin = *(tin *)cinp; \
+ BINARY_LOOP { \
+ const tin vin = *(tin *)vinp; \
+ tout * out = (tout *)op1; \
+ op; \
+ }
#define BINARY_LOOP_FAST(tin, tout, op) \
do { \
/* condition allows compiler to optimize the generic macro */ \
if (IS_BINARY_CONT(tin, tout)) { \
- BINARY_LOOP { \
- const tin in1 = *(tin *)ip1; \
- const tin in2 = *(tin *)ip2; \
- tout * out = (tout *)op1; \
- op; \
+ if (args[2] == args[0]) { \
+ BASE_BINARY_LOOP(tin, tout, op) \
+ } \
+ else if (args[2] == args[1]) { \
+ BASE_BINARY_LOOP(tin, tout, op) \
+ } \
+ else { \
+ BASE_BINARY_LOOP(tin, tout, op) \
} \
} \
else if (IS_BINARY_CONT_S1(tin, tout)) { \
- const tin in1 = *(tin *)args[0]; \
- BINARY_LOOP { \
- const tin in2 = *(tin *)ip2; \
- tout * out = (tout *)op1; \
- op; \
+ if (args[1] == args[2]) { \
+ BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
+ } \
+ else { \
+ BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
} \
} \
else if (IS_BINARY_CONT_S2(tin, tout)) { \
- const tin in2 = *(tin *)args[1]; \
- BINARY_LOOP { \
- const tin in1 = *(tin *)ip1; \
- tout * out = (tout *)op1; \
- op; \
+ if (args[0] == args[2]) { \
+ BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
} \
+ else { \
+ BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
+ } \
} \
else { \
- BINARY_LOOP { \
- const tin in1 = *(tin *)ip1; \
- const tin in2 = *(tin *)ip2; \
- tout * out = (tout *)op1; \
- op; \
- } \
+ BASE_BINARY_LOOP(tin, tout, op) \
} \
} \
while (0)
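The rewritten branches are textually identical; what the pointer comparisons buy is that inside each branch the compiler knows whether the output buffer aliases an input, so every BASE_UNARY_LOOP/BASE_BINARY_LOOP expansion can be autovectorized for its own aliasing case instead of one shared loop staying conservative. For a binary loop, args[0] and args[1] are the input buffers and args[2] the output (for a unary loop the output is args[1]). A sketch of which Python-level calls would dispatch to each contiguous branch, assuming same-typed contiguous arrays (the branch notes are inferred from the macro conditions above):

    import numpy as np

    a = np.ones(1000, dtype=np.int32)
    b = np.ones(1000, dtype=np.int32)
    out = np.empty(1000, dtype=np.int32)

    np.add(a, b, out=a)    # args[2] == args[0]: output aliases first input
    np.add(a, b, out=b)    # args[2] == args[1]: output aliases second input
    np.add(a, b, out=out)  # all three buffers distinct: no-aliasing branch
    np.add(a, 1, out=a)    # zero-stride scalar operand: IS_BINARY_CONT_S2 path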
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index 6b55f0101..87b050bb9 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -65,7 +65,7 @@ class TestBaseMath(TestCase):
def test_blocked(self):
# test alignment offsets for simd instructions
# alignments for vz + 2 * (vs - 1) + 1
- for dt, sz in [(np.float32, 11), (np.float64, 7)]:
+ for dt, sz in [(np.float32, 11), (np.float64, 7), (np.int32, 11)]:
for out, inp1, inp2, msg in _gen_alignment_data(dtype=dt,
type='binary',
max_size=sz):
@@ -73,7 +73,7 @@ class TestBaseMath(TestCase):
inp1[...] = np.ones_like(inp1)
inp2[...] = np.zeros_like(inp2)
assert_almost_equal(np.add(inp1, inp2), exp1, err_msg=msg)
- assert_almost_equal(np.add(inp1, 1), exp1 + 1, err_msg=msg)
+ assert_almost_equal(np.add(inp1, 2), exp1 + 2, err_msg=msg)
assert_almost_equal(np.add(1, inp2), exp1, err_msg=msg)
np.add(inp1, inp2, out=out)
@@ -82,15 +82,17 @@ class TestBaseMath(TestCase):
inp2[...] += np.arange(inp2.size, dtype=dt) + 1
assert_almost_equal(np.square(inp2),
np.multiply(inp2, inp2), err_msg=msg)
- assert_almost_equal(np.reciprocal(inp2),
- np.divide(1, inp2), err_msg=msg)
+ # skip true divide for ints
+ if dt != np.int32 or sys.version_info.major < 3:
+ assert_almost_equal(np.reciprocal(inp2),
+ np.divide(1, inp2), err_msg=msg)
inp1[...] = np.ones_like(inp1)
- inp2[...] = np.zeros_like(inp2)
- np.add(inp1, 1, out=out)
- assert_almost_equal(out, exp1 + 1, err_msg=msg)
- np.add(1, inp2, out=out)
- assert_almost_equal(out, exp1, err_msg=msg)
+ np.add(inp1, 2, out=out)
+ assert_almost_equal(out, exp1 + 2, err_msg=msg)
+ inp2[...] = np.ones_like(inp2)
+ np.add(2, inp2, out=out)
+ assert_almost_equal(out, exp1 + 2, err_msg=msg)
def test_lower_align(self):
# check data that is not aligned to element size
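Because _gen_alignment_data (see the numpy/testing/utils.py hunk below) now yields genuinely aliased arrays for its 'in place' cases, out can be the very buffer the input lives in, and the expected values have to be chosen so that a correct in-place result is distinguishable from stale input data. A minimal illustration of the aliased situation the updated assertions cover:

    import numpy as np

    inp = np.ones(7, dtype=np.float64)
    out = inp                # aliased, as the 'in place' generator cases are
    np.add(inp, 2, out=out)
    # inp and out share one buffer, so the input now holds the results too
    assert np.all(out == 3) and np.all(inp == 3)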
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 20bcec241..a800de918 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1201,8 +1201,9 @@ class TestAbsoluteNegative(TestCase):
assert_array_equal(out, d, err_msg=msg)
assert_array_equal(-inp, -1*inp, err_msg=msg)
+ d = -1 * inp
np.negative(inp, out=out)
- assert_array_equal(out, -1*inp, err_msg=msg)
+ assert_array_equal(out, d, err_msg=msg)
def test_lower_align(self):
# check data that is not aligned to element size
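The expected value is captured in d before the call because the alignment generator can hand np.negative an out that aliases inp; evaluating -1*inp after the call would negate already-negated data, so the comparison would no longer check anything meaningful. A minimal sketch of the aliased case:

    import numpy as np

    inp = np.arange(5.)
    out = inp                  # aliased 'in place' case
    expected = -1 * inp        # must be computed before the in-place call
    np.negative(inp, out=out)
    assert np.array_equal(out, expected)
    # -1 * inp evaluated here would equal the original data, not `expected`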
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 682b032b4..683a4f0a6 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -1794,7 +1794,8 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24):
inp = lambda: arange(s, dtype=dtype)[o:]
out = empty((s,), dtype=dtype)[o:]
yield out, inp(), ufmt % (o, o, s, dtype, 'out of place')
- yield inp(), inp(), ufmt % (o, o, s, dtype, 'in place')
+ d = inp()
+ yield d, d, ufmt % (o, o, s, dtype, 'in place')
yield out[1:], inp()[:-1], ufmt % \
(o + 1, o, s - 1, dtype, 'out of place')
yield out[:-1], inp()[1:], ufmt % \
@@ -1809,9 +1810,11 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24):
out = empty((s,), dtype=dtype)[o:]
yield out, inp1(), inp2(), bfmt % \
(o, o, o, s, dtype, 'out of place')
- yield inp1(), inp1(), inp2(), bfmt % \
+ d = inp1()
+ yield d, d, inp2(), bfmt % \
(o, o, o, s, dtype, 'in place1')
- yield inp2(), inp1(), inp2(), bfmt % \
+ d = inp2()
+ yield d, inp1(), d, bfmt % \
(o, o, o, s, dtype, 'in place2')
yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \
(o + 1, o, o, s - 1, dtype, 'out of place')
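Previously the 'in place' cases called inp() twice, producing two arrays with equal contents but distinct buffers, so the ufuncs under test never actually received aliased pointers; yielding one object for both positions makes the tests exercise the new aliased loops. A small demonstration of the difference:

    import numpy as np

    def inp():
        return np.arange(8, dtype=np.float64)

    x, y = inp(), inp()      # old behaviour: equal values, separate buffers
    assert not np.shares_memory(x, y)

    d = inp()                # new behaviour: the same object yielded twice
    x, y = d, d
    assert x is y            # the ufunc really runs with out aliasing the input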