 benchmarks/benchmarks/bench_ufunc.py | 39
 numpy/core/src/umath/loops.c.src     | 75
 numpy/core/tests/test_scalarmath.py  | 20
 numpy/core/tests/test_umath.py       |  3
 numpy/testing/utils.py               |  9
 5 files changed, 103 insertions(+), 43 deletions(-)
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index 0140718e0..1baee1340 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -77,6 +77,45 @@ class Custom(Benchmark):
         (self.b | self.b)


+class CustomInplace(Benchmark):
+    def setup(self):
+        self.c = np.ones(500000, dtype=np.int8)
+        self.i = np.ones(150000, dtype=np.int32)
+        self.f = np.zeros(150000, dtype=np.float32)
+        self.d = np.zeros(75000, dtype=np.float64)
+        # fault memory
+        self.f *= 1.
+        self.d *= 1.
+
+    def time_char_or(self):
+        np.bitwise_or(self.c, 0, out=self.c)
+        np.bitwise_or(0, self.c, out=self.c)
+
+    def time_char_or_temp(self):
+        0 | self.c | 0
+
+    def time_int_or(self):
+        np.bitwise_or(self.i, 0, out=self.i)
+        np.bitwise_or(0, self.i, out=self.i)
+
+    def time_int_or_temp(self):
+        0 | self.i | 0
+
+    def time_float_add(self):
+        np.add(self.f, 1., out=self.f)
+        np.add(1., self.f, out=self.f)
+
+    def time_float_add_temp(self):
+        1. + self.f + 1.
+
+    def time_double_add(self):
+        np.add(self.d, 1., out=self.d)
+        np.add(1., self.d, out=self.d)
+
+    def time_double_add_temp(self):
+        1. + self.d + 1.
+
+
 class CustomScalar(Benchmark):
     params = [np.float32, np.float64]
     param_names = ['dtype']
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 157b30e70..2720c361f 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -87,22 +87,25 @@
  * combine with NPY_GCC_OPT_3 to allow autovectorization
  * should only be used where its worthwhile to avoid code bloat
  */
+#define BASE_UNARY_LOOP(tin, tout, op) \
+    UNARY_LOOP { \
+        const tin in = *(tin *)ip1; \
+        tout * out = (tout *)op1; \
+        op; \
+    }
 #define UNARY_LOOP_FAST(tin, tout, op) \
     do { \
     /* condition allows compiler to optimize the generic macro */ \
     if (IS_UNARY_CONT(tin, tout)) { \
-        UNARY_LOOP { \
-            const tin in = *(tin *)ip1; \
-            tout * out = (tout *)op1; \
-            op; \
+        if (args[0] == args[1]) { \
+            BASE_UNARY_LOOP(tin, tout, op) \
+        } \
+        else { \
+            BASE_UNARY_LOOP(tin, tout, op) \
         } \
     } \
     else { \
-        UNARY_LOOP { \
-            const tin in = *(tin *)ip1; \
-            tout * out = (tout *)op1; \
-            op; \
-        } \
+        BASE_UNARY_LOOP(tin, tout, op) \
     } \
     } \
     while (0)
@@ -128,40 +131,52 @@
  * combine with NPY_GCC_OPT_3 to allow autovectorization
  * should only be used where its worthwhile to avoid code bloat
  */
+#define BASE_BINARY_LOOP(tin, tout, op) \
+    BINARY_LOOP { \
+        const tin in1 = *(tin *)ip1; \
+        const tin in2 = *(tin *)ip2; \
+        tout * out = (tout *)op1; \
+        op; \
+    }
+#define BASE_BINARY_LOOP_S(tin, tout, cin, cinp, vin, vinp, op) \
+    const tin cin = *(tin *)cinp; \
+    BINARY_LOOP { \
+        const tin vin = *(tin *)vinp; \
+        tout * out = (tout *)op1; \
+        op; \
+    }
 #define BINARY_LOOP_FAST(tin, tout, op) \
     do { \
     /* condition allows compiler to optimize the generic macro */ \
     if (IS_BINARY_CONT(tin, tout)) { \
-        BINARY_LOOP { \
-            const tin in1 = *(tin *)ip1; \
-            const tin in2 = *(tin *)ip2; \
-            tout * out = (tout *)op1; \
-            op; \
+        if (args[2] == args[0]) { \
+            BASE_BINARY_LOOP(tin, tout, op) \
+        } \
+        else if (args[2] == args[1]) { \
+            BASE_BINARY_LOOP(tin, tout, op) \
+        } \
+        else { \
+            BASE_BINARY_LOOP(tin, tout, op) \
         } \
     } \
     else if (IS_BINARY_CONT_S1(tin, tout)) { \
-        const tin in1 = *(tin *)args[0]; \
-        BINARY_LOOP { \
-            const tin in2 = *(tin *)ip2; \
-            tout * out = (tout *)op1; \
-            op; \
+        if (args[1] == args[2]) { \
+            BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
+        } \
+        else { \
+            BASE_BINARY_LOOP_S(tin, tout, in1, args[0], in2, ip2, op) \
         } \
     } \
     else if (IS_BINARY_CONT_S2(tin, tout)) { \
-        const tin in2 = *(tin *)args[1]; \
-        BINARY_LOOP { \
-            const tin in1 = *(tin *)ip1; \
-            tout * out = (tout *)op1; \
-            op; \
+        if (args[0] == args[2]) { \
+            BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
         } \
+        else { \
+            BASE_BINARY_LOOP_S(tin, tout, in2, args[1], in1, ip1, op) \
+        } \
     } \
     else { \
-        BINARY_LOOP { \
-            const tin in1 = *(tin *)ip1; \
-            const tin in2 = *(tin *)ip2; \
-            tout * out = (tout *)op1; \
-            op; \
-        } \
+        BASE_BINARY_LOOP(tin, tout, op) \
     } \
     } \
     while (0)
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index 6b55f0101..87b050bb9 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -65,7 +65,7 @@ class TestBaseMath(TestCase):
     def test_blocked(self):
         # test alignments offsets for simd instructions
         # alignments for vz + 2 * (vs - 1) + 1
-        for dt, sz in [(np.float32, 11), (np.float64, 7)]:
+        for dt, sz in [(np.float32, 11), (np.float64, 7), (np.int32, 11)]:
             for out, inp1, inp2, msg in _gen_alignment_data(dtype=dt,
                                                             type='binary',
                                                             max_size=sz):
@@ -73,7 +73,7 @@ class TestBaseMath(TestCase):
                 inp1[...] = np.ones_like(inp1)
                 inp2[...] = np.zeros_like(inp2)
                 assert_almost_equal(np.add(inp1, inp2), exp1, err_msg=msg)
-                assert_almost_equal(np.add(inp1, 1), exp1 + 1, err_msg=msg)
+                assert_almost_equal(np.add(inp1, 2), exp1 + 2, err_msg=msg)
                 assert_almost_equal(np.add(1, inp2), exp1, err_msg=msg)

                 np.add(inp1, inp2, out=out)
@@ -82,15 +82,17 @@ class TestBaseMath(TestCase):
                 inp2[...] += np.arange(inp2.size, dtype=dt) + 1
                 assert_almost_equal(np.square(inp2),
                                     np.multiply(inp2, inp2), err_msg=msg)
-                assert_almost_equal(np.reciprocal(inp2),
-                                    np.divide(1, inp2), err_msg=msg)
+                # skip true divide for ints
+                if dt != np.int32 or sys.version_info.major < 3:
+                    assert_almost_equal(np.reciprocal(inp2),
+                                        np.divide(1, inp2), err_msg=msg)

                 inp1[...] = np.ones_like(inp1)
-                inp2[...] = np.zeros_like(inp2)
-                np.add(inp1, 1, out=out)
-                assert_almost_equal(out, exp1 + 1, err_msg=msg)
-                np.add(1, inp2, out=out)
-                assert_almost_equal(out, exp1, err_msg=msg)
+                np.add(inp1, 2, out=out)
+                assert_almost_equal(out, exp1 + 2, err_msg=msg)
+                inp2[...] = np.ones_like(inp2)
+                np.add(2, inp2, out=out)
+                assert_almost_equal(out, exp1 + 2, err_msg=msg)

     def test_lower_align(self):
         # check data that is not aligned to element size
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 20bcec241..a800de918 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -1201,8 +1201,9 @@ class TestAbsoluteNegative(TestCase):
             assert_array_equal(out, d, err_msg=msg)
             assert_array_equal(-inp, -1*inp, err_msg=msg)
+            d = -1 * inp
             np.negative(inp, out=out)
-            assert_array_equal(out, -1*inp, err_msg=msg)
+            assert_array_equal(out, d, err_msg=msg)

     def test_lower_align(self):
         # check data that is not aligned to element size
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index 682b032b4..683a4f0a6 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -1794,7 +1794,8 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24):
             inp = lambda: arange(s, dtype=dtype)[o:]
             out = empty((s,), dtype=dtype)[o:]
             yield out, inp(), ufmt % (o, o, s, dtype, 'out of place')
-            yield inp(), inp(), ufmt % (o, o, s, dtype, 'in place')
+            d = inp()
+            yield d, d, ufmt % (o, o, s, dtype, 'in place')
             yield out[1:], inp()[:-1], ufmt % \
                 (o + 1, o, s - 1, dtype, 'out of place')
             yield out[:-1], inp()[1:], ufmt % \
                 (o + 1, o, s - 1, dtype, 'out of place')
@@ -1809,9 +1810,11 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24):
             out = empty((s,), dtype=dtype)[o:]
             yield out, inp1(), inp2(), bfmt % \
                 (o, o, o, s, dtype, 'out of place')
-            yield inp1(), inp1(), inp2(), bfmt % \
+            d = inp1()
+            yield d, d, inp2(), bfmt % \
                 (o, o, o, s, dtype, 'in place1')
-            yield inp2(), inp1(), inp2(), bfmt % \
+            d = inp2()
+            yield d, inp1(), d, bfmt % \
                 (o, o, o, s, dtype, 'in place2')
             yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \
                 (o + 1, o, o, s - 1, dtype, 'out of place')
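Note for reviewers: every new if/else branch in UNARY_LOOP_FAST and BINARY_LOOP_FAST expands the exact same BASE_*_LOOP body, so the branches look redundant at the source level. What they add is aliasing information. args[] holds the operand data pointers, so inside the args[0] == args[1] branch the compiler knows the output buffer is the input buffer, and the loop becomes a plain self-update that it can autovectorize instead of routing through the scalar fallback of its runtime alias check. A minimal standalone sketch of the idea follows; the add_one function is illustrative only, not part of this patch:

    #include <stddef.h>

    /*
     * Branch-duplication trick from loops.c.src in miniature.
     * Both branches contain the identical loop; only the facts the
     * compiler can prove about aliasing differ between them.
     */
    void add_one(char *in, char *out, size_t n)
    {
        if (out == in) {
            /* provably in-place: the compiler may treat in[i] as out[i]
             * and vectorize the self-update directly */
            for (size_t i = 0; i < n; i++) {
                out[i] = in[i] + 1;
            }
        }
        else {
            /* distinct pointers: the usual runtime alias versioning
             * applies, and the disjoint case takes the vector path */
            for (size_t i = 0; i < n; i++) {
                out[i] = in[i] + 1;
            }
        }
    }

Compiled with something like gcc -O3, the in-place call that previously landed on the non-vectorized fallback should now take a vectorized loop; the CustomInplace benchmarks added above exist to confirm the same effect on real ufuncs.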