diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2016-08-31 20:15:16 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2016-09-01 09:58:24 +0200 |
commit | fd298a341ddeb05c471c8dfc16f4cc641d08f8a7 (patch) | |
tree | 65ff792e70bd848d90250195ee2f03b50618d905 /benchmarks | |
parent | a93d9f7a97358e618aa52b2bbfa119317ee56d08 (diff) | |
download | numpy-fd298a341ddeb05c471c8dfc16f4cc641d08f8a7.tar.gz |
ENH: add inplace cases to fast ufunc loop macros
Neither gcc nor clang automatically specializes the in-place case, so
add extra conditions to the loop macros to get the compilers to emit
decent code.
Without them, in-place code ends up much slower than the out-of-place
code.
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/benchmarks/bench_ufunc.py | 39 |
1 files changed, 39 insertions, 0 deletions
```diff
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
index 0140718e0..1baee1340 100644
--- a/benchmarks/benchmarks/bench_ufunc.py
+++ b/benchmarks/benchmarks/bench_ufunc.py
@@ -77,6 +77,45 @@ class Custom(Benchmark):
         (self.b | self.b)
 
 
+class CustomInplace(Benchmark):
+    def setup(self):
+        self.c = np.ones(500000, dtype=np.int8)
+        self.i = np.ones(150000, dtype=np.int32)
+        self.f = np.zeros(150000, dtype=np.float32)
+        self.d = np.zeros(75000, dtype=np.float64)
+        # fault memory
+        self.f *= 1.
+        self.d *= 1.
+
+    def time_char_or(self):
+        np.bitwise_or(self.c, 0, out=self.c)
+        np.bitwise_or(0, self.c, out=self.c)
+
+    def time_char_or_temp(self):
+        0 | self.c | 0
+
+    def time_int_or(self):
+        np.bitwise_or(self.i, 0, out=self.i)
+        np.bitwise_or(0, self.i, out=self.i)
+
+    def time_int_or_temp(self):
+        0 | self.i | 0
+
+    def time_float_add(self):
+        np.add(self.f, 1., out=self.f)
+        np.add(1., self.f, out=self.f)
+
+    def time_float_add_temp(self):
+        1. + self.f + 1.
+
+    def time_double_add(self):
+        np.add(self.d, 1., out=self.d)
+        np.add(1., self.d, out=self.d)
+
+    def time_double_add_temp(self):
+        1. + self.d + 1.
+
+
 class CustomScalar(Benchmark):
     params = [np.float32, np.float64]
     param_names = ['dtype']
```