Diffstat (limited to 'numpy')
-rw-r--r--   numpy/core/setup.py                      |   1
-rw-r--r--   numpy/core/setup_common.py               |   4
-rw-r--r--   numpy/core/src/multiarray/number.c       |  83
-rw-r--r--   numpy/core/src/multiarray/temp_elide.c   | 391
-rw-r--r--   numpy/core/src/multiarray/temp_elide.h   |  15
-rw-r--r--   numpy/core/tests/test_multiarray.py      |  47
6 files changed, 536 insertions, 5 deletions
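
Reader's note, not part of the commit: the diff below makes the ndarray number-protocol slots reuse a temporary array (reference count 1, large enough to be worth it) as the output of the next operation, so an expression like a + b + b only allocates one intermediate. A rough way to observe the effect from Python; the array size and timing loop are illustrative assumptions, and the gain depends on CPU cache size:

    import numpy as np
    import timeit

    # large enough to exceed the 256 KiB elision threshold used by the patch
    a = np.ones(1000000)
    b = np.ones(1000000)

    # a + b + b creates a temporary for a + b; with this change the second add
    # can be performed in place on that temporary instead of allocating again
    print(timeit.timeit('a + b + b', globals=globals(), number=100))

    # the explicit in-place form, which needs no elision, for comparison
    print(timeit.timeit('d = a + b; d += b', globals=globals(), number=100))
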
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index f45042bec..891ba2a19 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -824,6 +824,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'shape.c'),
             join('src', 'multiarray', 'scalarapi.c'),
             join('src', 'multiarray', 'scalartypes.c.src'),
+            join('src', 'multiarray', 'temp_elide.c'),
             join('src', 'multiarray', 'usertypes.c'),
             join('src', 'multiarray', 'ucsnarrow.c'),
             join('src', 'multiarray', 'vdot.c'),
diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py
index 357051cdb..7691a2aeb 100644
--- a/numpy/core/setup_common.py
+++ b/numpy/core/setup_common.py
@@ -107,7 +107,8 @@ MANDATORY_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs",
 OPTIONAL_STDFUNCS = ["expm1", "log1p", "acosh", "asinh", "atanh",
         "rint", "trunc", "exp2", "log2", "hypot", "atan2", "pow",
         "copysign", "nextafter", "ftello", "fseeko",
-        "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate"]
+        "strtoll", "strtoull", "cbrt", "strtold_l", "fallocate",
+        "backtrace"]
 
 
 OPTIONAL_HEADERS = [
@@ -116,6 +117,7 @@ OPTIONAL_HEADERS = [
                 "emmintrin.h",  # SSE2
                 "features.h",  # for glibc version linux
                 "xlocale.h"  # see GH#8367
+                "dlfcn.h",  # dladdr
 ]
 
 # optional gcc compiler builtins and their call arguments and optional a
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index fec015a30..c4e675430 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -12,6 +12,7 @@
 #include "npy_import.h"
 #include "common.h"
 #include "number.h"
+#include "temp_elide.h"
 
 /*************************************************************************
  ****************   Implement Number Protocol ****************************
@@ -353,23 +354,60 @@ PyArray_GenericInplaceUnaryFunction(PyArrayObject *m1, PyObject *op)
 }
 
 static PyObject *
+array_inplace_add(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_subtract(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_multiply(PyArrayObject *m1, PyObject *m2);
+#if !defined(NPY_PY3K)
+static PyObject *
+array_inplace_divide(PyArrayObject *m1, PyObject *m2);
+#endif
+static PyObject *
+array_inplace_true_divide(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_floor_divide(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_bitwise_and(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_bitwise_or(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_bitwise_xor(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_left_shift(PyArrayObject *m1, PyObject *m2);
+static PyObject *
+array_inplace_right_shift(PyArrayObject *m1, PyObject *m2);
+
+static PyObject *
 array_add(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__add__", "__radd__", 0, nb_add);
+    if (try_binary_elide(m1, m2, &array_inplace_add, &res, 1)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.add);
 }
 
 static PyObject *
 array_subtract(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__sub__", "__rsub__", 0, nb_subtract);
+    if (try_binary_elide(m1, m2, &array_inplace_subtract, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.subtract);
 }
 
 static PyObject *
 array_multiply(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__mul__", "__rmul__", 0, nb_multiply);
+    if (try_binary_elide(m1, m2, &array_inplace_multiply, &res, 1)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.multiply);
 }
 
@@ -377,7 +415,11 @@ array_multiply(PyArrayObject *m1, PyObject *m2)
 static PyObject *
 array_divide(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__div__", "__rdiv__", 0, nb_divide);
+    if (try_binary_elide(m1, m2, &array_inplace_divide, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.divide);
 }
 #endif
@@ -529,7 +571,7 @@ fast_scalar_power(PyArrayObject *a1, PyObject *o2, int inplace)
         return NULL;
     }
 
-    if (inplace) {
+    if (inplace || can_elide_temp_unary(a1)) {
         return PyArray_GenericInplaceUnaryFunction(a1, fastop);
     }
     else {
         return PyArray_GenericUnaryFunction(a1, fastop);
@@ -588,53 +630,82 @@ array_power(PyArrayObject *a1, PyObject *o2, PyObject *NPY_UNUSED(modulo))
 static PyObject *
 array_negative(PyArrayObject *m1)
 {
+    if (can_elide_temp_unary(m1)) {
+        return PyArray_GenericInplaceUnaryFunction(m1, n_ops.negative);
+    }
     return PyArray_GenericUnaryFunction(m1, n_ops.negative);
 }
 
 static PyObject *
 array_absolute(PyArrayObject *m1)
 {
+    if (can_elide_temp_unary(m1)) {
+        return PyArray_GenericInplaceUnaryFunction(m1, n_ops.absolute);
+    }
     return PyArray_GenericUnaryFunction(m1, n_ops.absolute);
 }
 
 static PyObject *
 array_invert(PyArrayObject *m1)
 {
+    if (can_elide_temp_unary(m1)) {
+        return PyArray_GenericInplaceUnaryFunction(m1, n_ops.invert);
+    }
     return PyArray_GenericUnaryFunction(m1, n_ops.invert);
 }
 
 static PyObject *
 array_left_shift(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__lshift__", "__rlshift__", 0, nb_lshift);
+    if (try_binary_elide(m1, m2, &array_inplace_left_shift, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.left_shift);
 }
 
 static PyObject *
 array_right_shift(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__rshift__", "__rrshift__", 0, nb_rshift);
+    if (try_binary_elide(m1, m2, &array_inplace_right_shift, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.right_shift);
 }
 
 static PyObject *
 array_bitwise_and(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__and__", "__rand__", 0, nb_and);
+    if (try_binary_elide(m1, m2, &array_inplace_bitwise_and, &res, 1)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_and);
 }
 
 static PyObject *
 array_bitwise_or(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__or__", "__ror__", 0, nb_or);
+    if (try_binary_elide(m1, m2, &array_inplace_bitwise_or, &res, 1)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_or);
 }
 
 static PyObject *
 array_bitwise_xor(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__xor__", "__rxor__", 0, nb_xor);
+    if (try_binary_elide(m1, m2, &array_inplace_bitwise_xor, &res, 1)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.bitwise_xor);
 }
 
@@ -726,14 +797,24 @@ array_inplace_bitwise_xor(PyArrayObject *m1, PyObject *m2)
 static PyObject *
 array_floor_divide(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__floordiv__", "__rfloordiv__", 0,
                                nb_floor_divide);
+    if (try_binary_elide(m1, m2, &array_inplace_floor_divide, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.floor_divide);
 }
 
 static PyObject *
 array_true_divide(PyArrayObject *m1, PyObject *m2)
 {
+    PyObject * res;
     GIVE_UP_IF_HAS_RIGHT_BINOP(m1, m2, "__truediv__", "__rtruediv__", 0,
                                nb_true_divide);
+    if (PyArray_CheckExact(m1) &&
+            (PyArray_ISFLOAT(m1) || PyArray_ISCOMPLEX(m1)) &&
+            try_binary_elide(m1, m2, &array_inplace_true_divide, &res, 0)) {
+        return res;
+    }
     return PyArray_GenericBinaryFunction(m1, m2, n_ops.true_divide);
 }
diff --git a/numpy/core/src/multiarray/temp_elide.c b/numpy/core/src/multiarray/temp_elide.c
new file mode 100644
index 000000000..5d18e1a08
--- /dev/null
+++ b/numpy/core/src/multiarray/temp_elide.c
@@ -0,0 +1,391 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+
+#define NPY_NUMBER_MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+ * Functions used to try to avoid/elide temporaries in python expressions
+ * of type a + b + b by translating some operations into inplace operations.
+ * This example translates to this bytecode:
+ *
+ *  0 LOAD_FAST    0 (a)
+ *  3 LOAD_FAST    1 (b)
+ *  6 BINARY_ADD
+ *  7 LOAD_FAST    1 (b)
+ * 10 BINARY_ADD
+ *
+ * The two named variables get their reference count increased by the load
+ * instructions so they always have a reference count larger than 1.
+ * The temporary of the first BINARY_ADD on the other hand only has a count
+ * of 1. Only temporaries can have a count of 1 in python so we can use this
+ * to transform the second operation into an inplace operation and not affect
+ * the output of the program.
+ * CPython does the same thing to resize memory instead of copying when doing
+ * string concatenation.
+ * The gain can be very significant (4x-6x) when avoiding the temporary allows
+ * the operation to remain in the cpu caches, and can still be 50% faster for
+ * arrays larger than the cpu cache size.
+ *
+ * A complication is that a DSO (dynamic shared object) module (e.g. cython)
+ * could call the PyNumber functions directly on arrays with reference count
+ * of 1.
+ * This is handled by checking the call stack to verify that we have been
+ * called directly from the cpython interpreter.
+ * To achieve this we check that all functions in the callstack until the
+ * cpython frame evaluation function are located in cpython or numpy.
+ * This is an expensive operation so temporaries are only avoided for rather
+ * large arrays.
+ *
+ * A possible future improvement would be to change cpython to give us access
+ * to the top of the stack. Then we could just check that the objects involved
+ * are on the cpython stack instead of checking the function callstack.
+ *
+ * Elision can be applied to all operations that do have inplace variants and
+ * do not change types (addition, subtraction, multiplication, float division,
+ * logical and bitwise operations ...)
+ * For commutative operations (addition, multiplication, ...) if eliding into
+ * the lefthand side fails it can succeed on the righthand side by swapping
+ * the arguments. E.g. b * (a * 2) can be elided by changing it to (2 * a) * b.
+ *
+ * TODO only supports systems with backtrace(), windows can probably be
+ * supported too by using the appropriate windows apis.
+ */
+
+#if defined HAVE_BACKTRACE && defined HAVE_DLFCN_H && ! defined PYPY_VERSION
+/* 1 prints elided operations, 2 prints stacktraces */
+#define NPY_ELIDE_DEBUG 0
+#define NPY_MAX_STACKSIZE 10
+
+#if PY_VERSION_HEX >= 0x03060000
+/* TODO can pep523 be used somehow? */
+#define PYFRAMEEVAL_FUNC "_PyEval_EvalFrameDefault"
+#else
+#define PYFRAMEEVAL_FUNC "PyEval_EvalFrameEx"
+#endif
+/*
+ * Heuristic size of the array in bytes at which the backtrace generation
+ * overhead becomes less than the speed gained by inplace operations. Depends
+ * on the stack depth being checked. Measurements with 10 stacks show it
+ * getting worthwhile around 100KiB but to be conservative put it higher,
+ * around where the L2 cache spills.
+ */
+#ifndef Py_DEBUG
+#define NPY_MIN_ELIDE_BYTES (256 * 1024)
+#else
+/*
+ * in debug mode always elide but skip scalars as these can convert to 0d array
+ * during in place operations
+ */
+#define NPY_MIN_ELIDE_BYTES (32)
+#endif
+#include <dlfcn.h>
+#include <execinfo.h>
+
+/*
+ * linear search pointer in table
+ * number of pointers is usually quite small but if a performance impact can
+ * be measured this could be converted to a binary search
+ */
+static int
+find_addr(void * addresses[], npy_intp naddr, void * addr)
+{
+    npy_intp j;
+    for (j = 0; j < naddr; j++) {
+        if (addr == addresses[j]) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static int
+check_callers(int * cannot)
+{
+    /*
+     * get base addresses of multiarray and python, check if
+     * backtrace is in these libraries only calling dladdr if a new max
+     * address is found.
+     * When after the initial multiarray stack everything is inside python we
+     * can elide as no C-API user could have messed up the reference counts.
+     * Only check until the python frame evaluation function is found
+     * approx 10us overhead for stack size of 10
+     *
+     * TODO some calls go over scalarmath in umath but we cannot get the base
+     * address of it from multiarraymodule as it is not linked against it
+     */
+    static int init = 0;
+    /*
+     * measured DSO object memory start and end, if an address is located
+     * inside these bounds it is part of that library so we don't need to call
+     * dladdr on it (assuming linear memory)
+     */
+    static void * pos_python_start;
+    static void * pos_python_end;
+    static void * pos_ma_start;
+    static void * pos_ma_end;
+
+    /* known address storage to save dladdr calls */
+    static void * py_addr[64];
+    static void * pyeval_addr[64];
+    static npy_intp n_py_addr = 0;
+    static npy_intp n_pyeval = 0;
+
+    void *buffer[NPY_MAX_STACKSIZE];
+    int i, nptrs;
+    int ok = 0;
+    /* cannot determine callers */
+    if (init == -1) {
+        *cannot = 1;
+        return 0;
+    }
+
+    nptrs = backtrace(buffer, NPY_MAX_STACKSIZE);
+    if (nptrs == 0) {
+        /* complete failure, disable elision */
+        init = -1;
+        *cannot = 1;
+        return 0;
+    }
+
+    /* setup DSO base addresses, ends updated later */
+    if (NPY_UNLIKELY(init == 0)) {
+        Dl_info info;
+        /* get python base address */
+        if (dladdr(&PyNumber_Or, &info)) {
+            pos_python_start = info.dli_fbase;
+            pos_python_end = info.dli_fbase;
+        }
+        else {
+            init = -1;
+            return 0;
+        }
+        /* get multiarray base address */
+        if (dladdr(&PyArray_SetNumericOps, &info)) {
+            pos_ma_start = info.dli_fbase;
+            pos_ma_end = info.dli_fbase;
+        }
+        else {
+            init = -1;
+            return 0;
+        }
+        init = 1;
+    }
+
+    /* loop over callstack addresses to check if they leave numpy or cpython */
+    for (i = 0; i < nptrs; i++) {
+        Dl_info info;
+        int in_python = 0;
+        int in_multiarray = 0;
+#if NPY_ELIDE_DEBUG >= 2
+        dladdr(buffer[i], &info);
+        printf("%s(%p) %s(%p)\n", info.dli_fname, info.dli_fbase,
+               info.dli_sname, info.dli_saddr);
+#endif
+
+        /* check stored DSO boundaries first */
+        if (buffer[i] >= pos_python_start && buffer[i] <= pos_python_end) {
+            in_python = 1;
+        }
+        else if (buffer[i] >= pos_ma_start && buffer[i] <= pos_ma_end) {
+            in_multiarray = 1;
+        }
+
+        /* update DSO boundaries via dladdr if necessary */
+        if (!in_python && !in_multiarray) {
+            if (dladdr(buffer[i], &info) == 0) {
+                init = -1;
+                ok = 0;
+                break;
+            }
+            /* update DSO end */
+            if (info.dli_fbase == pos_python_start) {
+                pos_python_end = NPY_NUMBER_MAX(buffer[i], pos_python_end);
+                in_python = 1;
+            }
+            else if (info.dli_fbase == pos_ma_start) {
+                pos_ma_end = NPY_NUMBER_MAX(buffer[i], pos_ma_end);
+                in_multiarray = 1;
+            }
+        }
+
+        /* no longer in ok libraries and not reached PyEval -> no elide */
+        if (!in_python && !in_multiarray) {
+            ok = 0;
+            break;
+        }
+
+        /* in python check if the frame eval function was reached */
+        if (in_python) {
+            /* if reached eval we are done */
+            if (find_addr(pyeval_addr, n_pyeval, buffer[i])) {
+                ok = 1;
+                break;
+            }
+            /*
+             * check if its some other function, use pointer lookup table to
+             * save expensive dladdr calls
+             */
+            if (find_addr(py_addr, n_py_addr, buffer[i])) {
+                continue;
+            }
+
+            /* new python address, check for PyEvalFrame */
+            if (dladdr(buffer[i], &info) == 0) {
+                init = -1;
+                ok = 0;
+                break;
+            }
+            if (info.dli_sname &&
+                    strcmp(info.dli_sname, PYFRAMEEVAL_FUNC) == 0) {
+                if (n_pyeval < sizeof(pyeval_addr) / sizeof(pyeval_addr[0])) {
+                    /* store address to not have to dladdr it again */
+                    pyeval_addr[n_pyeval++] = buffer[i];
+                }
+                ok = 1;
+                break;
+            }
+            else if (n_py_addr < sizeof(py_addr) / sizeof(py_addr[0])) {
+                /* store other py function to not have to dladdr it again */
+                py_addr[n_py_addr++] = buffer[i];
+            }
+        }
+    }
+
+    /* all stacks after numpy are from python, we can elide */
+    if (ok) {
+        *cannot = 0;
+        return 1;
+    }
+    else {
+#if NPY_ELIDE_DEBUG != 0
+        puts("cannot elide due to c-api usage");
+#endif
+        *cannot = 1;
+        return 0;
+    }
+}
+
+/*
+ * check if in "alhs @op@ orhs" that alhs is a temporary (refcnt == 1) so we
+ * can do inplace operations instead of creating a new temporary
+ * "cannot" is set to true if it cannot be done even with swapped arguments
+ */
+static int
+can_elide_temp(PyArrayObject * alhs, PyObject * orhs, int * cannot)
+{
+    /*
+     * to be a candidate the array needs to have reference count 1, be an
+     * exact array of a basic type, own its data and size larger than
+     * threshold
+     */
+    if (Py_REFCNT(alhs) != 1 || !PyArray_CheckExact(alhs) ||
+            PyArray_DESCR(alhs)->type_num >= NPY_OBJECT ||
+            !(PyArray_FLAGS(alhs) & NPY_ARRAY_OWNDATA) ||
+            PyArray_NBYTES(alhs) < NPY_MIN_ELIDE_BYTES) {
+        return 0;
+    }
+    if (PyArray_CheckExact(orhs) ||
+        PyArray_CheckAnyScalar(orhs)) {
+        PyArrayObject * arhs;
+
+        /* create array from right hand side */
+        Py_INCREF(orhs);
+        arhs = (PyArrayObject *)PyArray_EnsureArray(orhs);
+        if (arhs == NULL) {
+            return 0;
+        }
+
+        /*
+         * if rhs is not a scalar dimensions must match
+         * TODO: one could allow broadcasting on equal types
+         */
+        if (!(PyArray_NDIM(arhs) == 0 ||
+              (PyArray_NDIM(arhs) == PyArray_NDIM(alhs) &&
+               PyArray_CompareLists(PyArray_DIMS(alhs), PyArray_DIMS(arhs),
+                                    PyArray_NDIM(arhs))))) {
+            Py_DECREF(arhs);
+            return 0;
+        }
+
+        /* must be safe to cast (checks values for scalar in rhs) */
+        if (PyArray_CanCastArrayTo(arhs, PyArray_DESCR(alhs),
+                                   NPY_SAFE_CASTING)) {
+            Py_DECREF(arhs);
+            return check_callers(cannot);
+        }
+        Py_DECREF(arhs);
+    }
+
+    return 0;
+}
+
+/*
+ * try eliding a binary op, if commutative is true also try swapped arguments
+ */
+NPY_NO_EXPORT int
+try_binary_elide(PyArrayObject * m1, PyObject * m2,
+                 PyObject * (inplace_op)(PyArrayObject * m1, PyObject * m2),
+                 PyObject ** res, int commutative)
+{
+    /* set when no elision can be done independent of argument order */
+    int cannot = 0;
+    if (can_elide_temp(m1, m2, &cannot)) {
+        *res = inplace_op(m1, m2);
+#if NPY_ELIDE_DEBUG != 0
+        puts("elided temporary in binary op");
+#endif
+        return 1;
+    }
+    else if (commutative && !cannot) {
+        if (can_elide_temp((PyArrayObject *)m2, (PyObject *)m1, &cannot)) {
+            *res = inplace_op((PyArrayObject *)m2, (PyObject *)m1);
+#if NPY_ELIDE_DEBUG != 0
+            puts("elided temporary in commutative binary op");
+#endif
+            return 1;
+        }
+    }
+    *res = NULL;
+    return 0;
+}
+
+/* try elide unary temporary */
+NPY_NO_EXPORT int
+can_elide_temp_unary(PyArrayObject * m1)
+{
+    int cannot;
+    if (Py_REFCNT(m1) != 1 || !PyArray_CheckExact(m1) ||
+            PyArray_DESCR(m1)->type_num == NPY_VOID ||
+            !(PyArray_FLAGS(m1) & NPY_ARRAY_OWNDATA) ||
+            PyArray_NBYTES(m1) < NPY_MIN_ELIDE_BYTES) {
+        return 0;
+    }
+    if (check_callers(&cannot)) {
+#if NPY_ELIDE_DEBUG != 0
+        puts("elided temporary in unary op");
+#endif
+        return 1;
+    }
+    else {
+        return 0;
+    }
+}
+#else /* unsupported interpreter or missing backtrace */
+NPY_NO_EXPORT int
+can_elide_temp_unary(PyArrayObject * m1)
+{
+    return 0;
+}
+
+NPY_NO_EXPORT int
+try_binary_elide(PyArrayObject * m1, PyObject * m2,
+                 PyObject * (inplace_op)(PyArrayObject * m1, PyObject * m2),
+                 PyObject ** res, int commutative)
+{
+    *res = NULL;
+    return 0;
+}
+#endif
diff --git a/numpy/core/src/multiarray/temp_elide.h b/numpy/core/src/multiarray/temp_elide.h
new file mode 100644
index 000000000..d073adf28
--- /dev/null
+++ b/numpy/core/src/multiarray/temp_elide.h
@@ -0,0 +1,15 @@
+#ifndef _NPY_ARRAY_TEMP_AVOID_H_
+#define _NPY_ARRAY_TEMP_AVOID_H_
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include <numpy/ndarraytypes.h>
+
+NPY_NO_EXPORT int
+can_elide_temp_unary(PyArrayObject * m1);
+
+NPY_NO_EXPORT int
+try_binary_elide(PyArrayObject * m1, PyObject * m2,
+                 PyObject * (inplace_op)(PyArrayObject * m1, PyObject * m2),
+                 PyObject ** res, int commutative);
+
+#endif
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index fa5051ba7..b780c5558 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -2743,8 +2743,9 @@ class TestBinop(object):
         #     d = input.copy() # refcount 1
         #     return d, d + d # PyNumber_Add without increasing refcount
        from numpy.core.multiarray_tests import incref_elide
-        d = np.ones(5)
+        d = np.ones(100000)
         orig, res = incref_elide(d)
+        d + d
         # the return original should not be changed to an inplace operation
         assert_array_equal(orig, d)
         assert_array_equal(res, d + d)
@@ -2758,12 +2759,52 @@ class TestBinop(object):
         #     return l[4] + l[4] # PyNumber_Add without increasing refcount
         from numpy.core.multiarray_tests import incref_elide_l
         # padding with 1 makes sure the object on the stack is not overwriten
-        l = [1, 1, 1, 1, np.ones(5)]
+        l = [1, 1, 1, 1, np.ones(100000)]
         res = incref_elide_l(l)
         # the return original should not be changed to an inplace operation
-        assert_array_equal(l[4], np.ones(5))
+        assert_array_equal(l[4], np.ones(100000))
         assert_array_equal(res, l[4] + l[4])
 
+    def test_temporary_with_cast(self):
+        # check that we don't elide into a temporary which would need casting
+        d = np.ones(200000, dtype=np.int64)
+        assert_equal(((d + d) + 2**222).dtype, np.dtype('O'))
+
+        r = ((d + d) / 2)
+        assert_equal(r.dtype, np.dtype('f8'))
+
+        r = np.true_divide((d + d), 2)
+        assert_equal(r.dtype, np.dtype('f8'))
+
+        r = ((d + d) / 2.)
+        assert_equal(r.dtype, np.dtype('f8'))
+
+        r = ((d + d) // 2)
+        assert_equal(r.dtype, np.dtype(np.int64))
+
+        # commutative elision into the astype result
+        f = np.ones(100000, dtype=np.float32)
+        assert_equal(((f + f) + f.astype(np.float64)).dtype, np.dtype('f8'))
+
+        # no elision into f + f
+        d = f.astype(np.float64)
+        assert_equal(((f + f) + d).dtype, np.dtype('f8'))
+
+    def test_elide_broadcast(self):
+        # test no elision on broadcast to higher dimension
+        # only triggers elision code path in debug mode as triggering it in
+        # normal mode needs 256kb large matching dimension, so a lot of memory
+        d = np.ones((2000, 1), dtype=int)
+        b = np.ones((2000), dtype=np.bool)
+        r = (1 - d) + b
+        assert_equal(r, 1)
+        assert_equal(r.shape, (2000, 2000))
+
+    def test_elide_scalar(self):
+        # check inplace op does not create ndarray from scalars
+        a = np.bool_()
+        assert_(type(~(a & a)) is np.bool_)
+
     def test_ufunc_override_rop_precedence(self):
         # 2016-01-29: NUMPY_UFUNC_DISABLED
         return
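
A small end-to-end check of the behavior, sketched here for illustration and not part of the test suite in the diff above; the array size is an arbitrary value chosen to be over the 256 KiB threshold:

    import numpy as np

    size = 100000  # 800 KB of float64, above the elision threshold
    a = np.random.rand(size)
    b = np.random.rand(size)

    # expression whose first intermediate (a + b) is an elidable temporary
    elided = a + b + b

    # reference computed through a named intermediate (refcount > 1),
    # which the elision machinery never touches
    tmp = a + b
    reference = tmp + b

    # elision must not change the result, only avoid the second allocation
    assert np.array_equal(elided, reference)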