Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/add_newdocs.py | 30
-rw-r--r-- | numpy/core/_internal.py | 4
-rw-r--r-- | numpy/core/bento.info | 3
-rw-r--r-- | numpy/core/bscript | 1
-rw-r--r-- | numpy/core/function_base.py | 47
-rw-r--r-- | numpy/core/numeric.py | 9
-rw-r--r-- | numpy/core/setup.py | 8
-rw-r--r-- | numpy/core/src/multiarray/array_assign.c | 27
-rw-r--r-- | numpy/core/src/multiarray/arrayobject.c | 1
-rw-r--r-- | numpy/core/src/multiarray/common.c | 33
-rw-r--r-- | numpy/core/src/multiarray/common.h | 5
-rw-r--r-- | numpy/core/src/multiarray/getset.c | 1
-rw-r--r-- | numpy/core/src/multiarray/multiarray_tests.c.src | 649
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 82
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule_onefile.c | 1
-rw-r--r-- | numpy/core/src/private/mem_overlap.c | 905
-rw-r--r-- | numpy/core/src/private/mem_overlap.h | 50
-rw-r--r-- | numpy/core/src/private/npy_extint128.h | 317
-rw-r--r-- | numpy/core/tests/test_extint128.py | 225
-rw-r--r-- | numpy/core/tests/test_mem_overlap.py | 486
-rw-r--r-- | numpy/core/tests/test_multiarray_assignment.py | 84
21 files changed, 2804 insertions, 164 deletions
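
The diff below adds an exact memory-overlap solver (src/private/mem_overlap.c), exposes it to Python as numpy.shares_memory, and reimplements numpy.may_share_memory on top of it. A minimal usage sketch of the new Python-level API follows; it uses only names introduced in this changeset (shares_memory, may_share_memory with max_work, TooHardError, MAY_SHARE_BOUNDS/MAY_SHARE_EXACT) and assumes a NumPy build that contains these changes.

import numpy as np

x = np.zeros([3, 4])
a, b = x[:, 0], x[:, 1]

# Bounds-only check (the old default behaviour): cheap, but may report
# false positives.
print(np.may_share_memory(a, b))        # True -- the memory bounds overlap

# Exact check: solves the bounded Diophantine problem, so no false positives.
print(np.shares_memory(a, b))           # False -- no element is actually shared

# max_work caps the number of candidate solutions the exact solver examines;
# if the cap is exceeded, numpy.TooHardError (a RuntimeError subclass) may be
# raised.
try:
    np.shares_memory(a, b, max_work=1)
except np.TooHardError:
    print("overlap problem too hard for the given max_work")

Note that may_share_memory keeps its old bounds-check default (max_work=None maps to MAY_SHARE_BOUNDS), so existing callers see no slowdown; passing a different max_work forwards the call to shares_memory and, if the solver gives up or overflows, the answer falls back to True.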
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 195789a39..607e28a28 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -3784,24 +3784,40 @@ add_newdoc('numpy.core.multiarray', 'ndarray', ('min', """)) -add_newdoc('numpy.core.multiarray', 'may_share_memory', +add_newdoc('numpy.core.multiarray', 'shares_memory', """ - Determine if two arrays can share memory + shares_memory(a, b, max_work=None) - The memory-bounds of a and b are computed. If they overlap then - this function returns True. Otherwise, it returns False. - - A return of True does not necessarily mean that the two arrays - share any element. It just means that they *might*. + Determine if two arrays share memory Parameters ---------- a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem (maximum number + of candidate solutions to consider). The following special + values are recognized: + + max_work=MAY_SHARE_EXACT (default) + The problem is solved exactly. In this case, the function returns + True only if there is an element shared between the arrays. + max_work=MAY_SHARE_BOUNDS + Only the memory bounds of a and b are checked. + + Raises + ------ + numpy.TooHardError + Exceeded max_work. Returns ------- out : bool + See Also + -------- + may_share_memory + Examples -------- >>> np.may_share_memory(np.array([1,2]), np.array([5,8,9])) diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py index bf492d105..879f4a224 100644 --- a/numpy/core/_internal.py +++ b/numpy/core/_internal.py @@ -759,3 +759,7 @@ def _gcd(a, b): while b: a, b = b, a % b return a + +# Exception used in shares_memory() +class TooHardError(RuntimeError): + pass diff --git a/numpy/core/bento.info b/numpy/core/bento.info index aaf261ddc..0c335e08a 100644 --- a/numpy/core/bento.info +++ b/numpy/core/bento.info @@ -22,7 +22,8 @@ Library: src/multiarray/multiarraymodule_onefile.c Extension: multiarray_tests Sources: - src/multiarray/multiarray_tests.c.src + src/multiarray/multiarray_tests.c.src, + src/private/mem_overlap.c Extension: umath Sources: src/umath/umathmodule_onefile.c diff --git a/numpy/core/bscript b/numpy/core/bscript index 4eea31ed7..944c2996f 100644 --- a/numpy/core/bscript +++ b/numpy/core/bscript @@ -484,6 +484,7 @@ def pre_build(context): pjoin('src', 'multiarray', 'usertypes.c'), pjoin('src', 'multiarray', 'vdot.c'), pjoin('src', 'private', 'templ_common.h.src'), + pjoin('src', 'private', 'mem_overlap.c'), ] if bld.env.HAS_CBLAS: diff --git a/numpy/core/function_base.py b/numpy/core/function_base.py index 532ef2950..05fea557a 100644 --- a/numpy/core/function_base.py +++ b/numpy/core/function_base.py @@ -1,9 +1,9 @@ from __future__ import division, absolute_import, print_function -__all__ = ['logspace', 'linspace'] +__all__ = ['logspace', 'linspace', 'may_share_memory'] from . import numeric as _nx -from .numeric import result_type, NaN +from .numeric import result_type, NaN, shares_memory, MAY_SHARE_BOUNDS, TooHardError def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None): @@ -201,3 +201,46 @@ def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None): if dtype is None: return _nx.power(base, y) return _nx.power(base, y).astype(dtype) + + +def may_share_memory(a, b, max_work=None): + """Determine if two arrays can share memory + + A return of True does not necessarily mean that the two arrays + share any element. It just means that they *might*. + + Only the memory bounds of a and b are checked by default. 
+ + Parameters + ---------- + a, b : ndarray + Input arrays + max_work : int, optional + Effort to spend on solving the overlap problem. See + `shares_memory` for details. Default for ``may_share_memory`` + is to do a bounds check. + + Returns + ------- + out : bool + + See Also + -------- + shares_memory + + Examples + -------- + >>> np.may_share_memory(np.array([1,2]), np.array([5,8,9])) + False + >>> x = np.zeros([3, 4]) + >>> np.may_share_memory(x[:,0], x[:,1]) + True + + """ + if max_work is None: + max_work = MAY_SHARE_BOUNDS + try: + return shares_memory(a, b, max_work=max_work) + except (TooHardError, OverflowError): + # Unable to determine, assume yes + return True diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index f0163876f..1b7dfca3e 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -10,6 +10,7 @@ from .umath import (invert, sin, UFUNC_BUFSIZE_DEFAULT, ERR_IGNORE, ERR_DEFAULT, PINF, NAN) from . import numerictypes from .numerictypes import longlong, intc, int_, float_, complex_, bool_ +from ._internal import TooHardError if sys.version_info[0] >= 3: import pickle @@ -39,8 +40,8 @@ __all__ = [ 'getbufsize', 'seterrcall', 'geterrcall', 'errstate', 'flatnonzero', 'Inf', 'inf', 'infty', 'Infinity', 'nan', 'NaN', 'False_', 'True_', 'bitwise_not', 'CLIP', 'RAISE', 'WRAP', 'MAXDIMS', 'BUFSIZE', - 'ALLOW_THREADS', 'ComplexWarning', 'may_share_memory', 'full', - 'full_like', 'matmul', + 'ALLOW_THREADS', 'ComplexWarning', 'full', 'full_like', 'matmul', + 'shares_memory', 'MAY_SHARE_BOUNDS', 'MAY_SHARE_EXACT', 'TooHardError', ] if sys.version_info[0] < 3: @@ -65,6 +66,8 @@ RAISE = multiarray.RAISE MAXDIMS = multiarray.MAXDIMS ALLOW_THREADS = multiarray.ALLOW_THREADS BUFSIZE = multiarray.BUFSIZE +MAY_SHARE_BOUNDS = multiarray.MAY_SHARE_BOUNDS +MAY_SHARE_EXACT = multiarray.MAY_SHARE_EXACT ndarray = multiarray.ndarray flatiter = multiarray.flatiter @@ -375,7 +378,7 @@ fromstring = multiarray.fromstring fromiter = multiarray.fromiter fromfile = multiarray.fromfile frombuffer = multiarray.frombuffer -may_share_memory = multiarray.may_share_memory +shares_memory = multiarray.shares_memory if sys.version_info[0] < 3: newbuffer = multiarray.newbuffer getbuffer = multiarray.getbuffer diff --git a/numpy/core/setup.py b/numpy/core/setup.py index 99084b25e..6d9926d89 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -762,6 +762,8 @@ def configuration(parent_package='',top_path=None): join('src', 'private', 'npy_config.h'), join('src', 'private', 'templ_common.h.src'), join('src', 'private', 'lowlevel_strided_loops.h'), + join('src', 'private', 'mem_overlap.h'), + join('src', 'private', 'npy_extint128.h'), join('include', 'numpy', 'arrayobject.h'), join('include', 'numpy', '_neighborhood_iterator_imp.h'), join('include', 'numpy', 'npy_endian.h'), @@ -831,6 +833,7 @@ def configuration(parent_package='',top_path=None): join('src', 'multiarray', 'ucsnarrow.c'), join('src', 'multiarray', 'vdot.c'), join('src', 'private', 'templ_common.h.src'), + join('src', 'private', 'mem_overlap.c'), ] blas_info = get_info('blas_opt', 0) @@ -959,7 +962,10 @@ def configuration(parent_package='',top_path=None): ####################################################################### config.add_extension('multiarray_tests', - sources=[join('src', 'multiarray', 'multiarray_tests.c.src')]) + sources=[join('src', 'multiarray', 'multiarray_tests.c.src'), + join('src', 'private', 'mem_overlap.c')], + depends=[join('src', 'private', 'mem_overlap.h'), + join('src', 'private', 
'npy_extint128.h')]) ####################################################################### # operand_flag_tests module # diff --git a/numpy/core/src/multiarray/array_assign.c b/numpy/core/src/multiarray/array_assign.c index fa764d758..a48e245d8 100644 --- a/numpy/core/src/multiarray/array_assign.c +++ b/numpy/core/src/multiarray/array_assign.c @@ -23,6 +23,7 @@ #include "array_assign.h" #include "common.h" #include "lowlevel_strided_loops.h" +#include "mem_overlap.h" /* See array_assign.h for parameter documentation */ NPY_NO_EXPORT int @@ -101,27 +102,17 @@ raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment) } -/* Gets a half-open range [start, end) which contains the array data */ -NPY_NO_EXPORT void -get_array_memory_extents(PyArrayObject *arr, - npy_uintp *out_start, npy_uintp *out_end) -{ - npy_intp low, upper; - offset_bounds_from_strides(PyArray_ITEMSIZE(arr), PyArray_NDIM(arr), - PyArray_DIMS(arr), PyArray_STRIDES(arr), - &low, &upper); - *out_start = (npy_uintp)PyArray_DATA(arr) + (npy_uintp)low; - *out_end = (npy_uintp)PyArray_DATA(arr) + (npy_uintp)upper; -} - /* Returns 1 if the arrays have overlapping data, 0 otherwise */ NPY_NO_EXPORT int arrays_overlap(PyArrayObject *arr1, PyArrayObject *arr2) { - npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0; - - get_array_memory_extents(arr1, &start1, &end1); - get_array_memory_extents(arr2, &start2, &end2); + mem_overlap_t result; - return (start1 < end2) && (start2 < end1); + result = solve_may_share_memory(arr1, arr2, NPY_MAY_SHARE_BOUNDS); + if (result == MEM_OVERLAP_NO) { + return 0; + } + else { + return 1; + } } diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 86ea38cd7..fd5b15a0a 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -51,6 +51,7 @@ maintainer email: oliphant.travis@ieee.org #include "buffer.h" #include "array_assign.h" #include "alloc.h" +#include "mem_overlap.h" /*NUMPY_API Compute the size of an array (in number of items) diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index a5f3b3d55..3352c3529 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -765,39 +765,6 @@ _IsWriteable(PyArrayObject *ap) } -/* Gets a half-open range [start, end) of offsets from the data pointer */ -NPY_NO_EXPORT void -offset_bounds_from_strides(const int itemsize, const int nd, - const npy_intp *dims, const npy_intp *strides, - npy_intp *lower_offset, npy_intp *upper_offset) { - npy_intp max_axis_offset; - npy_intp lower = 0; - npy_intp upper = 0; - int i; - - for (i = 0; i < nd; i++) { - if (dims[i] == 0) { - /* If the array size is zero, return an empty range */ - *lower_offset = 0; - *upper_offset = 0; - return; - } - /* Expand either upwards or downwards depending on stride */ - max_axis_offset = strides[i] * (dims[i] - 1); - if (max_axis_offset > 0) { - upper += max_axis_offset; - } - else { - lower += max_axis_offset; - } - } - /* Return a half-open range */ - upper += itemsize; - *lower_offset = lower; - *upper_offset = upper; -} - - /** * Convert an array shape to a string such as "(1, 2)". 
* diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index 9cf2e27bf..11993829f 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -65,11 +65,6 @@ _IsAligned(PyArrayObject *ap); NPY_NO_EXPORT npy_bool _IsWriteable(PyArrayObject *ap); -NPY_NO_EXPORT void -offset_bounds_from_strides(const int itemsize, const int nd, - const npy_intp *dims, const npy_intp *strides, - npy_intp *lower_offset, npy_intp *upper_offset); - NPY_NO_EXPORT PyObject * convert_shape_to_string(npy_intp n, npy_intp *vals, char *ending); diff --git a/numpy/core/src/multiarray/getset.c b/numpy/core/src/multiarray/getset.c index 0b694deed..5147b9735 100644 --- a/numpy/core/src/multiarray/getset.c +++ b/numpy/core/src/multiarray/getset.c @@ -17,6 +17,7 @@ #include "descriptor.h" #include "getset.h" #include "arrayobject.h" +#include "mem_overlap.h" /******************* array attribute get and set routines ******************/ diff --git a/numpy/core/src/multiarray/multiarray_tests.c.src b/numpy/core/src/multiarray/multiarray_tests.c.src index e7f8e36fd..5e247e15e 100644 --- a/numpy/core/src/multiarray/multiarray_tests.c.src +++ b/numpy/core/src/multiarray/multiarray_tests.c.src @@ -1,6 +1,9 @@ +/* -*-c-*- */ #define NPY_NO_DEPRECATED_API NPY_API_VERSION #include <Python.h> #include "numpy/arrayobject.h" +#include "mem_overlap.h" +#include "npy_extint128.h" /* test PyArray_IsPythonScalar, before including private py3 compat header */ static PyObject * @@ -947,6 +950,607 @@ test_nditer_too_large(PyObject *NPY_UNUSED(self), PyObject *args) { } +static PyObject * +array_solve_diophantine(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) +{ + PyObject *A = NULL; + PyObject *U = NULL; + Py_ssize_t b_input = 0; + Py_ssize_t max_work = -1; + int simplify = 0; + int require_ub_nontrivial = 0; + static char *kwlist[] = {"A", "U", "b", "max_work", "simplify", + "require_ub_nontrivial", NULL}; + + diophantine_term_t terms[2*NPY_MAXDIMS+2]; + npy_int64 x[2*NPY_MAXDIMS+2]; + npy_int64 b; + unsigned int nterms, j; + mem_overlap_t result = MEM_OVERLAP_YES; + PyObject *retval = NULL; + NPY_BEGIN_THREADS_DEF; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!n|nii", kwlist, + &PyTuple_Type, &A, + &PyTuple_Type, &U, + &b_input, &max_work, &simplify, + &require_ub_nontrivial)) { + return NULL; + } + + if (PyTuple_GET_SIZE(A) > sizeof(terms) / sizeof(diophantine_term_t)) { + PyErr_SetString(PyExc_ValueError, "too many terms in equation"); + goto fail; + } + + nterms = PyTuple_GET_SIZE(A); + + if (PyTuple_GET_SIZE(U) != nterms) { + PyErr_SetString(PyExc_ValueError, "A, U must be tuples of equal length"); + goto fail; + } + + for (j = 0; j < nterms; ++j) { + terms[j].a = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(A, j)); + if (terms[j].a == -1 && PyErr_Occurred()) { + goto fail; + } + terms[j].ub = (npy_int64)PyInt_AsSsize_t(PyTuple_GET_ITEM(U, j)); + if (terms[j].ub == -1 && PyErr_Occurred()) { + goto fail; + } + } + + b = b_input; + + NPY_BEGIN_THREADS; + if (simplify && !require_ub_nontrivial) { + if (diophantine_simplify(&nterms, terms, b)) { + result = MEM_OVERLAP_OVERFLOW; + } + } + if (result == MEM_OVERLAP_YES) { + result = solve_diophantine(nterms, terms, b, max_work, require_ub_nontrivial, x); + } + NPY_END_THREADS; + + if (result == MEM_OVERLAP_YES) { + retval = PyTuple_New(nterms); + if (retval == NULL) { + goto fail; + } + + for (j = 0; j < nterms; ++j) { + PyObject *obj; +#if defined(NPY_PY3K) + obj = PyLong_FromSsize_t(x[j]); +#else + obj = 
PyInt_FromSsize_t(x[j]); +#endif + if (obj == NULL) { + goto fail; + } + PyTuple_SET_ITEM(retval, j, obj); + } + } + else if (result == MEM_OVERLAP_NO) { + retval = Py_None; + Py_INCREF(retval); + } + else if (result == MEM_OVERLAP_ERROR) { + PyErr_SetString(PyExc_ValueError, "Invalid arguments"); + } + else if (result == MEM_OVERLAP_OVERFLOW) { + PyErr_SetString(PyExc_OverflowError, "Integer overflow"); + } + else if (result == MEM_OVERLAP_TOO_HARD) { + PyErr_SetString(PyExc_RuntimeError, "Too much work done"); + } + else { + PyErr_SetString(PyExc_RuntimeError, "Unknown error"); + } + + return retval; + +fail: + Py_XDECREF(retval); + return NULL; +} + + +static PyObject * +array_internal_overlap(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) +{ + PyArrayObject * self = NULL; + static char *kwlist[] = {"self", "max_work", NULL}; + + mem_overlap_t result; + Py_ssize_t max_work = NPY_MAY_SHARE_EXACT; + NPY_BEGIN_THREADS_DEF; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|n", kwlist, + PyArray_Converter, &self, + &max_work)) { + return NULL; + } + + if (max_work < -2) { + PyErr_SetString(PyExc_ValueError, "Invalid value for max_work"); + goto fail; + } + + NPY_BEGIN_THREADS; + result = solve_may_have_internal_overlap(self, max_work); + NPY_END_THREADS; + + Py_XDECREF(self); + + if (result == MEM_OVERLAP_NO) { + Py_RETURN_FALSE; + } + else if (result == MEM_OVERLAP_YES) { + Py_RETURN_TRUE; + } + else if (result == MEM_OVERLAP_OVERFLOW) { + PyErr_SetString(PyExc_OverflowError, + "Integer overflow in computing overlap"); + return NULL; + } + else if (result == MEM_OVERLAP_TOO_HARD) { + PyErr_SetString(PyExc_ValueError, + "Exceeded max_work"); + return NULL; + } + else { + /* Doesn't happen usually */ + PyErr_SetString(PyExc_RuntimeError, + "Error in computing overlap"); + return NULL; + } + +fail: + Py_XDECREF(self); + return NULL; +} + + +static PyObject * +pylong_from_int128(npy_extint128_t value) +{ + PyObject *val_64 = NULL, *val = NULL, *tmp = NULL, *tmp2 = NULL; + + val_64 = PyLong_FromLong(64); + if (val_64 == NULL) { + goto fail; + } + + val = PyLong_FromUnsignedLongLong(value.hi); + if (val == NULL) { + goto fail; + } + + tmp = PyNumber_Lshift(val, val_64); + if (tmp == NULL) { + goto fail; + } + + Py_DECREF(val); + val = tmp; + + tmp = PyLong_FromUnsignedLongLong(value.lo); + if (tmp == NULL) { + goto fail; + } + + tmp2 = PyNumber_Or(val, tmp); + if (tmp2 == NULL) { + goto fail; + } + + Py_DECREF(val); + Py_DECREF(tmp); + + val = NULL; + tmp = NULL; + + if (value.sign < 0) { + val = PyNumber_Negative(tmp2); + if (val == NULL) { + goto fail; + } + Py_DECREF(tmp2); + return val; + } + else { + val = tmp2; + } + return val; + +fail: + Py_XDECREF(val_64); + Py_XDECREF(tmp); + Py_XDECREF(tmp2); + Py_XDECREF(val); + return NULL; +} + + +static int +int128_from_pylong(PyObject *obj, npy_extint128_t *result) +{ + PyObject *long_obj = NULL, *val_64 = NULL, *val_0 = NULL, + *mask_64 = NULL, *max_128 = NULL, *hi_bits = NULL, + *lo_bits = NULL, *tmp = NULL; + int cmp; + int negative_zero = 0; + + if (PyBool_Check(obj)) { + /* False means negative zero */ + negative_zero = 1; + } + + long_obj = PyObject_CallFunction((PyObject*)&PyLong_Type, "O", obj); + if (long_obj == NULL) { + goto fail; + } + + val_0 = PyLong_FromLong(0); + if (val_0 == NULL) { + goto fail; + } + + val_64 = PyLong_FromLong(64); + if (val_64 == NULL) { + goto fail; + } + + mask_64 = PyLong_FromUnsignedLongLong(0xffffffffffffffffULL); + if (mask_64 == NULL) { + goto fail; + } + + tmp = 
PyNumber_Lshift(mask_64, val_64); + if (tmp == NULL) { + goto fail; + } + max_128 = PyNumber_Or(tmp, mask_64); + if (max_128 == NULL) { + goto fail; + } + Py_DECREF(tmp); + tmp = NULL; + + cmp = PyObject_RichCompareBool(long_obj, val_0, Py_LT); + if (cmp == -1) { + goto fail; + } + else if (cmp == 1) { + tmp = PyNumber_Negative(long_obj); + if (tmp == NULL) { + goto fail; + } + Py_DECREF(long_obj); + long_obj = tmp; + tmp = NULL; + result->sign = -1; + } + else { + result->sign = 1; + } + + cmp = PyObject_RichCompareBool(long_obj, max_128, Py_GT); + if (cmp == 1) { + PyErr_SetString(PyExc_OverflowError, ""); + goto fail; + } + else if (cmp == -1) { + goto fail; + } + + hi_bits = PyNumber_Rshift(long_obj, val_64); + if (hi_bits == NULL) { + goto fail; + } + + lo_bits = PyNumber_And(long_obj, mask_64); + if (lo_bits == NULL) { + goto fail; + } + + result->hi = PyLong_AsUnsignedLongLong(hi_bits); + if (result->hi == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred()) { + goto fail; + } + + result->lo = PyLong_AsUnsignedLongLong(lo_bits); + if (result->lo == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred()) { + goto fail; + } + + if (negative_zero && result->hi == 0 && result->lo == 0) { + result->sign = -1; + } + + Py_XDECREF(long_obj); + Py_XDECREF(val_64); + Py_XDECREF(val_0); + Py_XDECREF(mask_64); + Py_XDECREF(max_128); + Py_XDECREF(hi_bits); + Py_XDECREF(lo_bits); + Py_XDECREF(tmp); + return 0; + +fail: + Py_XDECREF(long_obj); + Py_XDECREF(val_64); + Py_XDECREF(val_0); + Py_XDECREF(mask_64); + Py_XDECREF(max_128); + Py_XDECREF(hi_bits); + Py_XDECREF(lo_bits); + Py_XDECREF(tmp); + return -1; +} + + +static PyObject * +extint_safe_binop(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a, b, c; + int op; + char overflow = 0; + if (!PyArg_ParseTuple(args, "LLi", &a, &b, &op)) { + return NULL; + } + if (op == 1) { + c = safe_add(a, b, &overflow); + } + else if (op == 2) { + c = safe_sub(a, b, &overflow); + } + else if (op == 3) { + c = safe_mul(a, b, &overflow); + } + else { + PyErr_SetString(PyExc_ValueError, "invalid op"); + return NULL; + } + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return PyLong_FromLongLong(c); +} + + +static PyObject * +extint_to_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a; + if (!PyArg_ParseTuple(args, "L", &a)) { + return NULL; + } + return pylong_from_int128(to_128(a)); +} + + +static PyObject * +extint_to_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a; + PY_LONG_LONG r; + char overflow = 0; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + r = to_64(a, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return PyLong_FromLongLong(r); +} + + +static PyObject * +extint_mul_64_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PY_LONG_LONG a, b; + npy_extint128_t c; + if (!PyArg_ParseTuple(args, "LL", &a, &b)) { + return NULL; + } + c = mul_64_64(a, b); + return pylong_from_int128(c); +} + + +static PyObject * +extint_add_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b, c; + char overflow = 0; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + c = add_128(a, b, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return 
pylong_from_int128(c); +} + + +static PyObject * +extint_sub_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b, c; + char overflow = 0; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + c = sub_128(a, b, &overflow); + if (overflow) { + PyErr_SetString(PyExc_OverflowError, ""); + return NULL; + } + return pylong_from_int128(c); +} + + +static PyObject * +extint_neg_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = neg_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_shl_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = shl_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_shr_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "O", &a_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + b = shr_128(a); + return pylong_from_int128(b); +} + + +static PyObject * +extint_gt_128(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *b_obj; + npy_extint128_t a, b; + if (!PyArg_ParseTuple(args, "OO", &a_obj, &b_obj)) { + return NULL; + } + if (int128_from_pylong(a_obj, &a) || int128_from_pylong(b_obj, &b)) { + return NULL; + } + if (gt_128(a, b)) { + Py_RETURN_TRUE; + } + else { + Py_RETURN_FALSE; + } +} + + +static PyObject * +extint_divmod_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj, *ret = NULL, *tmp = NULL; + npy_extint128_t a, c; + PY_LONG_LONG b; + npy_int64 mod; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + goto fail; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + goto fail; + } + if (int128_from_pylong(a_obj, &a)) { + goto fail; + } + + c = divmod_128_64(a, b, &mod); + + ret = PyTuple_New(2); + + tmp = pylong_from_int128(c); + if (tmp == NULL) { + goto fail; + } + PyTuple_SET_ITEM(ret, 0, tmp); + + tmp = PyLong_FromLongLong(mod); + if (tmp == NULL) { + goto fail; + } + PyTuple_SET_ITEM(ret, 1, tmp); + return ret; + +fail: + Py_XDECREF(ret); + Py_XDECREF(tmp); + return NULL; +} + + +static PyObject * +extint_floordiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, c; + PY_LONG_LONG b; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + return NULL; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + c = floordiv_128_64(a, b); + return pylong_from_int128(c); +} + + +static PyObject * +extint_ceildiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) { + PyObject *a_obj; + npy_extint128_t a, c; + PY_LONG_LONG b; + if (!PyArg_ParseTuple(args, "OL", &a_obj, &b)) { + return NULL; + } + if (b <= 0) { + PyErr_SetString(PyExc_ValueError, ""); + return NULL; + } + if (int128_from_pylong(a_obj, &a)) { + return NULL; + } + c = ceildiv_128_64(a, b); + return pylong_from_int128(c); +} + + static PyMethodDef Multiarray_TestsMethods[] = { {"IsPythonScalar", IsPythonScalar, @@ -989,6 +1593,51 @@ static PyMethodDef Multiarray_TestsMethods[] = { 
{"test_nditer_too_large", test_nditer_too_large, METH_VARARGS, NULL}, + {"solve_diophantine", + (PyCFunction)array_solve_diophantine, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"internal_overlap", + (PyCFunction)array_internal_overlap, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"extint_safe_binop", + extint_safe_binop, + METH_VARARGS, NULL}, + {"extint_to_128", + extint_to_128, + METH_VARARGS, NULL}, + {"extint_to_64", + extint_to_64, + METH_VARARGS, NULL}, + {"extint_mul_64_64", + extint_mul_64_64, + METH_VARARGS, NULL}, + {"extint_add_128", + extint_add_128, + METH_VARARGS, NULL}, + {"extint_sub_128", + extint_sub_128, + METH_VARARGS, NULL}, + {"extint_neg_128", + extint_neg_128, + METH_VARARGS, NULL}, + {"extint_shl_128", + extint_shl_128, + METH_VARARGS, NULL}, + {"extint_shr_128", + extint_shr_128, + METH_VARARGS, NULL}, + {"extint_gt_128", + extint_gt_128, + METH_VARARGS, NULL}, + {"extint_divmod_128_64", + extint_divmod_128_64, + METH_VARARGS, NULL}, + {"extint_floordiv_128_64", + extint_floordiv_128_64, + METH_VARARGS, NULL}, + {"extint_ceildiv_128_64", + extint_ceildiv_128_64, + METH_VARARGS, NULL}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 04513c56c..e72c355dc 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -60,6 +60,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0; #include "vdot.h" #include "templ_common.h" /* for npy_mul_with_overflow_intp */ #include "compiled_base.h" +#include "mem_overlap.h" /* Only here for API compatibility */ NPY_NO_EXPORT PyTypeObject PyBigArray_Type; @@ -3993,30 +3994,88 @@ test_interrupt(PyObject *NPY_UNUSED(self), PyObject *args) return PyInt_FromLong(a); } + static PyObject * -array_may_share_memory(PyObject *NPY_UNUSED(ignored), PyObject *args) +array_shares_memory(PyObject *NPY_UNUSED(ignored), PyObject *args, PyObject *kwds) { PyArrayObject * self = NULL; PyArrayObject * other = NULL; - int overlap; + PyObject *max_work_obj = NULL; + static char *kwlist[] = {"self", "other", "max_work", NULL}; + + mem_overlap_t result; + static PyObject *too_hard_cls = NULL; + Py_ssize_t max_work = NPY_MAY_SHARE_EXACT; + NPY_BEGIN_THREADS_DEF; - if (!PyArg_ParseTuple(args, "O&O&", PyArray_Converter, &self, - PyArray_Converter, &other)) { + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O", kwlist, + PyArray_Converter, &self, + PyArray_Converter, &other, + &max_work_obj)) { return NULL; } - overlap = arrays_overlap(self, other); + if (max_work_obj == NULL || max_work_obj == Py_None) { + /* noop */ + } + else if (PyLong_Check(max_work_obj)) { + max_work = PyLong_AsSsize_t(max_work_obj); + } +#if !defined(NPY_PY3K) + else if (PyInt_Check(max_work_obj)) { + max_work = PyInt_AsSsize_t(max_work_obj); + } +#endif + else { + PyErr_SetString(PyExc_ValueError, "max_work must be an integer"); + goto fail; + } + + if (max_work < -2) { + PyErr_SetString(PyExc_ValueError, "Invalid value for max_work"); + goto fail; + } + + NPY_BEGIN_THREADS; + result = solve_may_share_memory(self, other, max_work); + NPY_END_THREADS; + Py_XDECREF(self); Py_XDECREF(other); - if (overlap) { + if (result == MEM_OVERLAP_NO) { + Py_RETURN_FALSE; + } + else if (result == MEM_OVERLAP_YES) { Py_RETURN_TRUE; } + else if (result == MEM_OVERLAP_OVERFLOW) { + PyErr_SetString(PyExc_OverflowError, + "Integer overflow in computing overlap"); + return NULL; + } + else if (result == MEM_OVERLAP_TOO_HARD) { + 
npy_cache_import("numpy.core._internal", "TooHardError", + &too_hard_cls); + if (too_hard_cls) { + PyErr_SetString(too_hard_cls, "Exceeded max_work"); + } + return NULL; + } else { - Py_RETURN_FALSE; + /* Doesn't happen usually */ + PyErr_SetString(PyExc_RuntimeError, + "Error in computing overlap"); + return NULL; } + +fail: + Py_XDECREF(self); + Py_XDECREF(other); + return NULL; } + static struct PyMethodDef array_module_methods[] = { {"_get_ndarray_c_version", (PyCFunction)array__get_ndarray_c_version, @@ -4123,9 +4182,9 @@ static struct PyMethodDef array_module_methods[] = { {"result_type", (PyCFunction)array_result_type, METH_VARARGS, NULL}, - {"may_share_memory", - (PyCFunction)array_may_share_memory, - METH_VARARGS, NULL}, + {"shares_memory", + (PyCFunction)array_shares_memory, + METH_VARARGS | METH_KEYWORDS, NULL}, /* Datetime-related functions */ {"datetime_data", (PyCFunction)array_datetime_data, @@ -4583,6 +4642,9 @@ PyMODINIT_FUNC initmultiarray(void) { ADDCONST(RAISE); ADDCONST(WRAP); ADDCONST(MAXDIMS); + + ADDCONST(MAY_SHARE_BOUNDS); + ADDCONST(MAY_SHARE_EXACT); #undef ADDCONST Py_INCREF(&PyArray_Type); diff --git a/numpy/core/src/multiarray/multiarraymodule_onefile.c b/numpy/core/src/multiarray/multiarraymodule_onefile.c index 3940d009b..3924f3cf4 100644 --- a/numpy/core/src/multiarray/multiarraymodule_onefile.c +++ b/numpy/core/src/multiarray/multiarraymodule_onefile.c @@ -53,6 +53,7 @@ #include "ucsnarrow.c" #include "arrayobject.c" #include "numpymemoryview.c" +#include "mem_overlap.c" #include "multiarraymodule.c" #include "compiled_base.c" diff --git a/numpy/core/src/private/mem_overlap.c b/numpy/core/src/private/mem_overlap.c new file mode 100644 index 000000000..3cab83497 --- /dev/null +++ b/numpy/core/src/private/mem_overlap.c @@ -0,0 +1,905 @@ +/* + Solving memory overlap integer programs and bounded Diophantine equations with + positive coefficients. + + Asking whether two strided arrays `a` and `b` overlap is equivalent to + asking whether there is a solution to the following problem:: + + sum(stride_a[i] * x_a[i] for i in range(ndim_a)) + - + sum(stride_b[i] * x_b[i] for i in range(ndim_b)) + == + base_b - base_a + + 0 <= x_a[i] < shape_a[i] + 0 <= x_b[i] < shape_b[i] + + for some integer x_a, x_b. Itemsize needs to be considered as an additional + dimension with stride 1 and size itemsize. + + Negative strides can be changed to positive (and vice versa) by changing + variables x[i] -> shape[i] - 1 - x[i], and zero strides can be dropped, so + that the problem can be recast into a bounded Diophantine equation with + positive coefficients:: + + sum(a[i] * x[i] for i in range(n)) == b + + a[i] > 0 + + 0 <= x[i] <= ub[i] + + This problem is NP-hard --- runtime of algorithms grows exponentially with + increasing ndim. + + + *Algorithm description* + + A straightforward algorithm that excludes infeasible solutions using GCD-based + pruning is outlined in Ref. [1]. It is implemented below. A number of other + algorithms exist in the literature; however, this one seems to have + performance satisfactory for the present purpose. + + The idea is that an equation:: + + a_1 x_1 + a_2 x_2 + ... + a_n x_n = b + 0 <= x_i <= ub_i, i = 1...n + + implies:: + + a_2' x_2' + a_3 x_3 + ... + a_n x_n = b + + 0 <= x_i <= ub_i, i = 2...n + + 0 <= x_1' <= c_1 ub_1 + c_2 ub_2 + + with a_2' = gcd(a_1, a_2) and x_2' = c_1 x_1 + c_2 x_2 with c_1 = (a_1/a_1'), + and c_2 = (a_2/a_1'). 
This procedure can be repeated to obtain:: + + a_{n-1}' x_{n-1}' + a_n x_n = b + + 0 <= x_{n-1}' <= ub_{n-1}' + + 0 <= x_n <= ub_n + + Now, one can enumerate all candidate solutions for x_n. For each, one can use + the previous-level equation to enumerate potential solutions for x_{n-1}, with + transformed right-hand side b -> b - a_n x_n. And so forth, until after n-1 + nested for loops we either arrive at a candidate solution for x_1 (in which + case we have found one solution to the problem), or find that the equations do + not allow any solutions either for x_1 or one of the intermediate x_i (in + which case we have proved there is no solution for the upper-level candidates + chosen). If no solution is found for any candidate x_n, we have proved the + problem is infeasible --- which for the memory overlap problem means there is + no overlap. + + + *Performance* + + Some common ndarray cases are easy for the algorithm: + + - Two arrays whose memory ranges do not overlap. + + These will be excluded by the bounds on x_n, with max_work=1. We also add + this check as a fast path, to avoid computing GCDs needlessly, as this can + take some time. + + - Arrays produced by continuous slicing of a continuous parent array (no + internal overlap), e.g., a=x[:,0,:], b=x[:,1,:]. The strides taken together, + mapped positive, and duplicates then satisfy gcd(stride[0], .., stride[j]) = + stride[j] for some ordering. + + In this case, for each x[i] at most one candidate exists, given that the + algorithm runs with strides sorted from largest to smallest. The problem can + be written as:: + + sum a_j x_j ?= b = sum a_j z_j + + a_j = n_{j+1} * n_{j+2} * ... * n_d, a_d = 1 + 0 <= x_j <= u_j <= 2*n_j - 2 + 0 <= z_j <= n_j - 1 + + b is the offset of the last element of the second array from the start of + the first. z_j are uniquely determined because of the gcd property. For + each x_j, the bounds at first sight allow x_j=z_j and x_j=z_j+n_j. However, + u_j <= n_j - 1 + z_j, so that at most one candidate is left. + + - Two arrays with stride-incommensurate starting points. For example, + a=x[:,::2], b=x[:,1::2]. + + The base address difference is incommensurate with all strides, so that + there are no solution candidates to consider. For itemsize != 1, similar + result is obtained for x_{n-1}. + + The above cases cover arrays produced by typical slicing of well-behaved + parent arrays. More generally, more difficult cases can result:: + + x = np.arange(4*20).reshape(4, 20).astype(np.int8) + a = x[:,::7] + b = x[:,3::3] + + <=> + + 20*x1 + 7*x2 + 3*x3 = 78 (= 3 + 3*20 + 5*3) + 0 <= x1 <= 6, 0 <= x2 <= 2, 0 <= x3 <= 5 + + Non-overlapping in this case relies on x.shape[1] <= lcm(7, 3) = 21. However, + elimination of x1 does not restrict candidate values for x3, so the algorithm + ends up considering all values x3=0...5 separately. + + The upper bound for work done is prod(shape_a)*prod(shape_b), which scales + faster than than work done by binary ufuncs, after broadcasting, + prod(shape_a). The bound may be loose, but it is possible to construct hard + instances where ufunc is faster (adapted from [2,3]):: + + from numpy.lib.stride_tricks import as_strided + # Construct non-overlapping x1 and x2 + x = np.zeros([192163377], dtype=np.int8) + x1 = as_strided(x, strides=(36674, 61119, 85569), shape=(1049, 1049, 1049)) + x2 = as_strided(x[64023025:], strides=(12223, 12224, 1), shape=(1049, 1049, 1)) + + To avoid such worst cases, the amount of work done needs to be capped. 
If the + overlap problem is related to ufuncs, one suitable cap choice is to scale + max_work with the number of elements of the array. (Ref. [3] describes a more + efficient algorithm for solving problems similar to the above --- however, + also it must scale exponentially.) + + + *Integer overflows* + + The algorithm is written in fixed-width integers, and can terminate with + failure if integer overflow is detected (the implementation catches all + cases). Potential failure modes: + + - Array extent sum(stride*(shape-1)) is too large (for int64). + + - Minimal solutions to a_i x_i + a_j x_j == b are too large, + in some of the intermediate equations. + + We do this part of the computation in 128-bit integers. + + In general, overflows are expected only if array size is close to + NPY_INT64_MAX, requiring ~exabyte size arrays, which is usually not possible. + + References + ---------- + .. [1] P. Ramachandran, ''Use of Extended Euclidean Algorithm in Solving + a System of Linear Diophantine Equations with Bounded Variables''. + Algorithmic Number Theory, Lecture Notes in Computer Science **4076**, + 182-192 (2006). doi:10.1007/11792086_14 + + .. [2] Cornuejols, Urbaniak, Weismantel, and Wolsey, + ''Decomposition of integer programs and of generating sets.'', + Lecture Notes in Computer Science 1284, 92-103 (1997). + + .. [3] K. Aardal, A.K. Lenstra, + ''Hard equality constrained integer knapsacks'', + Lecture Notes in Computer Science 2337, 350-366 (2002). +*/ + +/* + Copyright (c) 2015 Pauli Virtanen + All rights reserved. + Licensed under 3-clause BSD license, see LICENSE.txt. +*/ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <Python.h> + +#define NPY_NO_DEPRECATED_API NPY_API_VERSION +#include "numpy/ndarraytypes.h" +#include "mem_overlap.h" +#include "npy_extint128.h" + + +#define MAX(a, b) (((a) >= (b)) ? (a) : (b)) +#define MIN(a, b) (((a) <= (b)) ? (a) : (b)) + + +/** + * Euclid's algorithm for GCD. + * + * Solves for gamma*a1 + epsilon*a2 == gcd(a1, a2) + * providing |gamma| < |a2|/gcd, |epsilon| < |a1|/gcd. 
+ */ +static void +euclid(npy_int64 a1, npy_int64 a2, npy_int64 *a_gcd, npy_int64 *gamma, npy_int64 *epsilon) +{ + npy_int64 gamma1, gamma2, epsilon1, epsilon2, r; + + assert(a1 > 0); + assert(a2 > 0); + + gamma1 = 1; + gamma2 = 0; + epsilon1 = 0; + epsilon2 = 1; + + /* The numbers remain bounded by |a1|, |a2| during + the iteration, so no integer overflows */ + while (1) { + if (a2 > 0) { + r = a1/a2; + a1 -= r*a2; + gamma1 -= r*gamma2; + epsilon1 -= r*epsilon2; + } + else { + *a_gcd = a1; + *gamma = gamma1; + *epsilon = epsilon1; + break; + } + + if (a1 > 0) { + r = a2/a1; + a2 -= r*a1; + gamma2 -= r*gamma1; + epsilon2 -= r*epsilon1; + } + else { + *a_gcd = a2; + *gamma = gamma2; + *epsilon = epsilon2; + break; + } + } +} + + +/** + * Precompute GCD and bounds transformations + */ +static int +diophantine_precompute(unsigned int n, + diophantine_term_t *E, + diophantine_term_t *Ep, + npy_int64 *Gamma, npy_int64 *Epsilon) +{ + npy_int64 a_gcd, gamma, epsilon, c1, c2; + unsigned int j; + char overflow = 0; + + assert(n >= 2); + + euclid(E[0].a, E[1].a, &a_gcd, &gamma, &epsilon); + Ep[0].a = a_gcd; + Gamma[0] = gamma; + Epsilon[0] = epsilon; + + if (n > 2) { + c1 = E[0].a / a_gcd; + c2 = E[1].a / a_gcd; + + /* Ep[0].ub = E[0].ub * c1 + E[1].ub * c2; */ + Ep[0].ub = safe_add(safe_mul(E[0].ub, c1, &overflow), + safe_mul(E[1].ub, c2, &overflow), &overflow); + if (overflow) { + return 1; + } + } + + for (j = 2; j < n; ++j) { + euclid(Ep[j-2].a, E[j].a, &a_gcd, &gamma, &epsilon); + Ep[j-1].a = a_gcd; + Gamma[j-1] = gamma; + Epsilon[j-1] = epsilon; + + if (j < n - 1) { + c1 = Ep[j-2].a / a_gcd; + c2 = E[j].a / a_gcd; + + /* Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub; */ + Ep[j-1].ub = safe_add(safe_mul(c1, Ep[j-2].ub, &overflow), + safe_mul(c2, E[j].ub, &overflow), &overflow); + + if (overflow) { + return 1; + } + } + } + + return 0; +} + + +/** + * Depth-first bounded Euclid search + */ +static mem_overlap_t +diophantine_dfs(unsigned int n, + unsigned int v, + diophantine_term_t *E, + diophantine_term_t *Ep, + npy_int64 *Gamma, npy_int64 *Epsilon, + npy_int64 b, + Py_ssize_t max_work, + int require_ub_nontrivial, + npy_int64 *x, + Py_ssize_t *count) +{ + npy_int64 a_gcd, gamma, epsilon, a1, u1, a2, u2, c, r, c1, c2, t, t_l, t_u, b2, x1, x2; + npy_extint128_t x10, x20, t_l1, t_l2, t_u1, t_u2; + mem_overlap_t res; + char overflow = 0; + + if (max_work >= 0 && *count >= max_work) { + return MEM_OVERLAP_TOO_HARD; + } + + /* Fetch precomputed values for the reduced problem */ + if (v == 1) { + a1 = E[0].a; + u1 = E[0].ub; + } + else { + a1 = Ep[v-2].a; + u1 = Ep[v-2].ub; + } + + a2 = E[v].a; + u2 = E[v].ub; + + a_gcd = Ep[v-1].a; + gamma = Gamma[v-1]; + epsilon = Epsilon[v-1]; + + /* Generate set of allowed solutions */ + c = b / a_gcd; + r = b % a_gcd; + if (r != 0) { + ++*count; + return MEM_OVERLAP_NO; + } + + c1 = a2 / a_gcd; + c2 = a1 / a_gcd; + + /* + The set to enumerate is: + x1 = gamma*c + c1*t + x2 = epsilon*c - c2*t + t integer + 0 <= x1 <= u1 + 0 <= x2 <= u2 + and we have c, c1, c2 >= 0 + */ + + x10 = mul_64_64(gamma, c); + x20 = mul_64_64(epsilon, c); + + t_l1 = ceildiv_128_64(neg_128(x10), c1); + t_l2 = ceildiv_128_64(sub_128(x20, to_128(u2), &overflow), c2); + + t_u1 = floordiv_128_64(sub_128(to_128(u1), x10, &overflow), c1); + t_u2 = floordiv_128_64(x20, c2); + + if (overflow) { + return MEM_OVERLAP_OVERFLOW; + } + + if (gt_128(t_l2, t_l1)) { + t_l1 = t_l2; + } + + if (gt_128(t_u1, t_u2)) { + t_u1 = t_u2; + } + + if (gt_128(t_l1, t_u1)) { + ++*count; + return MEM_OVERLAP_NO; + } + + 
t_l = to_64(t_l1, &overflow); + t_u = to_64(t_u1, &overflow); + + x10 = add_128(x10, mul_64_64(c1, t_l), &overflow); + x20 = sub_128(x20, mul_64_64(c2, t_l), &overflow); + + t_u = safe_sub(t_u, t_l, &overflow); + t_l = 0; + x1 = to_64(x10, &overflow); + x2 = to_64(x20, &overflow); + + if (overflow) { + return MEM_OVERLAP_OVERFLOW; + } + + /* The bounds t_l, t_u ensure the x computed below do not overflow */ + + if (v == 1) { + /* Base case */ + if (t_u >= t_l) { + x[0] = x1 + c1*t_l; + x[1] = x2 - c2*t_l; + if (require_ub_nontrivial) { + int j, is_ub_trivial; + + is_ub_trivial = 1; + for (j = 0; j < n; ++j) { + if (x[j] != E[j].ub/2) { + is_ub_trivial = 0; + break; + } + } + + if (is_ub_trivial) { + /* Ignore 'trivial' solution */ + ++*count; + return MEM_OVERLAP_NO; + } + } + return MEM_OVERLAP_YES; + } + ++*count; + return MEM_OVERLAP_NO; + } + else { + /* Recurse to all candidates */ + for (t = t_l; t <= t_u; ++t) { + x[v] = x2 - c2*t; + + /* b2 = b - a2*x[v]; */ + b2 = safe_sub(b, safe_mul(a2, x[v], &overflow), &overflow); + if (overflow) { + return MEM_OVERLAP_OVERFLOW; + } + + res = diophantine_dfs(n, v-1, E, Ep, Gamma, Epsilon, + b2, max_work, require_ub_nontrivial, + x, count); + if (res != MEM_OVERLAP_NO) { + return res; + } + } + ++*count; + return MEM_OVERLAP_NO; + } +} + + +/** + * Solve bounded Diophantine equation + * + * The problem considered is:: + * + * A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b + * 0 <= x[i] <= U[i] + * A[i] > 0 + * + * Solve via depth-first Euclid's algorithm, as explained in [1]. + * + * If require_ub_nontrivial!=0, look for solutions to the problem + * where b = A[0]*(U[0]/2) + ... + A[n]*(U[n-1]/2) but ignoring + * the trivial solution x[i] = U[i]/2. All U[i] must be divisible by 2. + * The value given for `b` is ignored in this case. 
+ */ +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_diophantine(unsigned int n, diophantine_term_t *E, npy_int64 b, + Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x) +{ + unsigned int j; + + for (j = 0; j < n; ++j) { + if (E[j].a <= 0) { + return MEM_OVERLAP_ERROR; + } + else if (E[j].ub < 0) { + return MEM_OVERLAP_NO; + } + } + + if (require_ub_nontrivial) { + npy_int64 ub_sum = 0; + char overflow = 0; + for (j = 0; j < n; ++j) { + if (E[j].ub % 2 != 0) { + return MEM_OVERLAP_ERROR; + } + ub_sum = safe_add(ub_sum, + safe_mul(E[j].a, E[j].ub/2, &overflow), + &overflow); + } + if (overflow) { + return MEM_OVERLAP_ERROR; + } + b = ub_sum; + } + + if (b < 0) { + return MEM_OVERLAP_NO; + } + + if (n == 0) { + if (require_ub_nontrivial) { + /* Only trivial solution for 0-variable problem */ + return MEM_OVERLAP_NO; + } + if (b == 0) { + return MEM_OVERLAP_YES; + } + return MEM_OVERLAP_NO; + } + else if (n == 1) { + if (require_ub_nontrivial) { + /* Only trivial solution for 1-variable problem */ + return MEM_OVERLAP_NO; + } + if (b % E[0].a == 0) { + x[0] = b / E[0].a; + if (x[0] >= 0 && x[0] <= E[0].ub) { + return MEM_OVERLAP_YES; + } + } + return MEM_OVERLAP_NO; + } + else { + diophantine_term_t Ep[n]; + npy_int64 Epsilon[n], Gamma[n]; + Py_ssize_t count = 0; + + if (diophantine_precompute(n, E, Ep, Gamma, Epsilon)) { + return MEM_OVERLAP_OVERFLOW; + } + return diophantine_dfs(n, n-1, E, Ep, Gamma, Epsilon, b, max_work, + require_ub_nontrivial, x, &count); + } +} + + +static int +diophantine_sort_A(const void *xp, const void *yp) +{ + npy_int64 xa = ((diophantine_term_t*)xp)->a; + npy_int64 ya = ((diophantine_term_t*)yp)->a; + + if (xa < ya) { + return 1; + } + else if (ya < xa) { + return -1; + } + else { + return 0; + } +} + + +/** + * Simplify Diophantine decision problem. + * + * Combine identical coefficients, remove unnecessary variables, and trim + * bounds. + * + * The feasible/infeasible decision result is retained. + * + * Returns: 0 (success), -1 (integer overflow). + */ +NPY_VISIBILITY_HIDDEN int +diophantine_simplify(unsigned int *n, diophantine_term_t *E, npy_int64 b) +{ + unsigned int i, j, m; + char overflow = 0; + + /* Skip obviously infeasible cases */ + for (j = 0; j < *n; ++j) { + if (E[j].ub < 0) { + return 0; + } + } + + if (b < 0) { + return 0; + } + + /* Sort vs. 
coefficients */ + qsort(E, *n, sizeof(diophantine_term_t), diophantine_sort_A); + + /* Combine identical coefficients */ + m = *n; + i = 0; + for (j = 1; j < m; ++j) { + if (E[i].a == E[j].a) { + E[i].ub = safe_add(E[i].ub, E[j].ub, &overflow); + --*n; + } + else { + ++i; + if (i != j) { + E[i] = E[j]; + } + } + } + + /* Trim bounds and remove unnecessary variables */ + m = *n; + i = 0; + for (j = 0; j < m; ++j) { + E[j].ub = MIN(E[j].ub, b / E[j].a); + if (E[j].ub == 0) { + /* If the problem is feasible at all, x[i]=0 */ + --*n; + } + else { + if (i != j) { + E[i] = E[j]; + } + ++i; + } + } + + if (overflow) { + return -1; + } + else { + return 0; + } +} + + +/* Gets a half-open range [start, end) of offsets from the data pointer */ +NPY_VISIBILITY_HIDDEN void +offset_bounds_from_strides(const int itemsize, const int nd, + const npy_intp *dims, const npy_intp *strides, + npy_intp *lower_offset, npy_intp *upper_offset) +{ + npy_intp max_axis_offset; + npy_intp lower = 0; + npy_intp upper = 0; + int i; + + for (i = 0; i < nd; i++) { + if (dims[i] == 0) { + /* If the array size is zero, return an empty range */ + *lower_offset = 0; + *upper_offset = 0; + return; + } + /* Expand either upwards or downwards depending on stride */ + max_axis_offset = strides[i] * (dims[i] - 1); + if (max_axis_offset > 0) { + upper += max_axis_offset; + } + else { + lower += max_axis_offset; + } + } + /* Return a half-open range */ + upper += itemsize; + *lower_offset = lower; + *upper_offset = upper; +} + + +/* Gets a half-open range [start, end) which contains the array data */ +static void +get_array_memory_extents(PyArrayObject *arr, + npy_uintp *out_start, npy_uintp *out_end, + npy_uintp *num_bytes) +{ + npy_intp low, upper; + int j; + offset_bounds_from_strides(PyArray_ITEMSIZE(arr), PyArray_NDIM(arr), + PyArray_DIMS(arr), PyArray_STRIDES(arr), + &low, &upper); + *out_start = (npy_uintp)PyArray_DATA(arr) + (npy_uintp)low; + *out_end = (npy_uintp)PyArray_DATA(arr) + (npy_uintp)upper; + + *num_bytes = PyArray_ITEMSIZE(arr); + for (j = 0; j < PyArray_NDIM(arr); ++j) { + *num_bytes *= PyArray_DIM(arr, j); + } +} + + +static int +strides_to_terms(PyArrayObject *arr, diophantine_term_t *terms, + unsigned int *nterms, int skip_empty) +{ + unsigned int i; + + for (i = 0; i < PyArray_NDIM(arr); ++i) { + if (skip_empty) { + if (PyArray_DIM(arr, i) <= 1 || PyArray_STRIDE(arr, i) == 0) { + continue; + } + } + + terms[*nterms].a = PyArray_STRIDE(arr, i); + + if (terms[*nterms].a < 0) { + terms[*nterms].a = -terms[*nterms].a; + } + + if (terms[*nterms].a < 0) { + /* integer overflow */ + return 1; + } + + terms[*nterms].ub = PyArray_DIM(arr, i) - 1; + ++*nterms; + } + + return 0; +} + + +/** + * Determine whether two arrays share some memory. + * + * Returns: 0 (no shared memory), 1 (shared memory), or < 0 (failed to solve). + * + * Note that failures to solve can occur due to integer overflows, or effort + * required solving the problem exceeding max_work. The general problem is + * NP-hard and worst case runtime is exponential in the number of dimensions. + * max_work controls the amount of work done, either exact (max_work == -1), only + * a simple memory extent check (max_work == 0), or set an upper bound + * max_work > 0 for the number of solution candidates considered. 
+ */ +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_may_share_memory(PyArrayObject *a, PyArrayObject *b, + Py_ssize_t max_work) +{ + npy_int64 rhs; + diophantine_term_t terms[2*NPY_MAXDIMS+2]; + npy_uintp start1 = 0, start2 = 0, end1 = 0, end2 = 0, size1 = 0, size2 = 0; + npy_int64 x[2*NPY_MAXDIMS+2]; + unsigned int nterms; + + get_array_memory_extents(a, &start1, &end1, &size1); + get_array_memory_extents(b, &start2, &end2, &size2); + + if (!(start1 < end2 && start2 < end1 && start1 < end1 && start2 < end2)) { + /* Memory extents don't overlap */ + return MEM_OVERLAP_NO; + } + + if (max_work == 0) { + /* Too much work required, give up */ + return MEM_OVERLAP_TOO_HARD; + } + + /* Convert problem to Diophantine equation form with positive coefficients. + The bounds computed by offset_bounds_from_strides correspond to + all-positive strides. + + start1 + sum(abs(stride1)*x1) + == start2 + sum(abs(stride2)*x2) + == end1 - 1 - sum(abs(stride1)*x1') + == end2 - 1 - sum(abs(stride2)*x2') + + <=> + + sum(abs(stride1)*x1) + sum(abs(stride2)*x2') + == end2 - 1 - start1 + + OR + + sum(abs(stride1)*x1') + sum(abs(stride2)*x2) + == end1 - 1 - start2 + + We pick the problem with the smaller RHS (they are non-negative due to + the extent check above.) + */ + + rhs = MIN(end2 - 1 - start1, end1 - 1 - start2); + + if (rhs != (npy_uintp)rhs) { + /* Integer overflow */ + return MEM_OVERLAP_OVERFLOW; + } + + nterms = 0; + if (strides_to_terms(a, terms, &nterms, 1)) { + return MEM_OVERLAP_OVERFLOW; + } + if (strides_to_terms(b, terms, &nterms, 1)) { + return MEM_OVERLAP_OVERFLOW; + } + if (PyArray_ITEMSIZE(a) > 1) { + terms[nterms].a = 1; + terms[nterms].ub = PyArray_ITEMSIZE(a) - 1; + ++nterms; + } + if (PyArray_ITEMSIZE(b) > 1) { + terms[nterms].a = 1; + terms[nterms].ub = PyArray_ITEMSIZE(b) - 1; + ++nterms; + } + + /* Simplify, if possible */ + if (diophantine_simplify(&nterms, terms, rhs)) { + /* Integer overflow */ + return MEM_OVERLAP_OVERFLOW; + } + + /* Solve */ + return solve_diophantine(nterms, terms, rhs, max_work, 0, x); +} + + +/** + * Determine whether an array has internal overlap. + * + * Returns: 0 (no overlap), 1 (overlap), or < 0 (failed to solve). + * + * max_work and reasons for solver failures are as in solve_may_share_memory. + */ +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_may_have_internal_overlap(PyArrayObject *a, Py_ssize_t max_work) +{ + diophantine_term_t terms[NPY_MAXDIMS+1]; + npy_int64 x[NPY_MAXDIMS+1]; + unsigned int nterms; + int i, j; + + if (PyArray_ISCONTIGUOUS(a)) { + /* Quick case */ + return MEM_OVERLAP_NO; + } + + /* The internal memory overlap problem is looking for two different + solutions to + + sum(a*x) = b, 0 <= x[i] <= ub[i] + + for any b. Equivalently, + + sum(a*x0) - sum(a*x1) = 0 + + Mapping the coefficients on the left by x0'[i] = x0[i] if a[i] > 0 + else ub[i]-x0[i] and opposite for x1, we have + + sum(abs(a)*(x0' + x1')) = sum(abs(a)*ub) + + Now, x0!=x1 if for some i we have x0'[i] + x1'[i] != ub[i]. + We can now change variables to z[i] = x0'[i] + x1'[i] so the problem + becomes + + sum(abs(a)*z) = sum(abs(a)*ub), 0 <= z[i] <= 2*ub[i], z != ub + + This can be solved with solve_diophantine. 
+ */ + + nterms = 0; + if (strides_to_terms(a, terms, &nterms, 0)) { + return MEM_OVERLAP_OVERFLOW; + } + if (PyArray_ITEMSIZE(a) > 1) { + terms[nterms].a = 1; + terms[nterms].ub = PyArray_ITEMSIZE(a) - 1; + ++nterms; + } + + /* Get rid of zero coefficients and empty terms */ + i = 0; + for (j = 0; j < nterms; ++j) { + if (terms[j].ub == 0) { + continue; + } + else if (terms[j].ub < 0) { + return MEM_OVERLAP_NO; + } + else if (terms[j].a == 0) { + return MEM_OVERLAP_YES; + } + if (i != j) { + terms[i] = terms[j]; + } + ++i; + } + nterms = i; + + /* Double bounds to get the internal overlap problem */ + for (j = 0; j < nterms; ++j) { + terms[j].ub *= 2; + } + + /* Sort vs. coefficients; cannot call diophantine_simplify because it may + change the decision problem inequality part */ + qsort(terms, nterms, sizeof(diophantine_term_t), diophantine_sort_A); + + /* Solve */ + return solve_diophantine(nterms, terms, -1, max_work, 1, x); +} diff --git a/numpy/core/src/private/mem_overlap.h b/numpy/core/src/private/mem_overlap.h new file mode 100644 index 000000000..8044f1663 --- /dev/null +++ b/numpy/core/src/private/mem_overlap.h @@ -0,0 +1,50 @@ +#ifndef MEM_OVERLAP_H_ +#define MEM_OVERLAP_H_ + +#include "npy_config.h" +#include "numpy/ndarraytypes.h" + + +/* Bounds check only */ +#define NPY_MAY_SHARE_BOUNDS 0 + +/* Exact solution */ +#define NPY_MAY_SHARE_EXACT -1 + + +typedef enum { + MEM_OVERLAP_NO = 0, /* no solution exists */ + MEM_OVERLAP_YES = 1, /* solution found */ + MEM_OVERLAP_TOO_HARD = -1, /* max_work exceeded */ + MEM_OVERLAP_OVERFLOW = -2, /* algorithm failed due to integer overflow */ + MEM_OVERLAP_ERROR = -3 /* invalid input */ +} mem_overlap_t; + + +typedef struct { + npy_int64 a; + npy_int64 ub; +} diophantine_term_t; + +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_diophantine(unsigned int n, diophantine_term_t *E, + npy_int64 b, Py_ssize_t max_work, int require_nontrivial, + npy_int64 *x); + +NPY_VISIBILITY_HIDDEN int +diophantine_simplify(unsigned int *n, diophantine_term_t *E, npy_int64 b); + +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_may_share_memory(PyArrayObject *a, PyArrayObject *b, + Py_ssize_t max_work); + +NPY_VISIBILITY_HIDDEN mem_overlap_t +solve_may_have_internal_overlap(PyArrayObject *a, Py_ssize_t max_work); + +NPY_VISIBILITY_HIDDEN void +offset_bounds_from_strides(const int itemsize, const int nd, + const npy_intp *dims, const npy_intp *strides, + npy_intp *lower_offset, npy_intp *upper_offset); + +#endif + diff --git a/numpy/core/src/private/npy_extint128.h b/numpy/core/src/private/npy_extint128.h new file mode 100644 index 000000000..6a35e736f --- /dev/null +++ b/numpy/core/src/private/npy_extint128.h @@ -0,0 +1,317 @@ +#ifndef NPY_EXTINT128_H_ +#define NPY_EXTINT128_H_ + + +typedef struct { + char sign; + npy_uint64 lo, hi; +} npy_extint128_t; + + +/* Integer addition with overflow checking */ +static NPY_INLINE npy_int64 +safe_add(npy_int64 a, npy_int64 b, char *overflow_flag) +{ + if (a > 0 && b > NPY_MAX_INT64 - a) { + *overflow_flag = 1; + } + else if (a < 0 && b < NPY_MIN_INT64 - a) { + *overflow_flag = 1; + } + return a + b; +} + + +/* Integer subtraction with overflow checking */ +static NPY_INLINE npy_int64 +safe_sub(npy_int64 a, npy_int64 b, char *overflow_flag) +{ + if (a >= 0 && b < a - NPY_MAX_INT64) { + *overflow_flag = 1; + } + else if (a < 0 && b > a - NPY_MIN_INT64) { + *overflow_flag = 1; + } + return a - b; +} + + +/* Integer multiplication with overflow checking */ +static NPY_INLINE npy_int64 +safe_mul(npy_int64 a, npy_int64 b, char 
*overflow_flag) +{ + if (a > 0) { + if (b > NPY_MAX_INT64 / a || b < NPY_MIN_INT64 / a) { + *overflow_flag = 1; + } + } + else if (a < 0) { + if (b > 0 && a < NPY_MIN_INT64 / b) { + *overflow_flag = 1; + } + else if (b < 0 && a < NPY_MAX_INT64 / b) { + *overflow_flag = 1; + } + } + return a * b; +} + + +/* Long integer init */ +static NPY_INLINE npy_extint128_t +to_128(npy_int64 x) +{ + npy_extint128_t result; + result.sign = (x >= 0 ? 1 : -1); + if (x >= 0) { + result.lo = x; + } + else { + result.lo = (npy_uint64)(-(x + 1)) + 1; + } + result.hi = 0; + return result; +} + + +static NPY_INLINE npy_int64 +to_64(npy_extint128_t x, char *overflow) +{ + if (x.hi != 0 || + (x.sign > 0 && x.lo > NPY_MAX_INT64) || + (x.sign < 0 && x.lo != 0 && x.lo - 1 > -(NPY_MIN_INT64 + 1))) { + *overflow = 1; + } + return x.lo * x.sign; +} + + +/* Long integer multiply */ +static NPY_INLINE npy_extint128_t +mul_64_64(npy_int64 a, npy_int64 b) +{ + npy_extint128_t x, y, z; + npy_uint64 x1, x2, y1, y2, r1, r2, prev; + + x = to_128(a); + y = to_128(b); + + x1 = x.lo & 0xffffffff; + x2 = x.lo >> 32; + + y1 = y.lo & 0xffffffff; + y2 = y.lo >> 32; + + r1 = x1*y2; + r2 = x2*y1; + + z.sign = x.sign * y.sign; + z.hi = x2*y2 + (r1 >> 32) + (r2 >> 32); + z.lo = x1*y1; + + /* Add with carry */ + prev = z.lo; + z.lo += (r1 << 32); + if (z.lo < prev) { + ++z.hi; + } + + prev = z.lo; + z.lo += (r2 << 32); + if (z.lo < prev) { + ++z.hi; + } + + return z; +} + + +/* Long integer add */ +static NPY_INLINE npy_extint128_t +add_128(npy_extint128_t x, npy_extint128_t y, char *overflow) +{ + npy_extint128_t z; + + if (x.sign == y.sign) { + z.sign = x.sign; + z.hi = x.hi + y.hi; + if (z.hi < x.hi) { + *overflow = 1; + } + z.lo = x.lo + y.lo; + if (z.lo < x.lo) { + if (z.hi == NPY_MAX_UINT64) { + *overflow = 1; + } + ++z.hi; + } + } + else if (x.hi > y.hi || (x.hi == y.hi && x.lo >= y.lo)) { + z.sign = x.sign; + z.hi = x.hi - y.hi; + z.lo = x.lo; + z.lo -= y.lo; + if (z.lo > x.lo) { + --z.hi; + } + } + else { + z.sign = y.sign; + z.hi = y.hi - x.hi; + z.lo = y.lo; + z.lo -= x.lo; + if (z.lo > y.lo) { + --z.hi; + } + } + + return z; +} + + +/* Long integer negation */ +static NPY_INLINE npy_extint128_t +neg_128(npy_extint128_t x) +{ + npy_extint128_t z = x; + z.sign *= -1; + return z; +} + + +static NPY_INLINE npy_extint128_t +sub_128(npy_extint128_t x, npy_extint128_t y, char *overflow) +{ + return add_128(x, neg_128(y), overflow); +} + + +static NPY_INLINE npy_extint128_t +shl_128(npy_extint128_t v) +{ + npy_extint128_t z; + z = v; + z.hi <<= 1; + z.hi |= (z.lo & (((npy_uint64)1) << 63)) >> 63; + z.lo <<= 1; + return z; +} + + +static NPY_INLINE npy_extint128_t +shr_128(npy_extint128_t v) +{ + npy_extint128_t z; + z = v; + z.lo >>= 1; + z.lo |= (z.hi & 0x1) << 63; + z.hi >>= 1; + return z; +} + +static NPY_INLINE int +gt_128(npy_extint128_t a, npy_extint128_t b) +{ + if (a.sign > 0 && b.sign > 0) { + return (a.hi > b.hi) || (a.hi == b.hi && a.lo > b.lo); + } + else if (a.sign < 0 && b.sign < 0) { + return (a.hi < b.hi) || (a.hi == b.hi && a.lo < b.lo); + } + else if (a.sign > 0 && b.sign < 0) { + return a.hi != 0 || a.lo != 0 || b.hi != 0 || b.lo != 0; + } + else { + return 0; + } +} + + +/* Long integer divide */ +static NPY_INLINE npy_extint128_t +divmod_128_64(npy_extint128_t x, npy_int64 b, npy_int64 *mod) +{ + npy_extint128_t remainder, pointer, result, divisor; + char overflow = 0; + + assert(b > 0); + + if (b <= 1 || x.hi == 0) { + result.sign = x.sign; + result.lo = x.lo / b; + result.hi = x.hi / b; + *mod = x.sign * (x.lo % 
b); + return result; + } + + /* Long division, not the most efficient choice */ + remainder = x; + remainder.sign = 1; + + divisor.sign = 1; + divisor.hi = 0; + divisor.lo = b; + + result.sign = 1; + result.lo = 0; + result.hi = 0; + + pointer.sign = 1; + pointer.lo = 1; + pointer.hi = 0; + + while ((divisor.hi & (((npy_uint64)1) << 63)) == 0 && + gt_128(remainder, divisor)) { + divisor = shl_128(divisor); + pointer = shl_128(pointer); + } + + while (pointer.lo || pointer.hi) { + if (!gt_128(divisor, remainder)) { + remainder = sub_128(remainder, divisor, &overflow); + result = add_128(result, pointer, &overflow); + } + divisor = shr_128(divisor); + pointer = shr_128(pointer); + } + + /* Fix signs and return; cannot overflow */ + result.sign = x.sign; + *mod = x.sign * remainder.lo; + + return result; +} + + +/* Divide and round down (positive divisor; no overflows) */ +static NPY_INLINE npy_extint128_t +floordiv_128_64(npy_extint128_t a, npy_int64 b) +{ + npy_extint128_t result; + npy_int64 remainder; + char overflow = 0; + assert(b > 0); + result = divmod_128_64(a, b, &remainder); + if (a.sign < 0 && remainder != 0) { + result = sub_128(result, to_128(1), &overflow); + } + return result; +} + + +/* Divide and round up (positive divisor; no overflows) */ +static NPY_INLINE npy_extint128_t +ceildiv_128_64(npy_extint128_t a, npy_int64 b) +{ + npy_extint128_t result; + npy_int64 remainder; + char overflow = 0; + assert(b > 0); + result = divmod_128_64(a, b, &remainder); + if (a.sign > 0 && remainder != 0) { + result = add_128(result, to_128(1), &overflow); + } + return result; +} + +#endif diff --git a/numpy/core/tests/test_extint128.py b/numpy/core/tests/test_extint128.py new file mode 100644 index 000000000..2afae2f6b --- /dev/null +++ b/numpy/core/tests/test_extint128.py @@ -0,0 +1,225 @@ +from __future__ import division, absolute_import, print_function + +import sys +import itertools +import contextlib +import operator + +import numpy as np +import numpy.core.multiarray_tests as mt +from numpy.compat import long + +from numpy.testing import assert_raises, assert_equal + + +INT64_MAX = np.iinfo(np.int64).max +INT64_MIN = np.iinfo(np.int64).min +INT64_MID = 2**32 + +# int128 is not two's complement, the sign bit is separate +INT128_MAX = 2**128 - 1 +INT128_MIN = -INT128_MAX +INT128_MID = 2**64 + +INT64_VALUES = ( + [INT64_MIN + j for j in range(20)] + + [INT64_MAX - j for j in range(20)] + + [INT64_MID + j for j in range(-20, 20)] + + [2*INT64_MID + j for j in range(-20, 20)] + + [INT64_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) +) + +INT128_VALUES = ( + [INT128_MIN + j for j in range(20)] + + [INT128_MAX - j for j in range(20)] + + [INT128_MID + j for j in range(-20, 20)] + + [2*INT128_MID + j for j in range(-20, 20)] + + [INT128_MID//2 + j for j in range(-20, 20)] + + list(range(-70, 70)) + + [False] # negative zero +) + +INT64_POS_VALUES = [x for x in INT64_VALUES if x > 0] + + +@contextlib.contextmanager +def exc_iter(*args): + """ + Iterate over Cartesian product of *args, and if an exception is raised, + add information of the current iterate. 
+ """ + + value = [None] + + def iterate(): + for v in itertools.product(*args): + value[0] = v + yield v + + try: + yield iterate() + except: + import traceback + msg = "At: %r\n%s" % (repr(value[0]), + traceback.format_exc()) + raise AssertionError(msg) + + +def test_safe_binop(): + # Test checked arithmetic routines + + ops = [ + (operator.add, 1), + (operator.sub, 2), + (operator.mul, 3) + ] + + with exc_iter(ops, INT64_VALUES, INT64_VALUES) as it: + for xop, a, b in it: + pyop, op = xop + c = pyop(a, b) + + if not (INT64_MIN <= c <= INT64_MAX): + assert_raises(OverflowError, mt.extint_safe_binop, a, b, op) + else: + d = mt.extint_safe_binop(a, b, op) + if c != d: + # assert_equal is slow + assert_equal(d, c) + + +def test_to_128(): + with exc_iter(INT64_VALUES) as it: + for a, in it: + b = mt.extint_to_128(a) + if a != b: + assert_equal(b, a) + + +def test_to_64(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if not (INT64_MIN <= a <= INT64_MAX): + assert_raises(OverflowError, mt.extint_to_64, a) + else: + b = mt.extint_to_64(a) + if a != b: + assert_equal(b, a) + + +def test_mul_64_64(): + with exc_iter(INT64_VALUES, INT64_VALUES) as it: + for a, b in it: + c = a * b + d = mt.extint_mul_64_64(a, b) + if c != d: + assert_equal(d, c) + + +def test_add_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a + b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_add_128, a, b) + else: + d = mt.extint_add_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_sub_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a - b + if not (INT128_MIN <= c <= INT128_MAX): + assert_raises(OverflowError, mt.extint_sub_128, a, b) + else: + d = mt.extint_sub_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_neg_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + b = -a + c = mt.extint_neg_128(a) + if b != c: + assert_equal(c, b) + + +def test_shl_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -(((-a) << 1) & (2**128-1)) + else: + b = (a << 1) & (2**128-1) + c = mt.extint_shl_128(a) + if b != c: + assert_equal(c, b) + + +def test_shr_128(): + with exc_iter(INT128_VALUES) as it: + for a, in it: + if a < 0: + b = -((-a) >> 1) + else: + b = a >> 1 + c = mt.extint_shr_128(a) + if b != c: + assert_equal(c, b) + + +def test_gt_128(): + with exc_iter(INT128_VALUES, INT128_VALUES) as it: + for a, b in it: + c = a > b + d = mt.extint_gt_128(a, b) + if c != d: + assert_equal(d, c) + + +def test_divmod_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + if a >= 0: + c, cr = divmod(a, b) + else: + c, cr = divmod(-a, b) + c = -c + cr = -cr + + d, dr = mt.extint_divmod_128_64(a, b) + + if c != d or d != dr or b*d + dr != a: + assert_equal(d, c) + assert_equal(dr, cr) + assert_equal(b*d + dr, a) + + +def test_floordiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = a // b + d = mt.extint_floordiv_128_64(a, b) + + if c != d: + assert_equal(d, c) + + +def test_ceildiv_128_64(): + with exc_iter(INT128_VALUES, INT64_POS_VALUES) as it: + for a, b in it: + c = (a + b - 1) // b + d = mt.extint_ceildiv_128_64(a, b) + + if c != d: + assert_equal(d, c) + + +if __name__ == "__main__": + run_module_suite() diff --git a/numpy/core/tests/test_mem_overlap.py b/numpy/core/tests/test_mem_overlap.py new file mode 100644 index 000000000..728cc675d --- /dev/null +++ b/numpy/core/tests/test_mem_overlap.py @@ 
-0,0 +1,486 @@ +from __future__ import division, absolute_import, print_function + +import sys +import itertools + +import numpy as np +from numpy.testing import run_module_suite, assert_, assert_raises, assert_equal + +from numpy.core.multiarray_tests import solve_diophantine, internal_overlap +from numpy.lib.stride_tricks import as_strided +from numpy.compat import long + +if sys.version_info[0] >= 3: + xrange = range + + +ndims = 2 +size = 10 +shape = tuple([size] * ndims) + +MAY_SHARE_BOUNDS = 0 +MAY_SHARE_EXACT = -1 + + +def _indices_for_nelems(nelems): + """Returns slices of length nelems, from start onwards, in direction sign.""" + + if nelems == 0: + return [size // 2] # int index + + res = [] + for step in (1, 2): + for sign in (-1, 1): + start = size // 2 - nelems * step * sign // 2 + stop = start + nelems * step * sign + res.append(slice(start, stop, step * sign)) + + return res + + +def _indices_for_axis(): + """Returns (src, dst) pairs of indices.""" + + res = [] + for nelems in (0, 2, 3): + ind = _indices_for_nelems(nelems) + + # no itertools.product available in Py2.4 + res.extend([(a, b) for a in ind for b in ind]) # all assignments of size "nelems" + + return res + + +def _indices(ndims): + """Returns ((axis0_src, axis0_dst), (axis1_src, axis1_dst), ... ) index pairs.""" + + ind = _indices_for_axis() + + # no itertools.product available in Py2.4 + + res = [[]] + for i in range(ndims): + newres = [] + for elem in ind: + for others in res: + newres.append([elem] + others) + res = newres + + return res + + +def _check_assignment(srcidx, dstidx): + """Check assignment arr[dstidx] = arr[srcidx] works.""" + + arr = np.arange(np.product(shape)).reshape(shape) + + cpy = arr.copy() + + cpy[dstidx] = arr[srcidx] + arr[dstidx] = arr[srcidx] + + assert np.all(arr == cpy), 'assigning arr[%s] = arr[%s]' % (dstidx, srcidx) + + +def test_overlapping_assignments(): + """Test automatically generated assignments which overlap in memory.""" + + inds = _indices(ndims) + + for ind in inds: + srcidx = tuple([a[0] for a in ind]) + dstidx = tuple([a[1] for a in ind]) + + yield _check_assignment, srcidx, dstidx + + +def test_diophantine_fuzz(): + # Fuzz test the diophantine solver + rng = np.random.RandomState(1234) + + max_int = np.iinfo(np.intp).max + + for ndim in range(10): + feasible_count = 0 + infeasible_count = 0 + + min_count = 500//(ndim + 1) + + numbers = [] + while min(feasible_count, infeasible_count) < min_count: + # Ensure big and small integer problems + A_max = 1 + rng.randint(0, 11)**6 + U_max = rng.randint(0, 11)**6 + + A_max = min(max_int, A_max) + U_max = min(max_int-1, U_max) + + A = tuple(rng.randint(1, A_max+1) for j in range(ndim)) + U = tuple(rng.randint(0, U_max+2) for j in range(ndim)) + + b_ub = min(max_int-2, sum(a*ub for a, ub in zip(A, U))) + b = rng.randint(-1, b_ub+2) + + if ndim == 0 and feasible_count < min_count: + b = 0 + + X = solve_diophantine(A, U, b) + + if X is None: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert X_simplified is None, (A, U, b, X_simplified) + + # Check no solution exists (provided the problem is + # small enough so that brute force checking doesn't + # take too long) + try: + ranges = tuple(xrange(0, a*ub+1, a) for a, ub in zip(A, U)) + except OverflowError: + # xrange on 32-bit Python 2 may overflow + continue + + size = 1 + for r in ranges: + size *= len(r) + if size < 100000: + assert_(not any(sum(w) == b for w in itertools.product(*ranges))) + infeasible_count += 1 + 
else: + # Check the simplified decision problem agrees + X_simplified = solve_diophantine(A, U, b, simplify=1) + assert X_simplified is not None, (A, U, b, X_simplified) + + # Check validity + assert_(sum(a*x for a, x in zip(A, X)) == b) + assert_(all(0 <= x <= ub for x, ub in zip(X, U))) + feasible_count += 1 + + +def test_diophantine_overflow(): + # Smoke test integer overflow detection + max_intp = np.iinfo(np.intp).max + max_int64 = np.iinfo(np.int64).max + + if max_int64 <= max_intp: + # Check that the algorithm works internally in 128-bit; + # solving this problem requires large intermediate numbers + A = (max_int64//2, max_int64//2 - 10) + U = (max_int64//2, max_int64//2 - 10) + b = 2*(max_int64//2) - 10 + + assert_equal(solve_diophantine(A, U, b), (1, 1)) + + +def check_may_share_memory_exact(a, b): + got = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + assert_equal(np.may_share_memory(a, b), + np.may_share_memory(a, b, max_work=MAY_SHARE_BOUNDS)) + + a.fill(0) + b.fill(0) + a.fill(1) + exact = b.any() + + err_msg = "" + if got != exact: + err_msg = " " + "\n ".join([ + "base_a - base_b = %r" % (a.__array_interface__['data'][0] - b.__array_interface__['data'][0],), + "shape_a = %r" % (a.shape,), + "shape_b = %r" % (b.shape,), + "strides_a = %r" % (a.strides,), + "strides_b = %r" % (b.strides,), + "size_a = %r" % (a.size,), + "size_b = %r" % (b.size,) + ]) + + assert_equal(got, exact, err_msg=err_msg) + + +def test_may_share_memory_manual(): + # Manual test cases for may_share_memory + + # Base arrays + xs0 = [ + np.zeros([13, 21, 23, 22], dtype=np.int8), + np.zeros([13, 21, 23*2, 22], dtype=np.int8)[:,:,::2,:] + ] + + # Generate all negative stride combinations + xs = [] + for x in xs0: + for ss in itertools.product(*(([slice(None), slice(None, None, -1)],)*4)): + xp = x[ss] + xs.append(xp) + + for x in xs: + # The default is a simple extent check + assert_(np.may_share_memory(x[:,0,:], x[:,1,:])) + assert_(np.may_share_memory(x[:,0,:], x[:,1,:], max_work=None)) + + # Exact checks + check_may_share_memory_exact(x[:,0,:], x[:,1,:]) + check_may_share_memory_exact(x[:,::7], x[:,3::3]) + + try: + xp = x.ravel() + if xp.flags.owndata: + continue + xp = xp.view(np.int16) + except ValueError: + continue + + # 0-size arrays cannot overlap + check_may_share_memory_exact(x.ravel()[6:6], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Test itemsize is dealt with + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)) + check_may_share_memory_exact(x[:,::7], + xp.reshape(13, 21, 23, 11)[:,3::3]) + check_may_share_memory_exact(x.ravel()[6:7], + xp.reshape(13, 21, 23, 11)[:,::7]) + + # Check unit size + x = np.zeros([1], dtype=np.int8) + check_may_share_memory_exact(x, x) + check_may_share_memory_exact(x, x.copy()) + + +def check_may_share_memory_easy_fuzz(get_max_work, same_steps, min_count): + # Check that overlap problems with common strides are solved with + # little work. 
+ x = np.zeros([17,34,71,97], dtype=np.int16) + + rng = np.random.RandomState(1234) + + def random_slice(n, step): + start = rng.randint(0, n+1) + stop = rng.randint(start, n+1) + if rng.randint(0, 2) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + feasible = 0 + infeasible = 0 + + while min(feasible, infeasible) < min_count: + steps = tuple(rng.randint(1, 11) if rng.randint(0, 5) == 0 else 1 + for j in range(x.ndim)) + if same_steps: + steps2 = steps + else: + steps2 = tuple(rng.randint(1, 11) if rng.randint(0, 5) == 0 else 1 + for j in range(x.ndim)) + + t1 = np.arange(x.ndim) + rng.shuffle(t1) + + t2 = np.arange(x.ndim) + rng.shuffle(t2) + + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + s2 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps2)) + a = x[s1].transpose(t1) + b = x[s2].transpose(t2) + + bounds_overlap = np.may_share_memory(a, b) + may_share_answer = np.may_share_memory(a, b) + easy_answer = np.may_share_memory(a, b, max_work=get_max_work(a, b)) + exact_answer = np.may_share_memory(a, b, max_work=MAY_SHARE_EXACT) + + if easy_answer != exact_answer: + # assert_equal is slow... + assert_equal(easy_answer, exact_answer, err_msg=repr((s1, s2))) + + if may_share_answer != bounds_overlap: + assert_equal(may_share_answer, bounds_overlap, + err_msg=repr((s1, s2))) + + if bounds_overlap: + if exact_answer: + feasible += 1 + else: + infeasible += 1 + + +def test_may_share_memory_easy_fuzz(): + # Check that overlap problems with common strides are always + # solved with little work. + + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: 1, + same_steps=True, + min_count=2000) + + +def test_may_share_memory_harder_fuzz(): + # Overlap problems with not necessarily common strides take more + # work. + # + # The work bound below can't be reduced much. Harder problems can + # also exist but not be detected here, as the set of problems + # comes from RNG. 
+ + check_may_share_memory_easy_fuzz(get_max_work=lambda a, b: max(a.size, b.size)//2, + same_steps=False, + min_count=2000) + + +def test_shares_memory_api(): + x = np.zeros([4, 5, 6], dtype=np.int8) + + assert_equal(np.shares_memory(x, x), True) + assert_equal(np.shares_memory(x, x.copy()), False) + + a = x[:,::2,::3] + b = x[:,::3,::2] + assert_equal(np.shares_memory(a, b), True) + assert_equal(np.shares_memory(a, b, max_work=None), True) + assert_raises(np.TooHardError, np.shares_memory, a, b, max_work=1) + assert_raises(np.TooHardError, np.shares_memory, a, b, max_work=long(1)) + + +def test_internal_overlap_diophantine(): + def check(A, U, exists=None): + X = solve_diophantine(A, U, 0, require_ub_nontrivial=1) + + if exists is None: + exists = (X is not None) + + if X is not None: + assert_(sum(a*x for a, x in zip(A, X)) == sum(a*u//2 for a, u in zip(A, U))) + assert_(all(0 <= x <= u for x, u in zip(X, U))) + assert_(any(x != u//2 for x, u in zip(X, U))) + + if exists: + assert_(X is not None, repr(X)) + else: + assert_(X is None, repr(X)) + + # Smoke tests + check((3, 2), (2*2, 3*2), exists=True) + check((3*2, 2), (15*2, (3-1)*2), exists=False) + + +def test_internal_overlap_slices(): + # Slicing an array never generates internal overlap + + x = np.zeros([17,34,71,97], dtype=np.int16) + + rng = np.random.RandomState(1234) + + def random_slice(n, step): + start = rng.randint(0, n+1) + stop = rng.randint(start, n+1) + if rng.randint(0, 2) == 0: + stop, start = start, stop + step *= -1 + return slice(start, stop, step) + + cases = 0 + min_count = 5000 + + while cases < min_count: + steps = tuple(rng.randint(1, 11) if rng.randint(0, 5) == 0 else 1 + for j in range(x.ndim)) + t1 = np.arange(x.ndim) + rng.shuffle(t1) + s1 = tuple(random_slice(p, s) for p, s in zip(x.shape, steps)) + a = x[s1].transpose(t1) + + assert not internal_overlap(a) + cases += 1 + + +def check_internal_overlap(a, manual_expected=None): + got = internal_overlap(a) + + # Brute-force check + m = set() + ranges = tuple(xrange(n) for n in a.shape) + for v in itertools.product(*ranges): + offset = sum(s*w for s, w in zip(a.strides, v)) + if offset in m: + expected = True + break + else: + m.add(offset) + else: + expected = False + + # Compare + if got != expected: + assert_equal(got, expected, err_msg=repr((a.strides, a.shape))) + if manual_expected is not None and expected != manual_expected: + assert_equal(expected, manual_expected) + return got + + +def test_internal_overlap_manual(): + # Stride tricks can construct arrays with internal overlap + + # We don't care about memory bounds, the array is not + # read/write accessed + x = np.arange(1).astype(np.int8) + + # Check low-dimensional special cases + + check_internal_overlap(x, False) # 1-dim + check_internal_overlap(x.reshape([]), False) # 0-dim + + a = as_strided(x, strides=(3, 4), shape=(4, 4)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(3, 4), shape=(5, 4)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0,), shape=(0,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(1,)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0,), shape=(2,)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(87, 22)) + check_internal_overlap(a, True) + + a = as_strided(x, strides=(0, -9993), shape=(1, 22)) + check_internal_overlap(a, False) + + a = as_strided(x, strides=(0, -9993), shape=(0, 22)) + check_internal_overlap(a, False) + + +def 
test_internal_overlap_fuzz(): + # Fuzz check; the brute-force check is fairly slow + + x = np.arange(1).astype(np.int8) + + overlap = 0 + no_overlap = 0 + min_count = 100 + + rng = np.random.RandomState(1234) + + while min(overlap, no_overlap) < min_count: + ndim = rng.randint(1, 4) + + strides = tuple(rng.randint(-1000, 1000) for j in range(ndim)) + shape = tuple(rng.randint(1, 30) for j in range(ndim)) + + a = as_strided(x, strides=strides, shape=shape) + result = check_internal_overlap(a) + + if result: + overlap += 1 + else: + no_overlap += 1 + + +if __name__ == "__main__": + run_module_suite() diff --git a/numpy/core/tests/test_multiarray_assignment.py b/numpy/core/tests/test_multiarray_assignment.py deleted file mode 100644 index 86e1b125e..000000000 --- a/numpy/core/tests/test_multiarray_assignment.py +++ /dev/null @@ -1,84 +0,0 @@ -from __future__ import division, absolute_import, print_function - -import numpy as np -from numpy.testing import run_module_suite - -ndims = 2 -size = 10 -shape = tuple([size] * ndims) - - -def _indices_for_nelems(nelems): - """Returns slices of length nelems, from start onwards, in direction sign.""" - - if nelems == 0: - return [size // 2] # int index - - res = [] - for step in (1, 2): - for sign in (-1, 1): - start = size // 2 - nelems * step * sign // 2 - stop = start + nelems * step * sign - res.append(slice(start, stop, step * sign)) - - return res - - -def _indices_for_axis(): - """Returns (src, dst) pairs of indices.""" - - res = [] - for nelems in (0, 2, 3): - ind = _indices_for_nelems(nelems) - - # no itertools.product available in Py2.4 - res.extend([(a, b) for a in ind for b in ind]) # all assignments of size "nelems" - - return res - - -def _indices(ndims): - """Returns ((axis0_src, axis0_dst), (axis1_src, axis1_dst), ... ) index pairs.""" - - ind = _indices_for_axis() - - # no itertools.product available in Py2.4 - - res = [[]] - for i in range(ndims): - newres = [] - for elem in ind: - for others in res: - newres.append([elem] + others) - res = newres - - return res - - -def _check_assignment(srcidx, dstidx): - """Check assignment arr[dstidx] = arr[srcidx] works.""" - - arr = np.arange(np.product(shape)).reshape(shape) - - cpy = arr.copy() - - cpy[dstidx] = arr[srcidx] - arr[dstidx] = arr[srcidx] - - assert np.all(arr == cpy), 'assigning arr[%s] = arr[%s]' % (dstidx, srcidx) - - -def test_overlapping_assignments(): - """Test automatically generated assignments which overlap in memory.""" - - inds = _indices(ndims) - - for ind in inds: - srcidx = tuple([a[0] for a in ind]) - dstidx = tuple([a[1] for a in ind]) - - yield _check_assignment, srcidx, dstidx - - -if __name__ == "__main__": - run_module_suite() |
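
Two short usage sketches may help tie the pieces of this changeset together. First, the extended-precision divmod exercised in test_extint128.py uses truncating (round-toward-zero) division, with the remainder taking the sign of the dividend, unlike Python's floor-dividing divmod; a minimal pure-Python model of that convention (the helper name truncdivmod is hypothetical, introduced only for illustration) is:

    def truncdivmod(a, b):
        # Model of the extint_divmod_128_64 convention tested above:
        # quotient rounds toward zero, remainder carries the sign of a (b > 0).
        q, r = divmod(abs(a), b)
        if a < 0:
            q, r = -q, -r
        return q, r

    assert truncdivmod(7, 3) == (2, 1)
    assert truncdivmod(-7, 3) == (-2, -1)   # Python's divmod(-7, 3) == (-3, 2)

Second, a sketch of the overlap-detection API exercised by test_mem_overlap.py, assuming this changeset is applied (np.shares_memory, np.TooHardError and the max_work keyword are introduced here, so the public names may still change):

    import numpy as np

    x = np.zeros([4, 5, 6], dtype=np.int8)

    # An array shares memory with itself; a fresh copy never does.
    assert np.shares_memory(x, x)
    assert not np.shares_memory(x, x.copy())

    # Interleaved views of the same buffer overlap; the exact solver proves it.
    a = x[:, ::2, ::3]
    b = x[:, ::3, ::2]
    assert np.shares_memory(a, b)

    # may_share_memory defaults to a simple extent (bounds) check, so it can
    # answer True even though these two slices have no element in common.
    assert np.may_share_memory(x[:, 0, :], x[:, 1, :])

    # With a tight work budget the exact problem may be declared too hard.
    try:
        np.shares_memory(a, b, max_work=1)
    except np.TooHardError:
        pass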