Diffstat (limited to 'numpy')
43 files changed, 5664 insertions, 1721 deletions
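The first file in the diff, numpy/conftest.py, wires a --runslow flag into pytest: pytest_addoption registers the option, and pytest_collection_modifyitems stamps a skip marker onto every collected item carrying the "slow" keyword whenever the flag is absent. A minimal sketch of a test module running under these hooks (the test functions are hypothetical; the only part the hook inspects, via item.keywords, is the @pytest.mark.slow marker):

import pytest

@pytest.mark.slow
def test_exhaustive_case():
    # Skipped by default; collected and run only with `pytest --runslow`.
    assert sum(range(10**6)) == (10**6 - 1) * 10**6 // 2

def test_fast_case():
    # Unmarked tests always run.
    assert 1 + 1 == 2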
diff --git a/numpy/conftest.py b/numpy/conftest.py index ea4197049..15985a75b 100644 --- a/numpy/conftest.py +++ b/numpy/conftest.py @@ -5,6 +5,8 @@ from __future__ import division, absolute_import, print_function import warnings import pytest +import numpy +import importlib from numpy.core.multiarray_tests import get_fpu_mode @@ -52,3 +54,33 @@ def check_fpu_mode(request): raise AssertionError("FPU precision mode changed from {0:#x} to {1:#x}" " when collecting the test".format(old_mode, new_mode)) + + +def pytest_addoption(parser): + parser.addoption("--runslow", action="store_true", + default=False, help="run slow tests") + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--runslow"): + # --runslow given in cli: do not skip slow tests + return + skip_slow = pytest.mark.skip(reason="need --runslow option to run") + for item in items: + if "slow" in item.keywords: + item.add_marker(skip_slow) + + +@pytest.fixture(autouse=True) +def add_np(doctest_namespace): + doctest_namespace['np'] = numpy + + +for module, replacement in { + 'numpy.testing.decorators': 'numpy.testing.pytest_tools.decorators', + 'numpy.testing.utils': 'numpy.testing.pytest_tools.utils', +}.items(): + module = importlib.import_module(module) + replacement = importlib.import_module(replacement) + module.__dict__.clear() + module.__dict__.update(replacement.__dict__) diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index 9287be095..9470e882a 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -789,7 +789,7 @@ defdict = { docstrings.get('numpy.core.umath.divmod'), None, TD(intflt), - TD(O, f='PyNumber_Divmod'), + # TD(O, f='PyNumber_Divmod'), # gh-9730 ), 'hypot': Ufunc(2, 1, Zero, @@ -942,16 +942,42 @@ def make_arrays(funcdict): k = 0 sub = 0 - if uf.nin > 1: - assert uf.nin == 2 - thedict = chartotype2 # two inputs and one output - else: - thedict = chartotype1 # one input and one output - for t in uf.type_descriptions: - if (t.func_data not in (None, FullTypeDescr) and - not isinstance(t.func_data, FuncNameSuffix)): + if t.func_data is FullTypeDescr: + tname = english_upper(chartoname[t.type]) + datalist.append('(void *)NULL') + funclist.append( + '%s_%s_%s_%s' % (tname, t.in_, t.out, name)) + elif isinstance(t.func_data, FuncNameSuffix): + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append( + '%s_%s_%s' % (tname, name, t.func_data.suffix)) + elif t.func_data is None: + datalist.append('(void *)NULL') + tname = english_upper(chartoname[t.type]) + funclist.append('%s_%s' % (tname, name)) + if t.simd is not None: + for vt in t.simd: + code2list.append(textwrap.dedent("""\ + #ifdef HAVE_ATTRIBUTE_TARGET_{ISA} + if (NPY_CPU_SUPPORTS_{ISA}) {{ + {fname}_functions[{idx}] = {type}_{fname}_{isa}; + }} + #endif + """).format( + ISA=vt.upper(), isa=vt, + fname=name, type=tname, idx=k + )) + else: funclist.append('NULL') + if (uf.nin, uf.nout) == (2, 1): + thedict = chartotype2 + elif (uf.nin, uf.nout) == (1, 1): + thedict = chartotype1 + else: + raise ValueError("Could not handle {}[{}]".format(name, t.type)) + astype = '' if not t.astype is None: astype = '_As_%s' % thedict[t.astype] @@ -972,29 +998,6 @@ def make_arrays(funcdict): datalist.append('(void *)NULL') #datalist.append('(void *)%s' % t.func_data) sub += 1 - elif t.func_data is FullTypeDescr: - tname = english_upper(chartoname[t.type]) - datalist.append('(void *)NULL') - funclist.append( - 
'%s_%s_%s_%s' % (tname, t.in_, t.out, name)) - elif isinstance(t.func_data, FuncNameSuffix): - datalist.append('(void *)NULL') - tname = english_upper(chartoname[t.type]) - funclist.append( - '%s_%s_%s' % (tname, name, t.func_data.suffix)) - else: - datalist.append('(void *)NULL') - tname = english_upper(chartoname[t.type]) - funclist.append('%s_%s' % (tname, name)) - if t.simd is not None: - for vt in t.simd: - code2list.append("""\ -#ifdef HAVE_ATTRIBUTE_TARGET_{ISA} -if (NPY_CPU_SUPPORTS_{ISA}) {{ - {fname}_functions[{idx}] = {type}_{fname}_{isa}; -}} -#endif -""".format(ISA=vt.upper(), isa=vt, fname=name, type=tname, idx=k)) for x in t.in_ + t.out: siglist.append('NPY_%s' % (english_upper(chartoname[x]),)) @@ -1032,10 +1035,10 @@ def make_ufuncs(funcdict): # string literal in C code. We split at endlines because textwrap.wrap # do not play well with \n docstring = '\\n\"\"'.join(docstring.split(r"\n")) - mlist.append(\ -r"""f = PyUFunc_FromFuncAndData(%s_functions, %s_data, %s_signatures, %d, - %d, %d, %s, "%s", - "%s", 0);""" % (name, name, name, + mlist.append(textwrap.dedent("""\ + f = PyUFunc_FromFuncAndData(%s_functions, %s_data, %s_signatures, %d, + %d, %d, %s, "%s", + "%s", 0);""") % (name, name, name, len(uf.type_descriptions), uf.nin, uf.nout, uf.identity, @@ -1054,23 +1057,23 @@ def make_code(funcdict, filename): code3 = make_ufuncs(funcdict) code2 = indent(code2, 4) code3 = indent(code3, 4) - code = r""" + code = textwrap.dedent(r""" -/** Warning this file is autogenerated!!! + /** Warning this file is autogenerated!!! - Please make changes to the code generator program (%s) -**/ + Please make changes to the code generator program (%s) + **/ -%s + %s -static void -InitOperators(PyObject *dictionary) { - PyObject *f; + static void + InitOperators(PyObject *dictionary) { + PyObject *f; -%s -%s -} -""" % (filename, code1, code2, code3) + %s + %s + } + """) % (filename, code1, code2, code3) return code diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index 504d7e6a9..604168162 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -43,6 +43,8 @@ add_newdoc('numpy.core.umath', 'absolute', """ Calculate the absolute value element-wise. + ``np.abs`` is a shorthand for this function. + Parameters ---------- x : array_like diff --git a/numpy/core/einsumfunc.py b/numpy/core/einsumfunc.py index 3bfd984de..c5b37b7e2 100644 --- a/numpy/core/einsumfunc.py +++ b/numpy/core/einsumfunc.py @@ -1057,7 +1057,7 @@ def einsum(*operands, **kwargs): """ # Grab non-einsum kwargs - optimize_arg = kwargs.pop('optimize', False) + optimize_arg = kwargs.pop('optimize', True) # If no optimization, run pure einsum if optimize_arg is False: diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py index 431467a5a..123bff2ec 100644 --- a/numpy/core/numeric.py +++ b/numpy/core/numeric.py @@ -2276,6 +2276,9 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): relative difference (`rtol` * abs(`b`)) and the absolute difference `atol` are added together to compare against the absolute difference between `a` and `b`. + + .. warning:: The default `atol` is not appropriate for comparing numbers + that are much smaller than one (see Notes). 
Parameters ---------- @@ -2309,9 +2312,15 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) - The above equation is not symmetric in `a` and `b`, so that - `isclose(a, b)` might be different from `isclose(b, a)` in - some rare cases. + Unlike the built-in `math.isclose`, the above equation is not symmetric + in `a` and `b` -- it assumes `b` is the reference value -- so that + `isclose(a, b)` might be different from `isclose(b, a)`. Furthermore, + the default value of atol is not zero, and is used to determine what + small values should be considered close to zero. The default value is + appropriate for expected values of order unity: if the expected values + are significantly smaller than one, it can result in false positives. + `atol` should be carefully selected for the use case at hand. A zero value + for `atol` will result in `False` if either `a` or `b` is zero. Examples -------- @@ -2325,6 +2334,14 @@ def isclose(a, b, rtol=1.e-5, atol=1.e-8, equal_nan=False): array([True, False]) >>> np.isclose([1.0, np.nan], [1.0, np.nan], equal_nan=True) array([True, True]) + >>> np.isclose([1e-8, 1e-7], [0.0, 0.0]) + array([ True, False], dtype=bool) + >>> np.isclose([1e-100, 1e-7], [0.0, 0.0], atol=0.0) + array([False, False], dtype=bool) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.0]) + array([ True, True], dtype=bool) + >>> np.isclose([1e-10, 1e-10], [1e-20, 0.999999e-10], atol=0.0) + array([False, True], dtype=bool) """ def within_tol(x, y, atol, rtol): with errstate(invalid='ignore'): diff --git a/numpy/core/src/multiarray/arrayobject.c b/numpy/core/src/multiarray/arrayobject.c index 7d62401f2..424c66915 100644 --- a/numpy/core/src/multiarray/arrayobject.c +++ b/numpy/core/src/multiarray/arrayobject.c @@ -475,22 +475,10 @@ array_dealloc(PyArrayObject *self) "called."; /* 2017-Nov-10 1.14 */ if (DEPRECATE(msg) < 0) { - /* dealloc must not raise an error, best effort try to write + /* dealloc cannot raise an error, best effort try to write to stderr and clear the error */ - PyObject * s; -#if PY_MAJOR_VERSION < 3 - s = PyString_FromString(msg); -#else - s = PyUnicode_FromString(msg); -#endif - if (s) { - PyErr_WriteUnraisable(s); - Py_DECREF(s); - } - else { - PyErr_WriteUnraisable(Py_None); - } + PyErr_WriteUnraisable((PyObject *)&PyArray_Type); } retval = PyArray_ResolveWritebackIfCopy(self); if (retval < 0) diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src index f9af33491..dca6e3840 100644 --- a/numpy/core/src/multiarray/scalartypes.c.src +++ b/numpy/core/src/multiarray/scalartypes.c.src @@ -238,44 +238,34 @@ gentype_@name@(PyObject *m1, PyObject *m2) /**end repeat**/ #endif +/* Get a nested slot, or NULL if absent */ +#define GET_NESTED_SLOT(type, group, slot) \ + ((type)->group == NULL ? NULL : (type)->group->slot) + static PyObject * gentype_multiply(PyObject *m1, PyObject *m2) { - npy_intp repeat; - /* * If the other object supports sequence repeat and not number multiply - * we should call sequence repeat to support e.g. list repeat by numpy - * scalars (they may be converted to ndarray otherwise). + * we fall back on the python builtin to invoke the sequence repeat, rather + * than promoting both arguments to ndarray. + * This covers a list repeat by numpy scalars. * A python defined class will always only have the nb_multiply slot and * some classes may have neither defined. 
For the latter we need * to give the normal case a chance to convert the object to ndarray. * Probably no class has both defined, but if they do, prefer number. */ if (!PyArray_IsScalar(m1, Generic) && - ((Py_TYPE(m1)->tp_as_sequence != NULL) && - (Py_TYPE(m1)->tp_as_sequence->sq_repeat != NULL)) && - ((Py_TYPE(m1)->tp_as_number == NULL) || - (Py_TYPE(m1)->tp_as_number->nb_multiply == NULL))) { - /* Try to convert m2 to an int and try sequence repeat */ - repeat = PyArray_PyIntAsIntp(m2); - if (error_converting(repeat)) { - return NULL; - } - /* Note that npy_intp is compatible to Py_Ssize_t */ - return PySequence_Repeat(m1, repeat); + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m1), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; } if (!PyArray_IsScalar(m2, Generic) && - ((Py_TYPE(m2)->tp_as_sequence != NULL) && - (Py_TYPE(m2)->tp_as_sequence->sq_repeat != NULL)) && - ((Py_TYPE(m2)->tp_as_number == NULL) || - (Py_TYPE(m2)->tp_as_number->nb_multiply == NULL))) { - /* Try to convert m1 to an int and try sequence repeat */ - repeat = PyArray_PyIntAsIntp(m1); - if (error_converting(repeat)) { - return NULL; - } - return PySequence_Repeat(m2, repeat); + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_sequence, sq_repeat) != NULL && + GET_NESTED_SLOT(Py_TYPE(m2), tp_as_number, nb_multiply) == NULL) { + Py_INCREF(Py_NotImplemented); + return Py_NotImplemented; } /* All normal cases are handled by PyArray's multiply */ BINOP_GIVE_UP_IF_NEEDED(m1, m2, nb_multiply, gentype_multiply); diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h index e785c6796..094612b7d 100644 --- a/numpy/core/src/private/lowlevel_strided_loops.h +++ b/numpy/core/src/private/lowlevel_strided_loops.h @@ -414,20 +414,24 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape, char **out_dataC, npy_intp *out_stridesC); /* - * Return number of elements that must be peeled from - * the start of 'addr' with 'nvals' elements of size 'esize' - * in order to reach 'alignment'. - * alignment must be a power of two. - * see npy_blocked_end for an example + * Return number of elements that must be peeled from the start of 'addr' with + * 'nvals' elements of size 'esize' in order to reach blockable alignment. + * The required alignment in bytes is passed as the 'alignment' argument and + * must be a power of two. This function is used to prepare an array for + * blocking. See the 'npy_blocked_end' function documentation below for an + * example of how this function is used. */ -static NPY_INLINE npy_uintp +static NPY_INLINE npy_intp npy_aligned_block_offset(const void * addr, const npy_uintp esize, const npy_uintp alignment, const npy_uintp nvals) { - const npy_uintp offset = (npy_uintp)addr & (alignment - 1); - npy_uintp peel = offset ? (alignment - offset) / esize : 0; - peel = nvals < peel ? nvals : peel; - return peel; + npy_uintp offset, peel; + + offset = (npy_uintp)addr & (alignment - 1); + peel = offset ? (alignment - offset) / esize : 0; + peel = (peel <= nvals) ? 
peel : nvals; + assert(peel <= NPY_MAX_INTP); + return (npy_intp)peel; } /* @@ -450,11 +454,16 @@ npy_aligned_block_offset(const void * addr, const npy_uintp esize, * for(; i < n; i++) * <scalar-op> */ -static NPY_INLINE npy_uintp -npy_blocked_end(const npy_uintp offset, const npy_uintp esize, +static NPY_INLINE npy_intp +npy_blocked_end(const npy_uintp peel, const npy_uintp esize, const npy_uintp vsz, const npy_uintp nvals) { - return nvals - offset - (nvals - offset) % (vsz / esize); + npy_uintp ndiff = nvals - peel; + npy_uintp res = (ndiff - ndiff % (vsz / esize)); + + assert(nvals >= peel); + assert(res <= NPY_MAX_INTP); + return (npy_intp)(res); } diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 8791788d0..c1dfe15da 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1653,11 +1653,12 @@ NPY_NO_EXPORT void * when updating also update similar complex floats summation */ static @type@ -pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) +pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride) { if (n < 8) { npy_intp i; @type@ res = 0.; + for (i = 0; i < n; i++) { res += @trf@(*((@dtype@*)(a + i * stride))); } @@ -1683,7 +1684,7 @@ pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) for (i = 8; i < n - (n % 8); i += 8) { /* small blocksizes seems to mess with hardware prefetch */ - NPY_PREFETCH(a + (i + 512 / sizeof(@dtype@)) * stride, 0, 3); + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@dtype@))*stride, 0, 3); r[0] += @trf@(*((@dtype@ *)(a + (i + 0) * stride))); r[1] += @trf@(*((@dtype@ *)(a + (i + 1) * stride))); r[2] += @trf@(*((@dtype@ *)(a + (i + 2) * stride))); @@ -1706,7 +1707,8 @@ pairwise_sum_@TYPE@(char *a, npy_uintp n, npy_intp stride) } else { /* divide by two but avoid non-multiples of unroll factor */ - npy_uintp n2 = n / 2; + npy_intp n2 = n / 2; + n2 -= n2 % 8; return pairwise_sum_@TYPE@(a, n2, stride) + pairwise_sum_@TYPE@(a + n2 * stride, n - n2, stride); @@ -2425,12 +2427,13 @@ HALF_ldexp_long(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UN /* similar to pairwise sum of real floats */ static void -pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, +pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n, npy_intp stride) { assert(n % 2 == 0); if (n < 8) { npy_intp i; + *rr = 0.; *ri = 0.; for (i = 0; i < n; i += 2) { @@ -2459,7 +2462,7 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, for (i = 8; i < n - (n % 8); i += 8) { /* small blocksizes seems to mess with hardware prefetch */ - NPY_PREFETCH(a + (i + 512 / sizeof(@ftype@)) * stride, 0, 3); + NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@ftype@))*stride, 0, 3); r[0] += *((@ftype@ *)(a + (i + 0) * stride)); r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@))); r[2] += *((@ftype@ *)(a + (i + 2) * stride)); @@ -2484,7 +2487,8 @@ pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_uintp n, else { /* divide by two but avoid non-multiples of unroll factor */ @ftype@ rr1, ri1, rr2, ri2; - npy_uintp n2 = n / 2; + npy_intp n2 = n / 2; + n2 -= n2 % 8; pairwise_sum_@TYPE@(&rr1, &ri1, a, n2, stride); pairwise_sum_@TYPE@(&rr2, &ri2, a + n2 * stride, n - n2, stride); diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c index 04f5cc1d3..33518b441 100644 --- a/numpy/core/src/umath/reduction.c +++ b/numpy/core/src/umath/reduction.c @@ -411,8 +411,8 @@ PyArray_InitializeReduceResult( /* * This function executes all the standard 
NumPy reduction function - * boilerplate code, just calling assign_identity and the appropriate - * inner loop function where necessary. + * boilerplate code, just calling the appropriate inner loop function where + * necessary. * * operand : The array to be reduced. * out : NULL, or the array into which to place the result. @@ -432,11 +432,11 @@ PyArray_InitializeReduceResult( * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. - * assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise - * this function is called to initialize the result to + * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. - * data : Data which is passed to assign_identity and the inner loop. + * data : Data which is passed to the inner loop. * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. * errormask : forwarded from _get_bufsize_errmask @@ -459,7 +459,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, npy_bool *axis_flags, int reorderable, int keepdims, int subok, - PyArray_AssignReduceIdentityFunc *assign_identity, + PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask) @@ -500,7 +500,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, * Initialize the result to the reduction unit if possible, * otherwise copy the initial values and get a view to the rest. */ - if (assign_identity != NULL) { + if (identity != Py_None) { /* * If this reduction is non-reorderable, make sure there are * only 0 or 1 axes in axis_flags. @@ -510,7 +510,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, goto fail; } - if (assign_identity(result, data) < 0) { + if (PyArray_FillWithScalar(result, identity) < 0) { goto fail; } op_view = operand; diff --git a/numpy/core/src/umath/reduction.h b/numpy/core/src/umath/reduction.h index f7812768c..dfaeabcbb 100644 --- a/numpy/core/src/umath/reduction.h +++ b/numpy/core/src/umath/reduction.h @@ -109,8 +109,8 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, /* * This function executes all the standard NumPy reduction function - * boilerplate code, just calling assign_identity and the appropriate - * inner loop function where necessary. + * boilerplate code, just calling the appropriate inner loop function where + * necessary. * * operand : The array to be reduced. * out : NULL, or the array into which to place the result. @@ -130,11 +130,11 @@ typedef int (PyArray_ReduceLoopFunc)(NpyIter *iter, * with size one. * subok : If true, the result uses the subclass of operand, otherwise * it is always a base class ndarray. - * assign_identity : If NULL, PyArray_InitializeReduceResult is used, otherwise - * this function is called to initialize the result to + * identity : If Py_None, PyArray_InitializeReduceResult is used, otherwise + * this value is used to initialize the result to * the reduction's unit. * loop : The loop which does the reduction. - * data : Data which is passed to assign_identity and the inner loop. + * data : Data which is passed to the inner loop. * buffersize : Buffer size for the iterator. For the default, pass in 0. * funcname : The name of the reduction function, for error messages. 
* errormask : forwarded from _get_bufsize_errmask @@ -148,7 +148,7 @@ PyUFunc_ReduceWrapper(PyArrayObject *operand, PyArrayObject *out, npy_bool *axis_flags, int reorderable, int keepdims, int subok, - PyArray_AssignReduceIdentityFunc *assign_identity, + PyObject *identity, PyArray_ReduceLoopFunc *loop, void *data, npy_intp buffersize, const char *funcname, int errormask); diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 8a799fe61..2241414ac 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -22,40 +22,38 @@ #include "numpy/npy_math.h" #ifdef NPY_HAVE_SSE2_INTRINSICS #include <emmintrin.h> +#if !defined(_MSC_VER) || _MSC_VER >= 1600 +#include <immintrin.h> +#else +#undef __AVX2__ +#undef __AVX512F__ +#endif #endif #include <assert.h> #include <stdlib.h> #include <float.h> #include <string.h> /* for memcpy */ -/* Figure out the right abs function for pointer addresses */ -static NPY_INLINE npy_intp -abs_intp(npy_intp x) +static NPY_INLINE npy_uintp +abs_ptrdiff(char *a, char *b) { -#if (NPY_SIZEOF_INTP <= NPY_SIZEOF_INT) - return abs(x); -#elif (NPY_SIZEOF_INTP <= NPY_SIZEOF_LONG) - return labs(x); -#elif defined(_MSC_VER) && (_MSC_VER < 1600) - /* llabs is not available with Visual Studio 2008 */ - return x > 0 ? x : -x; -#else - return llabs(x); -#endif + return (a > b) ? (a - b) : (b - a); } + /* * stride is equal to element size and input and destination are equal or - * don't overlap within one register + * don't overlap within one register. The check of the steps against + * esize also guarantees that steps are >= 0. */ #define IS_BLOCKABLE_UNARY(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && \ (npy_is_aligned(args[0], esize) && npy_is_aligned(args[1], esize)) && \ - ((abs_intp(args[1] - args[0]) >= (vsize)) || \ - ((abs_intp(args[1] - args[0]) == 0)))) + ((abs_ptrdiff(args[1], args[0]) >= (vsize)) || \ + ((abs_ptrdiff(args[1], args[0]) == 0)))) #define IS_BLOCKABLE_REDUCE(esize, vsize) \ - (steps[1] == (esize) && abs_intp(args[1] - args[0]) >= (vsize) && \ + (steps[1] == (esize) && abs_ptrdiff(args[1], args[0]) >= (vsize) && \ npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize))) @@ -63,26 +61,26 @@ abs_intp(npy_intp x) (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ npy_is_aligned(args[0], (esize)) && \ - (abs_intp(args[2] - args[0]) >= (vsize) || \ - abs_intp(args[2] - args[0]) == 0) && \ - (abs_intp(args[2] - args[1]) >= (vsize) || \ - abs_intp(args[2] - args[1]) >= 0)) + (abs_ptrdiff(args[2], args[0]) >= (vsize) || \ + abs_ptrdiff(args[2], args[0]) == 0) && \ + (abs_ptrdiff(args[2], args[1]) >= (vsize) || \ + abs_ptrdiff(args[2], args[1]) >= 0)) #define IS_BLOCKABLE_BINARY_SCALAR1(esize, vsize) \ (steps[0] == 0 && steps[1] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[1], (esize)) && \ - ((abs_intp(args[2] - args[1]) >= (vsize)) || \ - (abs_intp(args[2] - args[1]) == 0)) && \ - abs_intp(args[2] - args[0]) >= (esize)) + ((abs_ptrdiff(args[2], args[1]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[1]) == 0)) && \ + abs_ptrdiff(args[2], args[0]) >= (esize)) #define IS_BLOCKABLE_BINARY_SCALAR2(esize, vsize) \ (steps[1] == 0 && steps[0] == steps[2] && steps[2] == (esize) && \ npy_is_aligned(args[2], (esize)) && npy_is_aligned(args[0], (esize)) && \ - ((abs_intp(args[2] - args[0]) >= (vsize)) || \ - (abs_intp(args[2] - 
args[0]) == 0)) && \ - abs_intp(args[2] - args[1]) >= (esize)) + ((abs_ptrdiff(args[2], args[0]) >= (vsize)) || \ + (abs_ptrdiff(args[2], args[0]) == 0)) && \ + abs_ptrdiff(args[2], args[1]) >= (esize)) -#undef abs_intp +#undef abs_ptrdiff #define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \ (steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \ @@ -401,7 +399,11 @@ static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v) * #scalarf = npy_sqrtf, npy_sqrt# * #c = f, # * #vtype = __m128, __m128d# + * #vtype256 = __m256, __m256d# + * #vtype512 = __m512, __m512d# * #vpre = _mm, _mm# + * #vpre256 = _mm256, _mm256# + * #vpre512 = _mm512, _mm512# * #vsuf = ps, pd# * #vsufs = ss, sd# * #nan = NPY_NANF, NPY_NAN# @@ -420,6 +422,115 @@ static NPY_INLINE npy_double sse2_horizontal_@VOP@___m128d(__m128d v) static void sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], 64) && npy_is_aligned(&ip2[i], 64)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, a); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + } +#elif __AVX2__ + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[i] @OP@ ip2[i]; + /* lots of specializations, to squeeze out max performance */ + if (npy_is_aligned(&ip1[i], 32) && npy_is_aligned(&ip2[i], 32)) { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } + else if (npy_is_aligned(&ip1[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else if (npy_is_aligned(&ip2[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + 
@vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + if (ip1 == ip2) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, a); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + } +#else LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[i] @OP@ ip2[i]; /* lots of specializations, to squeeze out max performance */ @@ -473,6 +584,7 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) } } } +#endif LOOP_BLOCKED_END { op[i] = ip1[i] @OP@ ip2[i]; } @@ -482,6 +594,45 @@ sse2_binary_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) static void sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + const @vtype512@ a = @vpre512@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ b = @vpre512@_load_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ b = @vpre512@_loadu_@vsuf@(&ip2[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + + +#elif __AVX2__ + const @vtype256@ a = @vpre256@_set1_@vsuf@(ip1[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[0] @OP@ ip2[i]; + if (npy_is_aligned(&ip2[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ b = @vpre256@_load_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ b = @vpre256@_loadu_@vsuf@(&ip2[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else const @vtype@ a = @vpre@_set1_@vsuf@(ip1[0]); LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[0] @OP@ ip2[i]; @@ -499,6 +650,7 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i @vpre@_store_@vsuf@(&op[i], c); } } +#endif LOOP_BLOCKED_END { op[i] = ip1[0] @OP@ ip2[i]; } @@ -508,6 +660,44 @@ sse2_binary_scalar1_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i static void sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_intp n) { +#ifdef __AVX512F__ + const @vtype512@ b = @vpre512@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 64) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], 64)) { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_load_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 64) { + @vtype512@ a = @vpre512@_loadu_@vsuf@(&ip1[i]); + @vtype512@ c = @vpre512@_@VOP@_@vsuf@(a, b); + @vpre512@_store_@vsuf@(&op[i], c); + } + } + +#elif __AVX2__ + const @vtype256@ b = @vpre256@_set1_@vsuf@(ip2[0]); + LOOP_BLOCK_ALIGN_VAR(op, @type@, 32) + op[i] = ip1[i] @OP@ ip2[0]; + if (npy_is_aligned(&ip1[i], 32)) { + LOOP_BLOCKED(@type@, 32) { + @vtype256@ a = @vpre256@_load_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } + else { + LOOP_BLOCKED(@type@, 
32) { + @vtype256@ a = @vpre256@_loadu_@vsuf@(&ip1[i]); + @vtype256@ c = @vpre256@_@VOP@_@vsuf@(a, b); + @vpre256@_store_@vsuf@(&op[i], c); + } + } +#else const @vtype@ b = @vpre@_set1_@vsuf@(ip2[0]); LOOP_BLOCK_ALIGN_VAR(op, @type@, 16) op[i] = ip1[i] @OP@ ip2[0]; @@ -525,6 +715,7 @@ sse2_binary_scalar2_@kind@_@TYPE@(@type@ * op, @type@ * ip1, @type@ * ip2, npy_i @vpre@_store_@vsuf@(&op[i], c); } } +#endif LOOP_BLOCKED_END { op[i] = ip1[i] @OP@ ip2[0]; } @@ -828,7 +1019,7 @@ sse2_@kind@_@TYPE@(@type@ * op, @type@ * ip, const npy_intp n) static void sse2_@kind@_@TYPE@(@type@ * ip, @type@ * op, const npy_intp n) { - const size_t stride = 16 / sizeof(@type@); + const npy_intp stride = 16 / (npy_intp)sizeof(@type@); LOOP_BLOCK_ALIGN_VAR(ip, @type@, 16) { *op = (*op @OP@ ip[i] || npy_isnan(*op)) ? *op : ip[i]; } diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 257067023..6e8556e29 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -70,16 +70,6 @@ static int _does_loop_use_arrays(void *data); -static int -assign_reduce_identity_zero(PyArrayObject *result, void *data); - -static int -assign_reduce_identity_minusone(PyArrayObject *result, void *data); - -static int -assign_reduce_identity_one(PyArrayObject *result, void *data); - - /*UFUNC_API*/ NPY_NO_EXPORT int PyUFunc_getfperr(void) @@ -1865,6 +1855,42 @@ _get_coredim_sizes(PyUFuncObject *ufunc, PyArrayObject **op, return 0; } +/* + * Returns a new reference + * TODO: store a reference in the ufunc object itself, rather than + * constructing one each time + */ +static PyObject * +_get_identity(PyUFuncObject *ufunc, npy_bool *reorderable) { + switch(ufunc->identity) { + case PyUFunc_One: + *reorderable = 1; + return PyInt_FromLong(1); + + case PyUFunc_Zero: + *reorderable = 1; + return PyInt_FromLong(0); + + case PyUFunc_MinusOne: + *reorderable = 1; + return PyInt_FromLong(-1); + + case PyUFunc_ReorderableNone: + *reorderable = 1; + Py_RETURN_NONE; + + case PyUFunc_None: + *reorderable = 0; + Py_RETURN_NONE; + + default: + PyErr_Format(PyExc_ValueError, + "ufunc %s has an invalid identity", ufunc_get_name_cstr(ufunc)); + return NULL; + } +} + + static int PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds, @@ -2267,34 +2293,27 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc, * product of two zero-length arrays will be a scalar, * which has size one. 
*/ + npy_bool reorderable; + PyObject *identity = _get_identity(ufunc, &reorderable); + if (identity == NULL) { + retval = -1; + goto fail; + } + for (i = nin; i < nop; ++i) { if (PyArray_SIZE(op[i]) != 0) { - switch (ufunc->identity) { - case PyUFunc_Zero: - assign_reduce_identity_zero(op[i], NULL); - break; - case PyUFunc_One: - assign_reduce_identity_one(op[i], NULL); - break; - case PyUFunc_MinusOne: - assign_reduce_identity_minusone(op[i], NULL); - break; - case PyUFunc_None: - case PyUFunc_ReorderableNone: - PyErr_Format(PyExc_ValueError, - "ufunc %s ", - ufunc_name); - retval = -1; - goto fail; - default: - PyErr_Format(PyExc_ValueError, - "ufunc %s has an invalid identity for reduction", - ufunc_name); - retval = -1; - goto fail; + if (identity == Py_None) { + PyErr_Format(PyExc_ValueError, + "ufunc %s ", + ufunc_name); + Py_DECREF(identity); + retval = -1; + goto fail; } + PyArray_FillWithScalar(op[i], identity); } } + Py_DECREF(identity); } /* Check whether any errors occurred during the loop */ @@ -2689,31 +2708,6 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr, } static int -assign_reduce_identity_zero(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - return PyArray_FillWithScalar(result, PyArrayScalar_False); -} - -static int -assign_reduce_identity_one(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - return PyArray_FillWithScalar(result, PyArrayScalar_True); -} - -static int -assign_reduce_identity_minusone(PyArrayObject *result, void *NPY_UNUSED(data)) -{ - static PyObject *MinusOne = NULL; - - if (MinusOne == NULL) { - if ((MinusOne = PyInt_FromLong(-1)) == NULL) { - return -1; - } - } - return PyArray_FillWithScalar(result, MinusOne); -} - -static int reduce_loop(NpyIter *iter, char **dataptrs, npy_intp *strides, npy_intp *countptr, NpyIter_IterNextFunc *iternext, int needs_api, npy_intp skip_first_count, void *data) @@ -2818,11 +2812,12 @@ static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, int naxes, int *axes, PyArray_Descr *odtype, int keepdims) { - int iaxes, reorderable, ndim; + int iaxes, ndim; + npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; PyArray_Descr *dtype; PyArrayObject *result; - PyArray_AssignReduceIdentityFunc *assign_identity = NULL; + PyObject *identity = NULL; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ int buffersize = 0, errormask = 0; @@ -2843,60 +2838,28 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, axis_flags[axis] = 1; } - switch (ufunc->identity) { - case PyUFunc_Zero: - assign_identity = &assign_reduce_identity_zero; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - case PyUFunc_One: - assign_identity = &assign_reduce_identity_one; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - case PyUFunc_MinusOne: - assign_identity = &assign_reduce_identity_minusone; - reorderable = 1; - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { - assign_identity = NULL; - } - break; - - case PyUFunc_None: - reorderable = 0; - break; - case PyUFunc_ReorderableNone: - reorderable = 1; 
- break; - default: - PyErr_Format(PyExc_ValueError, - "ufunc %s has an invalid identity for reduction", - ufunc_name); - return NULL; + if (_get_bufsize_errmask(NULL, "reduce", &buffersize, &errormask) < 0) { + return NULL; } - if (_get_bufsize_errmask(NULL, "reduce", &buffersize, &errormask) < 0) { + /* Get the identity */ + identity = _get_identity(ufunc, &reorderable); + if (identity == NULL) { return NULL; } + /* + * The identity for a dynamic dtype like + * object arrays can't be used in general + */ + if (identity != Py_None && PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { + Py_DECREF(identity); + identity = Py_None; + Py_INCREF(identity); + } /* Get the reduction dtype */ if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) { + Py_DECREF(identity); return NULL; } @@ -2904,11 +2867,12 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NPY_UNSAFE_CASTING, axis_flags, reorderable, keepdims, 0, - assign_identity, + identity, reduce_loop, ufunc, buffersize, ufunc_name, errormask); Py_DECREF(dtype); + Py_DECREF(identity); return result; } @@ -4114,26 +4078,22 @@ ufunc_generic_call(PyUFuncObject *ufunc, PyObject *args, PyObject *kwds) PyObject *override = NULL; int errval; - /* - * Initialize all array objects to NULL to make cleanup easier - * if something goes wrong. - */ - for (i = 0; i < ufunc->nargs; i++) { - mps[i] = NULL; - } - errval = PyUFunc_CheckOverride(ufunc, "__call__", args, kwds, &override); if (errval) { return NULL; } else if (override) { - for (i = 0; i < ufunc->nargs; i++) { - PyArray_DiscardWritebackIfCopy(mps[i]); - Py_XDECREF(mps[i]); - } return override; } + /* + * Initialize all array objects to NULL to make cleanup easier + * if something goes wrong. + */ + for (i = 0; i < ufunc->nargs; i++) { + mps[i] = NULL; + } + errval = PyUFunc_GenericFunction(ufunc, args, kwds, mps); if (errval < 0) { for (i = 0; i < ufunc->nargs; i++) { @@ -5399,15 +5359,8 @@ ufunc_get_name(PyUFuncObject *ufunc) static PyObject * ufunc_get_identity(PyUFuncObject *ufunc) { - switch(ufunc->identity) { - case PyUFunc_One: - return PyInt_FromLong(1); - case PyUFunc_Zero: - return PyInt_FromLong(0); - case PyUFunc_MinusOne: - return PyInt_FromLong(-1); - } - Py_RETURN_NONE; + npy_bool reorderable; + return _get_identity(ufunc, &reorderable); } static PyObject * diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 39aab7efe..e90d2180a 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -4004,7 +4004,7 @@ class TestPutmask(object): for types in np.sctypes.values(): for T in types: if T not in unchecked_types: - yield self.tst_basic, x.copy().astype(T), T, mask, val + self.tst_basic(x.copy().astype(T), T, mask, val) def test_mask_size(self): assert_raises(ValueError, np.putmask, np.array([1, 2, 3]), [True], 5) @@ -4016,7 +4016,7 @@ class TestPutmask(object): def test_ip_byteorder(self): for dtype in ('>i4', '<i4'): - yield self.tst_byteorder, dtype + self.tst_byteorder(dtype) def test_record_array(self): # Note mixed byteorder. 
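The test-suite hunks above and below convert nose-style yield generators ("yield self.tst_basic, args") into plain loops of direct calls, because pytest does not collect yield-based generator tests. As a hedged aside, the fully idiomatic pytest form of such a loop is parametrize, which keeps each case a separately collected test. A hypothetical example, not part of this commit, modeled on the byteorder loops here with an assumed putmask check in the spirit of tst_byteorder:

import numpy as np
import pytest

@pytest.mark.parametrize("dtype", ['>i4', '<i4'])
def test_putmask_byteorder(dtype):
    # Each dtype value becomes its own collected test case.
    x = np.array([1, 2, 3], dtype=dtype)
    np.putmask(x, [True, False, True], -1)
    assert x.tolist() == [-1, 2, -1]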
@@ -4045,7 +4045,7 @@ class TestTake(object): for types in np.sctypes.values(): for T in types: if T not in unchecked_types: - yield self.tst_basic, x.copy().astype(T) + self.tst_basic(x.copy().astype(T)) def test_raise(self): x = np.random.random(24)*100 @@ -4073,7 +4073,7 @@ class TestTake(object): def test_ip_byteorder(self): for dtype in ('>i4', '<i4'): - yield self.tst_byteorder, dtype + self.tst_byteorder(dtype) def test_record_array(self): # Note mixed byteorder. @@ -4459,10 +4459,10 @@ class TestFromBuffer(object): dt = np.dtype(dtype).newbyteorder(byteorder) x = (np.random.random((4, 7))*5).astype(dt) buf = x.tobytes() - yield self.tst_basic, buf, x.flat, {'dtype':dt} + self.tst_basic(buf, x.flat, {'dtype':dt}) def test_empty(self): - yield self.tst_basic, b'', np.array([]), {} + self.tst_basic(b'', np.array([]), {}) class TestFlat(object): diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 2cbcab2d1..7c012e9e8 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1796,7 +1796,7 @@ class TestAllclose(object): (np.inf, [np.inf])] for (x, y) in data: - yield (self.tst_allclose, x, y) + self.tst_allclose(x, y) def test_ip_not_allclose(self): # Parametric test factory. @@ -1817,7 +1817,7 @@ class TestAllclose(object): (np.array([np.inf, 1]), np.array([0, np.inf]))] for (x, y) in data: - yield (self.tst_not_allclose, x, y) + self.tst_not_allclose(x, y) def test_no_parameter_modification(self): x = np.array([np.inf, 1]) @@ -1901,7 +1901,7 @@ class TestIsclose(object): tests = self.some_close_tests results = self.some_close_results for (x, y), result in zip(tests, results): - yield (assert_array_equal, np.isclose(x, y), result) + assert_array_equal(np.isclose(x, y), result) def tst_all_isclose(self, x, y): assert_(np.all(np.isclose(x, y)), "%s and %s not close" % (x, y)) @@ -1921,19 +1921,19 @@ class TestIsclose(object): def test_ip_all_isclose(self): self.setup() for (x, y) in self.all_close_tests: - yield (self.tst_all_isclose, x, y) + self.tst_all_isclose(x, y) def test_ip_none_isclose(self): self.setup() for (x, y) in self.none_close_tests: - yield (self.tst_none_isclose, x, y) + self.tst_none_isclose(x, y) def test_ip_isclose_allclose(self): self.setup() tests = (self.all_close_tests + self.none_close_tests + self.some_close_tests) for (x, y) in tests: - yield (self.tst_isclose_allclose, x, y) + self.tst_isclose_allclose(x, y) def test_equal_nan(self): assert_array_equal(np.isclose(np.nan, np.nan, equal_nan=True), [True]) @@ -2640,7 +2640,7 @@ class TestRequire(object): fd = [None, 'f8', 'c16'] for idtype, fdtype, flag in itertools.product(id, fd, self.flag_names): a = self.generate_all_false(idtype) - yield self.set_and_check_flag, flag, fdtype, a + self.set_and_check_flag(flag, fdtype, a) def test_unknown_requirement(self): a = self.generate_all_false('f8') @@ -2672,7 +2672,7 @@ class TestRequire(object): for flag in self.flag_names: a = ArraySubclass((2, 2)) - yield self.set_and_check_flag, flag, None, a + self.set_and_check_flag(flag, None, a) class TestBroadcast(object): diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index 53b67327b..50824da41 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -10,7 +10,7 @@ from numpy.testing import ( run_module_suite, assert_, assert_equal, assert_raises, assert_almost_equal, assert_allclose, assert_array_equal, - IS_PYPY, suppress_warnings, dec, _gen_alignment_data, + IS_PYPY, 
suppress_warnings, dec, _gen_alignment_data, assert_warns ) types = [np.bool_, np.byte, np.ubyte, np.short, np.ushort, np.intc, np.uintc, @@ -561,16 +561,29 @@ class TestMultiply(object): # numpy integers. And errors are raised when multiplied with others. # Some of this behaviour may be controversial and could be open for # change. + accepted_types = set(np.typecodes["AllInteger"]) + deprecated_types = set('?') + forbidden_types = ( + set(np.typecodes["All"]) - accepted_types - deprecated_types) + forbidden_types -= set('V') # can't default-construct void scalars + for seq_type in (list, tuple): seq = seq_type([1, 2, 3]) - for numpy_type in np.typecodes["AllInteger"]: + for numpy_type in accepted_types: i = np.dtype(numpy_type).type(2) assert_equal(seq * i, seq * int(i)) assert_equal(i * seq, int(i) * seq) - for numpy_type in np.typecodes["All"].replace("V", ""): - if numpy_type in np.typecodes["AllInteger"]: - continue + for numpy_type in deprecated_types: + i = np.dtype(numpy_type).type() + assert_equal( + assert_warns(DeprecationWarning, operator.mul, seq, i), + seq * int(i)) + assert_equal( + assert_warns(DeprecationWarning, operator.mul, i, seq), + int(i) * seq) + + for numpy_type in forbidden_types: i = np.dtype(numpy_type).type() assert_raises(TypeError, operator.mul, seq, i) assert_raises(TypeError, operator.mul, i, seq) diff --git a/numpy/distutils/exec_command.py b/numpy/distutils/exec_command.py index 8faf4b225..8118e2fc3 100644 --- a/numpy/distutils/exec_command.py +++ b/numpy/distutils/exec_command.py @@ -56,6 +56,7 @@ __all__ = ['exec_command', 'find_executable'] import os import sys import subprocess +import locale from numpy.distutils.misc_util import is_sequence, make_temp_file from numpy.distutils import log @@ -246,17 +247,32 @@ def _exec_command(command, use_shell=None, use_tee = None, **env): # Inherit environment by default env = env or None try: + # universal_newlines is set to False so that communicate() + # will return bytes. 
We need to decode the output ourselves + # so that Python will not raise a UnicodeDecodeError when + # it encounters an invalid character; rather, we simply replace it proc = subprocess.Popen(command, shell=use_shell, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - universal_newlines=True) + universal_newlines=False) except EnvironmentError: # Return 127, as os.spawn*() and /bin/sh do return 127, '' + text, err = proc.communicate() + text = text.decode(locale.getpreferredencoding(False), + errors='replace') + + text = text.replace('\r\n', '\n') # Another historical oddity if text[-1:] == '\n': text = text[:-1] + + # stdio uses bytes in python 2, so to avoid issues, we simply + # remove all non-ascii characters + if sys.version_info < (3, 0): + text = text.encode('ascii', errors='replace') + if use_tee and text: print(text) return proc.returncode, text diff --git a/numpy/distutils/misc_util.py b/numpy/distutils/misc_util.py index 102af874f..8bf69ffbd 100644 --- a/numpy/distutils/misc_util.py +++ b/numpy/distutils/misc_util.py @@ -9,6 +9,7 @@ import atexit import tempfile import subprocess import shutil +import multiprocessing import distutils from distutils.errors import DistutilsError @@ -92,7 +93,11 @@ def get_num_build_jobs(): """ from numpy.distutils.core import get_distribution - envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", 1)) + try: + cpu_count = len(os.sched_getaffinity(0)) + except AttributeError: + cpu_count = multiprocessing.cpu_count() + envjobs = int(os.environ.get("NPY_NUM_BUILD_JOBS", cpu_count)) dist = get_distribution() # may be None during configuration if dist is None: diff --git a/numpy/lib/__init__.py b/numpy/lib/__init__.py index d85a179dd..cc05232a2 100644 --- a/numpy/lib/__init__.py +++ b/numpy/lib/__init__.py @@ -14,6 +14,7 @@ from .shape_base import * from .stride_tricks import * from .twodim_base import * from .ufunclike import * +from .histograms import * from . import scimath as emath from .polynomial import * @@ -43,6 +44,7 @@ __all__ += arraysetops.__all__ __all__ += npyio.__all__ __all__ += financial.__all__ __all__ += nanfunctions.__all__ +__all__ += histograms.__all__ from numpy.testing import _numpy_tester test = _numpy_tester().test diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index fb37801b9..e01333b8b 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -39,12 +39,14 @@ if sys.version_info[0] < 3: else: import builtins +# needed in this module for compatibility +from numpy.lib.histograms import histogram, histogramdd __all__ = [ 'select', 'piecewise', 'trim_zeros', 'copy', 'iterable', 'percentile', 'diff', 'gradient', 'angle', 'unwrap', 'sort_complex', 'disp', 'flip', 'rot90', 'extract', 'place', 'vectorize', 'asarray_chkfinite', 'average', - 'histogram', 'histogramdd', 'bincount', 'digitize', 'cov', 'corrcoef', + 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc' @@ -241,806 +243,6 @@ def iterable(y): return True -def _hist_bin_sqrt(x): - """ - Square root histogram bin estimator. - - Bin width is inversely proportional to the data size. Used by many - programs for its simplicity. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. 
- """ - return x.ptp() / np.sqrt(x.size) - - -def _hist_bin_sturges(x): - """ - Sturges histogram bin estimator. - - A very simplistic estimator based on the assumption of normality of - the data. This estimator has poor performance for non-normal data, - which becomes especially obvious for large data sets. The estimate - depends only on size of the data. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return x.ptp() / (np.log2(x.size) + 1.0) - - -def _hist_bin_rice(x): - """ - Rice histogram bin estimator. - - Another simple estimator with no normality assumption. It has better - performance for large data than Sturges, but tends to overestimate - the number of bins. The number of bins is proportional to the cube - root of data size (asymptotically optimal). The estimate depends - only on size of the data. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return x.ptp() / (2.0 * x.size ** (1.0 / 3)) - - -def _hist_bin_scott(x): - """ - Scott histogram bin estimator. - - The binwidth is proportional to the standard deviation of the data - and inversely proportional to the cube root of data size - (asymptotically optimal). - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) - - -def _hist_bin_doane(x): - """ - Doane's histogram bin estimator. - - Improved version of Sturges' formula which works better for - non-normal data. See - stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - """ - if x.size > 2: - sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) - sigma = np.std(x) - if sigma > 0.0: - # These three operations add up to - # g1 = np.mean(((x - np.mean(x)) / sigma)**3) - # but use only one temp array instead of three - temp = x - np.mean(x) - np.true_divide(temp, sigma, temp) - np.power(temp, 3, temp) - g1 = np.mean(temp) - return x.ptp() / (1.0 + np.log2(x.size) + - np.log2(1.0 + np.absolute(g1) / sg1)) - return 0.0 - - -def _hist_bin_fd(x): - """ - The Freedman-Diaconis histogram bin estimator. - - The Freedman-Diaconis rule uses interquartile range (IQR) to - estimate binwidth. It is considered a variation of the Scott rule - with more robustness as the IQR is less affected by outliers than - the standard deviation. However, the IQR depends on fewer points - than the standard deviation, so it is less accurate, especially for - long tailed distributions. - - If the IQR is 0, this function returns 1 for the number of bins. - Binwidth is inversely proportional to the cube root of data size - (asymptotically optimal). - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. 
- """ - iqr = np.subtract(*np.percentile(x, [75, 25])) - return 2.0 * iqr * x.size ** (-1.0 / 3.0) - - -def _hist_bin_auto(x): - """ - Histogram bin estimator that uses the minimum width of the - Freedman-Diaconis and Sturges estimators. - - The FD estimator is usually the most robust method, but its width - estimate tends to be too large for small `x`. The Sturges estimator - is quite good for small (<1000) datasets and is the default in the R - language. This method gives good off the shelf behaviour. - - Parameters - ---------- - x : array_like - Input data that is to be histogrammed, trimmed to range. May not - be empty. - - Returns - ------- - h : An estimate of the optimal bin width for the given data. - - See Also - -------- - _hist_bin_fd, _hist_bin_sturges - """ - # There is no need to check for zero here. If ptp is, so is IQR and - # vice versa. Either both are zero or neither one is. - return min(_hist_bin_fd(x), _hist_bin_sturges(x)) - - -# Private dict initialized at module load time -_hist_bin_selectors = {'auto': _hist_bin_auto, - 'doane': _hist_bin_doane, - 'fd': _hist_bin_fd, - 'rice': _hist_bin_rice, - 'scott': _hist_bin_scott, - 'sqrt': _hist_bin_sqrt, - 'sturges': _hist_bin_sturges} - - -def histogram(a, bins=10, range=None, normed=False, weights=None, - density=None): - r""" - Compute the histogram of a set of data. - - Parameters - ---------- - a : array_like - Input data. The histogram is computed over the flattened array. - bins : int or sequence of scalars or str, optional - If `bins` is an int, it defines the number of equal-width - bins in the given range (10, by default). If `bins` is a - sequence, it defines the bin edges, including the rightmost - edge, allowing for non-uniform bin widths. - - .. versionadded:: 1.11.0 - - If `bins` is a string from the list below, `histogram` will use - the method chosen to calculate the optimal bin width and - consequently the number of bins (see `Notes` for more detail on - the estimators) from the data that falls within the requested - range. While the bin width will be optimal for the actual data - in the range, the number of bins will be computed to fill the - entire range, including the empty portions. For visualisation, - using the 'auto' option is suggested. Weighted data is not - supported for automated bin size selection. - - 'auto' - Maximum of the 'sturges' and 'fd' estimators. Provides good - all around performance. - - 'fd' (Freedman Diaconis Estimator) - Robust (resilient to outliers) estimator that takes into - account data variability and data size. - - 'doane' - An improved version of Sturges' estimator that works better - with non-normal datasets. - - 'scott' - Less robust estimator that that takes into account data - variability and data size. - - 'rice' - Estimator does not take variability into account, only data - size. Commonly overestimates number of bins required. - - 'sturges' - R's default method, only accounts for data size. Only - optimal for gaussian data and underestimates number of bins - for large non-gaussian datasets. - - 'sqrt' - Square root (of data size) estimator, used by Excel and - other programs for its speed and simplicity. - - range : (float, float), optional - The lower and upper range of the bins. If not provided, range - is simply ``(a.min(), a.max())``. Values outside the range are - ignored. The first element of the range must be less than or - equal to the second. `range` affects the automatic bin - computation as well. 
While bin width is computed to be optimal - based on the actual data within `range`, the bin count will fill - the entire range including portions containing no data. - normed : bool, optional - This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy - behavior. It will be removed in NumPy 2.0.0. Use the ``density`` - keyword instead. If ``False``, the result will contain the - number of samples in each bin. If ``True``, the result is the - value of the probability *density* function at the bin, - normalized such that the *integral* over the range is 1. Note - that this latter behavior is known to be buggy with unequal bin - widths; use ``density`` instead. - weights : array_like, optional - An array of weights, of the same shape as `a`. Each value in - `a` only contributes its associated weight towards the bin count - (instead of 1). If `density` is True, the weights are - normalized, so that the integral of the density over the range - remains 1. - density : bool, optional - If ``False``, the result will contain the number of samples in - each bin. If ``True``, the result is the value of the - probability *density* function at the bin, normalized such that - the *integral* over the range is 1. Note that the sum of the - histogram values will not be equal to 1 unless bins of unity - width are chosen; it is not a probability *mass* function. - - Overrides the ``normed`` keyword if given. - - Returns - ------- - hist : array - The values of the histogram. See `density` and `weights` for a - description of the possible semantics. - bin_edges : array of dtype float - Return the bin edges ``(length(hist)+1)``. - - - See Also - -------- - histogramdd, bincount, searchsorted, digitize - - Notes - ----- - All but the last (righthand-most) bin is half-open. In other words, - if `bins` is:: - - [1, 2, 3, 4] - - then the first bin is ``[1, 2)`` (including 1, but excluding 2) and - the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which - *includes* 4. - - .. versionadded:: 1.11.0 - - The methods to estimate the optimal number of bins are well founded - in literature, and are inspired by the choices R provides for - histogram visualisation. Note that having the number of bins - proportional to :math:`n^{1/3}` is asymptotically optimal, which is - why it appears in most estimators. These are simply plug-in methods - that give good starting points for number of bins. In the equations - below, :math:`h` is the binwidth and :math:`n_h` is the number of - bins. All estimators that compute bin counts are recast to bin width - using the `ptp` of the data. The final bin count is obtained from - ``np.round(np.ceil(range / h))`. - - 'Auto' (maximum of the 'Sturges' and 'FD' estimators) - A compromise to get a good value. For small datasets the Sturges - value will usually be chosen, while larger datasets will usually - default to FD. Avoids the overly conservative behaviour of FD - and Sturges for small and large datasets respectively. - Switchover point is usually :math:`a.size \approx 1000`. - - 'FD' (Freedman Diaconis Estimator) - .. math:: h = 2 \frac{IQR}{n^{1/3}} - - The binwidth is proportional to the interquartile range (IQR) - and inversely proportional to cube root of a.size. Can be too - conservative for small datasets, but is quite good for large - datasets. The IQR is very robust to outliers. - - 'Scott' - .. 
math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} - - The binwidth is proportional to the standard deviation of the - data and inversely proportional to cube root of ``x.size``. Can - be too conservative for small datasets, but is quite good for - large datasets. The standard deviation is not very robust to - outliers. Values are very similar to the Freedman-Diaconis - estimator in the absence of outliers. - - 'Rice' - .. math:: n_h = 2n^{1/3} - - The number of bins is only proportional to cube root of - ``a.size``. It tends to overestimate the number of bins and it - does not take into account data variability. - - 'Sturges' - .. math:: n_h = \log _{2}n+1 - - The number of bins is the base 2 log of ``a.size``. This - estimator assumes normality of data and is too conservative for - larger, non-normal datasets. This is the default method in R's - ``hist`` method. - - 'Doane' - .. math:: n_h = 1 + \log_{2}(n) + - \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) - - g_1 = mean[(\frac{x - \mu}{\sigma})^3] - - \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}} - - An improved version of Sturges' formula that produces better - estimates for non-normal datasets. This estimator attempts to - account for the skew of the data. - - 'Sqrt' - .. math:: n_h = \sqrt n - The simplest and fastest estimator. Only takes into account the - data size. - - Examples - -------- - >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) - (array([0, 2, 1]), array([0, 1, 2, 3])) - >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) - (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) - >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) - (array([1, 4, 1]), array([0, 1, 2, 3])) - - >>> a = np.arange(5) - >>> hist, bin_edges = np.histogram(a, density=True) - >>> hist - array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) - >>> hist.sum() - 2.4999999999999996 - >>> np.sum(hist * np.diff(bin_edges)) - 1.0 - - .. versionadded:: 1.11.0 - - Automated Bin Selection Methods example, using 2 peak random data - with 2000 points: - - >>> import matplotlib.pyplot as plt - >>> rng = np.random.RandomState(10) # deterministic random data - >>> a = np.hstack((rng.normal(size=1000), - ... rng.normal(loc=5, scale=2, size=1000))) - >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram - >>> plt.title("Histogram with 'auto' bins") - >>> plt.show() - - """ - a = asarray(a) - if weights is not None: - weights = asarray(weights) - if weights.shape != a.shape: - raise ValueError( - 'weights should have the same shape as a.') - weights = weights.ravel() - a = a.ravel() - - # Do not modify the original value of range so we can check for `None` - if range is None: - if a.size == 0: - # handle empty arrays. Can't determine range, so use 0-1. 
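# A quick illustration of this fallback (a sketch, not part of the
# change itself): with no data and no explicit range, the edges default
# to ten equal bins over (0, 1):
#
#     >>> np.histogram([])
#     (array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
#      array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]))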
- first_edge, last_edge = 0.0, 1.0 - else: - first_edge, last_edge = a.min() + 0.0, a.max() + 0.0 - else: - first_edge, last_edge = [mi + 0.0 for mi in range] - if first_edge > last_edge: - raise ValueError( - 'max must be larger than min in range parameter.') - if not np.all(np.isfinite([first_edge, last_edge])): - raise ValueError( - 'range parameter must be finite.') - if first_edge == last_edge: - first_edge -= 0.5 - last_edge += 0.5 - - # density overrides the normed keyword - if density is not None: - normed = False - - # parse the overloaded bins argument - n_equal_bins = None - bin_edges = None - - if isinstance(bins, basestring): - bin_name = bins - # if `bins` is a string for an automatic method, - # this will replace it with the number of bins calculated - if bin_name not in _hist_bin_selectors: - raise ValueError( - "{!r} is not a valid estimator for `bins`".format(bin_name)) - if weights is not None: - raise TypeError("Automated estimation of the number of " - "bins is not supported for weighted data") - # Make a reference to `a` - b = a - # Update the reference if the range needs truncation - if range is not None: - keep = (a >= first_edge) - keep &= (a <= last_edge) - if not np.logical_and.reduce(keep): - b = a[keep] - - if b.size == 0: - n_equal_bins = 1 - else: - # Do not call selectors on empty arrays - width = _hist_bin_selectors[bin_name](b) - if width: - n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) - else: - # Width can be zero for some estimators, e.g. FD when - # the IQR of the data is zero. - n_equal_bins = 1 - - elif np.ndim(bins) == 0: - try: - n_equal_bins = operator.index(bins) - except TypeError: - raise TypeError( - '`bins` must be an integer, a string, or an array') - if n_equal_bins < 1: - raise ValueError('`bins` must be positive, when an integer') - - elif np.ndim(bins) == 1: - bin_edges = np.asarray(bins) - if np.any(bin_edges[:-1] > bin_edges[1:]): - raise ValueError( - '`bins` must increase monotonically, when an array') - - else: - raise ValueError('`bins` must be 1d, when an array') - - del bins - - # compute the bins if only the count was specified - if n_equal_bins is not None: - bin_edges = linspace( - first_edge, last_edge, n_equal_bins + 1, endpoint=True) - - # Histogram is an integer or a float array depending on the weights. - if weights is None: - ntype = np.dtype(np.intp) - else: - ntype = weights.dtype - - # We set a block size, as this allows us to iterate over chunks when - # computing histograms, to minimize memory usage. - BLOCK = 65536 - - # The fast path uses bincount, but that only works for certain types - # of weight - simple_weights = ( - weights is None or - np.can_cast(weights.dtype, np.double) or - np.can_cast(weights.dtype, complex) - ) - - if n_equal_bins is not None and simple_weights: - # Fast algorithm for equal bins - # We now convert values of a to bin indices, under the assumption of - # equal bin widths (which is valid here). - - # Initialize empty histogram - n = np.zeros(n_equal_bins, ntype) - - # Pre-compute histogram scaling factor - norm = n_equal_bins / (last_edge - first_edge) - - # We iterate over blocks here for two reasons: the first is that for - # large arrays, it is actually faster (for example for a 10^8 array it - # is 2x as fast) and it results in a memory footprint 3x lower in the - # limit of large arrays. 
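# The loop below follows the usual fixed-block pattern; a minimal
# sketch, with a hypothetical per-block `process` helper standing in
# for the index computation and bincount that follow:
#
#     >>> for i in arange(0, len(a), BLOCK):
#     ...     process(a[i:i+BLOCK])   # temporaries never exceed BLOCK elements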
- for i in arange(0, len(a), BLOCK): - tmp_a = a[i:i+BLOCK] - if weights is None: - tmp_w = None - else: - tmp_w = weights[i:i + BLOCK] - - # Only include values in the right range - keep = (tmp_a >= first_edge) - keep &= (tmp_a <= last_edge) - if not np.logical_and.reduce(keep): - tmp_a = tmp_a[keep] - if tmp_w is not None: - tmp_w = tmp_w[keep] - tmp_a_data = tmp_a.astype(float) - tmp_a = tmp_a_data - first_edge - tmp_a *= norm - - # Compute the bin indices, and for values that lie exactly on - # last_edge we need to subtract one - indices = tmp_a.astype(np.intp) - indices[indices == n_equal_bins] -= 1 - - # The index computation is not guaranteed to give exactly - # consistent results within ~1 ULP of the bin edges. - decrement = tmp_a_data < bin_edges[indices] - indices[decrement] -= 1 - # The last bin includes the right edge. The other bins do not. - increment = ((tmp_a_data >= bin_edges[indices + 1]) - & (indices != n_equal_bins - 1)) - indices[increment] += 1 - - # We now compute the histogram using bincount - if ntype.kind == 'c': - n.real += np.bincount(indices, weights=tmp_w.real, - minlength=n_equal_bins) - n.imag += np.bincount(indices, weights=tmp_w.imag, - minlength=n_equal_bins) - else: - n += np.bincount(indices, weights=tmp_w, - minlength=n_equal_bins).astype(ntype) - else: - # Compute via cumulative histogram - cum_n = np.zeros(bin_edges.shape, ntype) - if weights is None: - for i in arange(0, len(a), BLOCK): - sa = sort(a[i:i+BLOCK]) - cum_n += np.r_[sa.searchsorted(bin_edges[:-1], 'left'), - sa.searchsorted(bin_edges[-1], 'right')] - else: - zero = array(0, dtype=ntype) - for i in arange(0, len(a), BLOCK): - tmp_a = a[i:i+BLOCK] - tmp_w = weights[i:i+BLOCK] - sorting_index = np.argsort(tmp_a) - sa = tmp_a[sorting_index] - sw = tmp_w[sorting_index] - cw = np.concatenate(([zero], sw.cumsum())) - bin_index = np.r_[sa.searchsorted(bin_edges[:-1], 'left'), - sa.searchsorted(bin_edges[-1], 'right')] - cum_n += cw[bin_index] - - n = np.diff(cum_n) - - if density: - db = array(np.diff(bin_edges), float) - return n/db/n.sum(), bin_edges - elif normed: - # deprecated, buggy behavior. Remove for NumPy 2.0.0 - db = array(np.diff(bin_edges), float) - return n/(n*db).sum(), bin_edges - else: - return n, bin_edges - - -def histogramdd(sample, bins=10, range=None, normed=False, weights=None): - """ - Compute the multidimensional histogram of some data. - - Parameters - ---------- - sample : array_like - The data to be histogrammed. It must be an (N,D) array or data - that can be converted to such. The rows of the resulting array - are the coordinates of points in a D dimensional polytope. - bins : sequence or int, optional - The bin specification: - - * A sequence of arrays describing the bin edges along each dimension. - * The number of bins for each dimension (nx, ny, ... =bins) - * The number of bins for all dimensions (nx=ny=...=bins). - - range : sequence, optional - A sequence of lower and upper bin edges to be used if the edges are - not given explicitly in `bins`. Defaults to the minimum and maximum - values along each dimension. - normed : bool, optional - If False, returns the number of samples in each bin. If True, - returns the bin density ``bin_count / sample_count / bin_volume``. - weights : (N,) array_like, optional - An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. - Weights are normalized to 1 if normed is True. 
If normed is False, - the values of the returned histogram are equal to the sum of the - weights belonging to the samples falling into each bin. - - Returns - ------- - H : ndarray - The multidimensional histogram of sample x. See normed and weights - for the different possible semantics. - edges : list - A list of D arrays describing the bin edges for each dimension. - - See Also - -------- - histogram: 1-D histogram - histogram2d: 2-D histogram - - Examples - -------- - >>> r = np.random.randn(100,3) - >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) - >>> H.shape, edges[0].size, edges[1].size, edges[2].size - ((5, 8, 4), 6, 9, 5) - - """ - - try: - # Sample is an ND-array. - N, D = sample.shape - except (AttributeError, ValueError): - # Sample is a sequence of 1D arrays. - sample = atleast_2d(sample).T - N, D = sample.shape - - nbin = empty(D, int) - edges = D*[None] - dedges = D*[None] - if weights is not None: - weights = asarray(weights) - - try: - M = len(bins) - if M != D: - raise ValueError( - 'The dimension of bins must be equal to the dimension of the ' - ' sample x.') - except TypeError: - # bins is an integer - bins = D*[bins] - - # Select range for each dimension - # Used only if number of bins is given. - if range is None: - # Handle empty input. Range can't be determined in that case, use 0-1. - if N == 0: - smin = zeros(D) - smax = ones(D) - else: - smin = atleast_1d(array(sample.min(0), float)) - smax = atleast_1d(array(sample.max(0), float)) - else: - if not np.all(np.isfinite(range)): - raise ValueError( - 'range parameter must be finite.') - smin = zeros(D) - smax = zeros(D) - for i in arange(D): - smin[i], smax[i] = range[i] - - # Make sure the bins have a finite width. - for i in arange(len(smin)): - if smin[i] == smax[i]: - smin[i] = smin[i] - .5 - smax[i] = smax[i] + .5 - - # avoid rounding issues for comparisons when dealing with inexact types - if np.issubdtype(sample.dtype, np.inexact): - edge_dt = sample.dtype - else: - edge_dt = float - # Create edge arrays - for i in arange(D): - if isscalar(bins[i]): - if bins[i] < 1: - raise ValueError( - "Element at index %s in `bins` should be a positive " - "integer." % i) - nbin[i] = bins[i] + 2 # +2 for outlier bins - edges[i] = linspace(smin[i], smax[i], nbin[i]-1, dtype=edge_dt) - else: - edges[i] = asarray(bins[i], edge_dt) - nbin[i] = len(edges[i]) + 1 # +1 for outlier bins - dedges[i] = diff(edges[i]) - if np.any(np.asarray(dedges[i]) <= 0): - raise ValueError( - "Found bin edge of size <= 0. Did you specify `bins` with" - "non-monotonic sequence?") - - nbin = asarray(nbin) - - # Handle empty input. - if N == 0: - return np.zeros(nbin-2), edges - - # Compute the bin number each sample falls into. - Ncount = {} - for i in arange(D): - Ncount[i] = digitize(sample[:, i], edges[i]) - - # Using digitize, values that fall on an edge are put in the right bin. - # For the rightmost bin, we want values equal to the right edge to be - # counted in the last bin, and not as an outlier. - for i in arange(D): - # Rounding precision - mindiff = dedges[i].min() - if not np.isinf(mindiff): - decimal = int(-log10(mindiff)) + 6 - # Find which points are on the rightmost edge. - not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) - on_edge = (around(sample[:, i], decimal) == - around(edges[i][-1], decimal)) - # Shift these points one bin to the left. - Ncount[i][nonzero(on_edge & not_smaller_than_edge)[0]] -= 1 - - # Flattened histogram matrix (1D) - # Reshape is used so that overlarge arrays - # will raise an error. 
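# The `xy` computed below is conceptually just row-major index
# arithmetic. A minimal sketch for D == 2, with per-dimension bin ids
# (i0, i1) and bin counts (n0, n1), outlier bins included:
#
#     flat_index = i0 * n1 + i1   # one position in hist.ravel()
#
# except that the code orders the dimensions by nbin.argsort() first
# and undoes that ordering with the swapaxes loop further down.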
- hist = zeros(nbin, float).reshape(-1) - - # Compute the sample indices in the flattened histogram matrix. - ni = nbin.argsort() - xy = zeros(N, int) - for i in arange(0, D-1): - xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod() - xy += Ncount[ni[-1]] - - # Compute the number of repetitions in xy and assign it to the - # flattened histmat. - if len(xy) == 0: - return zeros(nbin-2, int), edges - - flatcount = bincount(xy, weights) - a = arange(len(flatcount)) - hist[a] = flatcount - - # Shape into a proper matrix - hist = hist.reshape(sort(nbin)) - for i in arange(nbin.size): - j = ni.argsort()[i] - hist = hist.swapaxes(i, j) - ni[i], ni[j] = ni[j], ni[i] - - # Remove outliers (indices 0 and -1 for each dimension). - core = D*[slice(1, -1)] - hist = hist[core] - - # Normalize if normed is True - if normed: - s = hist.sum() - for i in arange(D): - shape = ones(D, int) - shape[i] = nbin[i] - 2 - hist = hist / dedges[i].reshape(shape) - hist /= s - - if (hist.shape != nbin - 2).any(): - raise RuntimeError( - "Internal Shape Error") - return hist, edges - - def average(a, axis=None, weights=None, returned=False): """ Compute the weighted average along the specified axis. @@ -2942,7 +2144,7 @@ def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, .. versionadded:: 1.5 fweights : array_like, int, optional - 1-D array of integer freguency weights; the number of times each + 1-D array of integer frequency weights; the number of times each observation vector should be repeated. .. versionadded:: 1.10 diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py new file mode 100644 index 000000000..ec2d0fe81 --- /dev/null +++ b/numpy/lib/histograms.py @@ -0,0 +1,865 @@ +""" +Histogram-related functions +""" +from __future__ import division, absolute_import, print_function + +import operator + +import numpy as np +from numpy.compat.py3k import basestring + +__all__ = ['histogram', 'histogramdd'] + + +def _hist_bin_sqrt(x): + """ + Square root histogram bin estimator. + + Bin width is inversely proportional to the data size. Used by many + programs for its simplicity. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / np.sqrt(x.size) + + +def _hist_bin_sturges(x): + """ + Sturges histogram bin estimator. + + A very simplistic estimator based on the assumption of normality of + the data. This estimator has poor performance for non-normal data, + which becomes especially obvious for large data sets. The estimate + depends only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return x.ptp() / (np.log2(x.size) + 1.0) + + +def _hist_bin_rice(x): + """ + Rice histogram bin estimator. + + Another simple estimator with no normality assumption. It has better + performance for large data than Sturges, but tends to overestimate + the number of bins. The number of bins is proportional to the cube + root of data size (asymptotically optimal). The estimate depends + only on size of the data. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. 
+ """ + return x.ptp() / (2.0 * x.size ** (1.0 / 3)) + + +def _hist_bin_scott(x): + """ + Scott histogram bin estimator. + + The binwidth is proportional to the standard deviation of the data + and inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x) + + +def _hist_bin_doane(x): + """ + Doane's histogram bin estimator. + + Improved version of Sturges' formula which works better for + non-normal data. See + stats.stackexchange.com/questions/55134/doanes-formula-for-histogram-binning + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + if x.size > 2: + sg1 = np.sqrt(6.0 * (x.size - 2) / ((x.size + 1.0) * (x.size + 3))) + sigma = np.std(x) + if sigma > 0.0: + # These three operations add up to + # g1 = np.mean(((x - np.mean(x)) / sigma)**3) + # but use only one temp array instead of three + temp = x - np.mean(x) + np.true_divide(temp, sigma, temp) + np.power(temp, 3, temp) + g1 = np.mean(temp) + return x.ptp() / (1.0 + np.log2(x.size) + + np.log2(1.0 + np.absolute(g1) / sg1)) + return 0.0 + + +def _hist_bin_fd(x): + """ + The Freedman-Diaconis histogram bin estimator. + + The Freedman-Diaconis rule uses interquartile range (IQR) to + estimate binwidth. It is considered a variation of the Scott rule + with more robustness as the IQR is less affected by outliers than + the standard deviation. However, the IQR depends on fewer points + than the standard deviation, so it is less accurate, especially for + long tailed distributions. + + If the IQR is 0, this function returns 1 for the number of bins. + Binwidth is inversely proportional to the cube root of data size + (asymptotically optimal). + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + """ + iqr = np.subtract(*np.percentile(x, [75, 25])) + return 2.0 * iqr * x.size ** (-1.0 / 3.0) + + +def _hist_bin_auto(x): + """ + Histogram bin estimator that uses the minimum width of the + Freedman-Diaconis and Sturges estimators. + + The FD estimator is usually the most robust method, but its width + estimate tends to be too large for small `x`. The Sturges estimator + is quite good for small (<1000) datasets and is the default in the R + language. This method gives good off the shelf behaviour. + + Parameters + ---------- + x : array_like + Input data that is to be histogrammed, trimmed to range. May not + be empty. + + Returns + ------- + h : An estimate of the optimal bin width for the given data. + + See Also + -------- + _hist_bin_fd, _hist_bin_sturges + """ + # There is no need to check for zero here. If ptp is, so is IQR and + # vice versa. Either both are zero or neither one is. 
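# A numeric illustration of the rule below (not part of the library
# code): for x = np.arange(1000.), ptp is 999 and the IQR is 499.5, so
#
#     _hist_bin_sturges(x)   # 999 / (log2(1000) + 1) ~= 91.1
#     _hist_bin_fd(x)        # 2 * 499.5 / 1000**(1/3) == 99.9
#
# and the smaller Sturges width (i.e. the larger bin count) wins; for
# much larger n the FD width becomes the smaller of the two.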
+ return min(_hist_bin_fd(x), _hist_bin_sturges(x)) + + +# Private dict initialized at module load time +_hist_bin_selectors = {'auto': _hist_bin_auto, + 'doane': _hist_bin_doane, + 'fd': _hist_bin_fd, + 'rice': _hist_bin_rice, + 'scott': _hist_bin_scott, + 'sqrt': _hist_bin_sqrt, + 'sturges': _hist_bin_sturges} + + +def _ravel_and_check_weights(a, weights): + """ Check a and weights have matching shapes, and ravel both """ + a = np.asarray(a) + if weights is not None: + weights = np.asarray(weights) + if weights.shape != a.shape: + raise ValueError( + 'weights should have the same shape as a.') + weights = weights.ravel() + a = a.ravel() + return a, weights + + +def _get_outer_edges(a, range): + """ + Determine the outer bin edges to use, from either the data or the range + argument + """ + if range is not None: + first_edge, last_edge = range + elif a.size == 0: + # handle empty arrays. Can't determine range, so use 0-1. + first_edge, last_edge = 0, 1 + else: + first_edge, last_edge = a.min(), a.max() + + if first_edge > last_edge: + raise ValueError( + 'max must be larger than min in range parameter.') + if not (np.isfinite(first_edge) and np.isfinite(last_edge)): + raise ValueError( + 'range parameter must be finite.') + + # expand empty range to avoid divide by zero + if first_edge == last_edge: + first_edge = first_edge - 0.5 + last_edge = last_edge + 0.5 + + return first_edge, last_edge + + +def _get_bin_edges(a, bins, range, weights): + """ + Computes the bins used internally by `histogram`. + + Parameters + ========== + a : ndarray + Ravelled data array + bins, range + Forwarded arguments from `histogram`. + weights : ndarray, optional + Ravelled weights array, or None + + Returns + ======= + bin_edges : ndarray + Array of bin edges + uniform_bins : (Number, Number, int): + The upper bound, lowerbound, and number of bins, used in the optimized + implementation of `histogram` that works on uniform bins. + """ + # parse the overloaded bins argument + n_equal_bins = None + bin_edges = None + + if isinstance(bins, basestring): + bin_name = bins + # if `bins` is a string for an automatic method, + # this will replace it with the number of bins calculated + if bin_name not in _hist_bin_selectors: + raise ValueError( + "{!r} is not a valid estimator for `bins`".format(bin_name)) + if weights is not None: + raise TypeError("Automated estimation of the number of " + "bins is not supported for weighted data") + + first_edge, last_edge = _get_outer_edges(a, range) + + # truncate the range if needed + if range is not None: + keep = (a >= first_edge) + keep &= (a <= last_edge) + if not np.logical_and.reduce(keep): + a = a[keep] + + if a.size == 0: + n_equal_bins = 1 + else: + # Do not call selectors on empty arrays + width = _hist_bin_selectors[bin_name](a) + if width: + n_equal_bins = int(np.ceil((last_edge - first_edge) / width)) + else: + # Width can be zero for some estimators, e.g. FD when + # the IQR of the data is zero. 
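# Worked example of this fallback (an illustration, not library code):
# constant data such as np.ones(100) has zero IQR, so the 'fd' width is
# 0.0 and a single bin over the (expanded) range is used instead:
#
#     >>> np.histogram(np.ones(100), bins='fd')
#     (array([100]), array([ 0.5,  1.5]))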
+                n_equal_bins = 1
+
+    elif np.ndim(bins) == 0:
+        try:
+            n_equal_bins = operator.index(bins)
+        except TypeError:
+            raise TypeError(
+                '`bins` must be an integer, a string, or an array')
+        if n_equal_bins < 1:
+            raise ValueError('`bins` must be positive, when an integer')
+
+        first_edge, last_edge = _get_outer_edges(a, range)
+
+    elif np.ndim(bins) == 1:
+        bin_edges = np.asarray(bins)
+        if np.any(bin_edges[:-1] > bin_edges[1:]):
+            raise ValueError(
+                '`bins` must increase monotonically, when an array')
+
+    else:
+        raise ValueError('`bins` must be 1d, when an array')
+
+    if n_equal_bins is not None:
+        # bin edges must be computed
+        bin_edges = np.linspace(
+            first_edge, last_edge, n_equal_bins + 1, endpoint=True)
+        return bin_edges, (first_edge, last_edge, n_equal_bins)
+    else:
+        return bin_edges, None
+
+
+def _search_sorted_inclusive(a, v):
+    """
+    Like `searchsorted`, but where the last item in `v` is placed on the right.
+
+    In the context of a histogram, this makes the last bin edge inclusive
+    """
+    return np.concatenate((
+        a.searchsorted(v[:-1], 'left'),
+        a.searchsorted(v[-1:], 'right')
+    ))
+
+
+def histogram(a, bins=10, range=None, normed=False, weights=None,
+              density=None):
+    r"""
+    Compute the histogram of a set of data.
+
+    Parameters
+    ----------
+    a : array_like
+        Input data. The histogram is computed over the flattened array.
+    bins : int or sequence of scalars or str, optional
+        If `bins` is an int, it defines the number of equal-width
+        bins in the given range (10, by default). If `bins` is a
+        sequence, it defines the bin edges, including the rightmost
+        edge, allowing for non-uniform bin widths.
+
+        .. versionadded:: 1.11.0
+
+        If `bins` is a string from the list below, `histogram` will use
+        the method chosen to calculate the optimal bin width and
+        consequently the number of bins (see `Notes` for more detail on
+        the estimators) from the data that falls within the requested
+        range. While the bin width will be optimal for the actual data
+        in the range, the number of bins will be computed to fill the
+        entire range, including the empty portions. For visualisation,
+        using the 'auto' option is suggested. Weighted data is not
+        supported for automated bin size selection.
+
+        'auto'
+            Maximum of the 'sturges' and 'fd' estimators. Provides good
+            all around performance.
+
+        'fd' (Freedman Diaconis Estimator)
+            Robust (resilient to outliers) estimator that takes into
+            account data variability and data size.
+
+        'doane'
+            An improved version of Sturges' estimator that works better
+            with non-normal datasets.
+
+        'scott'
+            Less robust estimator that takes into account data
+            variability and data size.
+
+        'rice'
+            Estimator does not take variability into account, only data
+            size. Commonly overestimates number of bins required.
+
+        'sturges'
+            R's default method, only accounts for data size. Only
+            optimal for gaussian data and underestimates number of bins
+            for large non-gaussian datasets.
+
+        'sqrt'
+            Square root (of data size) estimator, used by Excel and
+            other programs for its speed and simplicity.
+
+    range : (float, float), optional
+        The lower and upper range of the bins. If not provided, range
+        is simply ``(a.min(), a.max())``. Values outside the range are
+        ignored. The first element of the range must be less than or
+        equal to the second. `range` affects the automatic bin
+        computation as well.
While bin width is computed to be optimal
+        based on the actual data within `range`, the bin count will fill
+        the entire range including portions containing no data.
+    normed : bool, optional
+        This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy
+        behavior. It will be removed in NumPy 2.0.0. Use the ``density``
+        keyword instead. If ``False``, the result will contain the
+        number of samples in each bin. If ``True``, the result is the
+        value of the probability *density* function at the bin,
+        normalized such that the *integral* over the range is 1. Note
+        that this latter behavior is known to be buggy with unequal bin
+        widths; use ``density`` instead.
+    weights : array_like, optional
+        An array of weights, of the same shape as `a`. Each value in
+        `a` only contributes its associated weight towards the bin count
+        (instead of 1). If `density` is True, the weights are
+        normalized, so that the integral of the density over the range
+        remains 1.
+    density : bool, optional
+        If ``False``, the result will contain the number of samples in
+        each bin. If ``True``, the result is the value of the
+        probability *density* function at the bin, normalized such that
+        the *integral* over the range is 1. Note that the sum of the
+        histogram values will not be equal to 1 unless bins of unity
+        width are chosen; it is not a probability *mass* function.
+
+        Overrides the ``normed`` keyword if given.
+
+    Returns
+    -------
+    hist : array
+        The values of the histogram. See `density` and `weights` for a
+        description of the possible semantics.
+    bin_edges : array of dtype float
+        Return the bin edges ``(length(hist)+1)``.
+
+
+    See Also
+    --------
+    histogramdd, bincount, searchsorted, digitize
+
+    Notes
+    -----
+    All but the last (righthand-most) bin is half-open. In other words,
+    if `bins` is::
+
+      [1, 2, 3, 4]
+
+    then the first bin is ``[1, 2)`` (including 1, but excluding 2) and
+    the second ``[2, 3)``. The last bin, however, is ``[3, 4]``, which
+    *includes* 4.
+
+    .. versionadded:: 1.11.0
+
+    The methods to estimate the optimal number of bins are well founded
+    in the literature, and are inspired by the choices R provides for
+    histogram visualisation. Note that having the number of bins
+    proportional to :math:`n^{1/3}` is asymptotically optimal, which is
+    why it appears in most estimators. These are simply plug-in methods
+    that give good starting points for number of bins. In the equations
+    below, :math:`h` is the binwidth and :math:`n_h` is the number of
+    bins. All estimators that compute bin counts are recast to bin width
+    using the `ptp` of the data. The final bin count is obtained from
+    ``np.round(np.ceil(range / h))``.
+
+    'Auto' (maximum of the 'Sturges' and 'FD' estimators)
+        A compromise to get a good value. For small datasets the Sturges
+        value will usually be chosen, while larger datasets will usually
+        default to FD. Avoids the overly conservative behaviour of FD
+        and Sturges for small and large datasets respectively.
+        Switchover point is usually :math:`a.size \approx 1000`.
+
+    'FD' (Freedman Diaconis Estimator)
+        .. math:: h = 2 \frac{IQR}{n^{1/3}}
+
+        The binwidth is proportional to the interquartile range (IQR)
+        and inversely proportional to cube root of a.size. Can be too
+        conservative for small datasets, but is quite good for large
+        datasets. The IQR is very robust to outliers.
+
+    'Scott'
+        ..
math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}} + + The binwidth is proportional to the standard deviation of the + data and inversely proportional to cube root of ``x.size``. Can + be too conservative for small datasets, but is quite good for + large datasets. The standard deviation is not very robust to + outliers. Values are very similar to the Freedman-Diaconis + estimator in the absence of outliers. + + 'Rice' + .. math:: n_h = 2n^{1/3} + + The number of bins is only proportional to cube root of + ``a.size``. It tends to overestimate the number of bins and it + does not take into account data variability. + + 'Sturges' + .. math:: n_h = \log _{2}n+1 + + The number of bins is the base 2 log of ``a.size``. This + estimator assumes normality of data and is too conservative for + larger, non-normal datasets. This is the default method in R's + ``hist`` method. + + 'Doane' + .. math:: n_h = 1 + \log_{2}(n) + + \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}}) + + g_1 = mean[(\frac{x - \mu}{\sigma})^3] + + \sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}} + + An improved version of Sturges' formula that produces better + estimates for non-normal datasets. This estimator attempts to + account for the skew of the data. + + 'Sqrt' + .. math:: n_h = \sqrt n + The simplest and fastest estimator. Only takes into account the + data size. + + Examples + -------- + >>> np.histogram([1, 2, 1], bins=[0, 1, 2, 3]) + (array([0, 2, 1]), array([0, 1, 2, 3])) + >>> np.histogram(np.arange(4), bins=np.arange(5), density=True) + (array([ 0.25, 0.25, 0.25, 0.25]), array([0, 1, 2, 3, 4])) + >>> np.histogram([[1, 2, 1], [1, 0, 1]], bins=[0,1,2,3]) + (array([1, 4, 1]), array([0, 1, 2, 3])) + + >>> a = np.arange(5) + >>> hist, bin_edges = np.histogram(a, density=True) + >>> hist + array([ 0.5, 0. , 0.5, 0. , 0. , 0.5, 0. , 0.5, 0. , 0.5]) + >>> hist.sum() + 2.4999999999999996 + >>> np.sum(hist * np.diff(bin_edges)) + 1.0 + + .. versionadded:: 1.11.0 + + Automated Bin Selection Methods example, using 2 peak random data + with 2000 points: + + >>> import matplotlib.pyplot as plt + >>> rng = np.random.RandomState(10) # deterministic random data + >>> a = np.hstack((rng.normal(size=1000), + ... rng.normal(loc=5, scale=2, size=1000))) + >>> plt.hist(a, bins='auto') # arguments are passed to np.histogram + >>> plt.title("Histogram with 'auto' bins") + >>> plt.show() + + """ + a, weights = _ravel_and_check_weights(a, weights) + + bin_edges, uniform_bins = _get_bin_edges(a, bins, range, weights) + + # Histogram is an integer or a float array depending on the weights. + if weights is None: + ntype = np.dtype(np.intp) + else: + ntype = weights.dtype + + # We set a block size, as this allows us to iterate over chunks when + # computing histograms, to minimize memory usage. + BLOCK = 65536 + + # The fast path uses bincount, but that only works for certain types + # of weight + simple_weights = ( + weights is None or + np.can_cast(weights.dtype, np.double) or + np.can_cast(weights.dtype, complex) + ) + + if uniform_bins is not None and simple_weights: + # Fast algorithm for equal bins + # We now convert values of a to bin indices, under the assumption of + # equal bin widths (which is valid here). 
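# The rest of this branch turns each value into a bin index with a
# single affine transform rather than a per-element search. A sketch of
# the arithmetic, assuming edges spanning (0, 10) and n_equal_bins == 5:
#
#     norm = 5 / (10 - 0)        # bins per unit of data
#     int((7.3 - 0) * norm)      # -> 3, i.e. bin [6, 8)
#
# The decrement/increment fix-ups below then repair only the values
# that land exactly on (or within ~1 ULP of) a bin edge.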
+ first_edge, last_edge, n_equal_bins = uniform_bins + + # Initialize empty histogram + n = np.zeros(n_equal_bins, ntype) + + # Pre-compute histogram scaling factor + norm = n_equal_bins / (last_edge - first_edge) + + # We iterate over blocks here for two reasons: the first is that for + # large arrays, it is actually faster (for example for a 10^8 array it + # is 2x as fast) and it results in a memory footprint 3x lower in the + # limit of large arrays. + for i in np.arange(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + if weights is None: + tmp_w = None + else: + tmp_w = weights[i:i + BLOCK] + + # Only include values in the right range + keep = (tmp_a >= first_edge) + keep &= (tmp_a <= last_edge) + if not np.logical_and.reduce(keep): + tmp_a = tmp_a[keep] + if tmp_w is not None: + tmp_w = tmp_w[keep] + tmp_a_data = tmp_a.astype(float) + tmp_a = tmp_a_data - first_edge + tmp_a *= norm + + # Compute the bin indices, and for values that lie exactly on + # last_edge we need to subtract one + indices = tmp_a.astype(np.intp) + indices[indices == n_equal_bins] -= 1 + + # The index computation is not guaranteed to give exactly + # consistent results within ~1 ULP of the bin edges. + decrement = tmp_a_data < bin_edges[indices] + indices[decrement] -= 1 + # The last bin includes the right edge. The other bins do not. + increment = ((tmp_a_data >= bin_edges[indices + 1]) + & (indices != n_equal_bins - 1)) + indices[increment] += 1 + + # We now compute the histogram using bincount + if ntype.kind == 'c': + n.real += np.bincount(indices, weights=tmp_w.real, + minlength=n_equal_bins) + n.imag += np.bincount(indices, weights=tmp_w.imag, + minlength=n_equal_bins) + else: + n += np.bincount(indices, weights=tmp_w, + minlength=n_equal_bins).astype(ntype) + else: + # Compute via cumulative histogram + cum_n = np.zeros(bin_edges.shape, ntype) + if weights is None: + for i in np.arange(0, len(a), BLOCK): + sa = np.sort(a[i:i+BLOCK]) + cum_n += _search_sorted_inclusive(sa, bin_edges) + else: + zero = np.zeros(1, dtype=ntype) + for i in np.arange(0, len(a), BLOCK): + tmp_a = a[i:i+BLOCK] + tmp_w = weights[i:i+BLOCK] + sorting_index = np.argsort(tmp_a) + sa = tmp_a[sorting_index] + sw = tmp_w[sorting_index] + cw = np.concatenate((zero, sw.cumsum())) + bin_index = _search_sorted_inclusive(sa, bin_edges) + cum_n += cw[bin_index] + + n = np.diff(cum_n) + + # density overrides the normed keyword + if density is not None: + normed = False + + if density: + db = np.array(np.diff(bin_edges), float) + return n/db/n.sum(), bin_edges + elif normed: + # deprecated, buggy behavior. Remove for NumPy 2.0.0 + db = np.array(np.diff(bin_edges), float) + return n/(n*db).sum(), bin_edges + else: + return n, bin_edges + + +def histogramdd(sample, bins=10, range=None, normed=False, weights=None): + """ + Compute the multidimensional histogram of some data. + + Parameters + ---------- + sample : array_like + The data to be histogrammed. It must be an (N,D) array or data + that can be converted to such. The rows of the resulting array + are the coordinates of points in a D dimensional polytope. + bins : sequence or int, optional + The bin specification: + + * A sequence of arrays describing the bin edges along each dimension. + * The number of bins for each dimension (nx, ny, ... =bins) + * The number of bins for all dimensions (nx=ny=...=bins). + + range : sequence, optional + A sequence of lower and upper bin edges to be used if the edges are + not given explicitly in `bins`. 
Defaults to the minimum and maximum
+        values along each dimension.
+    normed : bool, optional
+        If False, returns the number of samples in each bin. If True,
+        returns the bin density ``bin_count / sample_count / bin_volume``.
+    weights : (N,) array_like, optional
+        An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
+        Weights are normalized to 1 if normed is True. If normed is False,
+        the values of the returned histogram are equal to the sum of the
+        weights belonging to the samples falling into each bin.
+
+    Returns
+    -------
+    H : ndarray
+        The multidimensional histogram of sample x. See normed and weights
+        for the different possible semantics.
+    edges : list
+        A list of D arrays describing the bin edges for each dimension.
+
+    See Also
+    --------
+    histogram: 1-D histogram
+    histogram2d: 2-D histogram
+
+    Examples
+    --------
+    >>> r = np.random.randn(100,3)
+    >>> H, edges = np.histogramdd(r, bins = (5, 8, 4))
+    >>> H.shape, edges[0].size, edges[1].size, edges[2].size
+    ((5, 8, 4), 6, 9, 5)
+
+    """
+
+    try:
+        # Sample is an ND-array.
+        N, D = sample.shape
+    except (AttributeError, ValueError):
+        # Sample is a sequence of 1D arrays.
+        sample = np.atleast_2d(sample).T
+        N, D = sample.shape
+
+    nbin = np.empty(D, int)
+    edges = D*[None]
+    dedges = D*[None]
+    if weights is not None:
+        weights = np.asarray(weights)
+
+    try:
+        M = len(bins)
+        if M != D:
+            raise ValueError(
+                'The dimension of bins must be equal to the dimension of the '
+                'sample x.')
+    except TypeError:
+        # bins is an integer
+        bins = D*[bins]
+
+    # Select range for each dimension
+    # Used only if number of bins is given.
+    if range is None:
+        # Handle empty input. Range can't be determined in that case, use 0-1.
+        if N == 0:
+            smin = np.zeros(D)
+            smax = np.ones(D)
+        else:
+            smin = np.atleast_1d(np.array(sample.min(0), float))
+            smax = np.atleast_1d(np.array(sample.max(0), float))
+    else:
+        if not np.all(np.isfinite(range)):
+            raise ValueError(
+                'range parameter must be finite.')
+        smin = np.zeros(D)
+        smax = np.zeros(D)
+        for i in np.arange(D):
+            smin[i], smax[i] = range[i]
+
+    # Make sure the bins have a finite width.
+    for i in np.arange(len(smin)):
+        if smin[i] == smax[i]:
+            smin[i] = smin[i] - .5
+            smax[i] = smax[i] + .5
+
+    # avoid rounding issues for comparisons when dealing with inexact types
+    if np.issubdtype(sample.dtype, np.inexact):
+        edge_dt = sample.dtype
+    else:
+        edge_dt = float
+    # Create edge arrays
+    for i in np.arange(D):
+        if np.isscalar(bins[i]):
+            if bins[i] < 1:
+                raise ValueError(
+                    "Element at index %s in `bins` should be a positive "
+                    "integer." % i)
+            nbin[i] = bins[i] + 2  # +2 for outlier bins
+            edges[i] = np.linspace(smin[i], smax[i], nbin[i]-1, dtype=edge_dt)
+        else:
+            edges[i] = np.asarray(bins[i], edge_dt)
+            nbin[i] = len(edges[i]) + 1  # +1 for outlier bins
+        dedges[i] = np.diff(edges[i])
+        if np.any(np.asarray(dedges[i]) <= 0):
+            raise ValueError(
+                "Found bin edge of size <= 0. Did you specify `bins` with "
+                "non-monotonic sequence?")
+
+    nbin = np.asarray(nbin)
+
+    # Handle empty input.
+    if N == 0:
+        return np.zeros(nbin-2), edges
+
+    # Compute the bin number each sample falls into.
+    Ncount = {}
+    for i in np.arange(D):
+        Ncount[i] = np.digitize(sample[:, i], edges[i])
+
+    # Using digitize, values that fall on an edge are put in the right bin.
+    # For the rightmost bin, we want values equal to the right edge to be
+    # counted in the last bin, and not as an outlier.
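# A doctest-style illustration of why the shift below is needed,
# assuming edges [0., 0.5, 1.] along one dimension:
#
#     >>> np.digitize([0.4, 0.5, 1.0], [0., 0.5, 1.])
#     array([1, 2, 3])
#
# The value 1.0 lands in the right-outlier slot 3, so samples sitting
# exactly on the last edge are moved back into the last real bin.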
+ for i in np.arange(D): + # Rounding precision + mindiff = dedges[i].min() + if not np.isinf(mindiff): + decimal = int(-np.log10(mindiff)) + 6 + # Find which points are on the rightmost edge. + not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) + on_edge = (np.around(sample[:, i], decimal) == + np.around(edges[i][-1], decimal)) + # Shift these points one bin to the left. + Ncount[i][np.nonzero(on_edge & not_smaller_than_edge)[0]] -= 1 + + # Flattened histogram matrix (1D) + # Reshape is used so that overlarge arrays + # will raise an error. + hist = np.zeros(nbin, float).reshape(-1) + + # Compute the sample indices in the flattened histogram matrix. + ni = nbin.argsort() + xy = np.zeros(N, int) + for i in np.arange(0, D-1): + xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod() + xy += Ncount[ni[-1]] + + # Compute the number of repetitions in xy and assign it to the + # flattened histmat. + if len(xy) == 0: + return np.zeros(nbin-2, int), edges + + flatcount = np.bincount(xy, weights) + a = np.arange(len(flatcount)) + hist[a] = flatcount + + # Shape into a proper matrix + hist = hist.reshape(np.sort(nbin)) + for i in np.arange(nbin.size): + j = ni.argsort()[i] + hist = hist.swapaxes(i, j) + ni[i], ni[j] = ni[j], ni[i] + + # Remove outliers (indices 0 and -1 for each dimension). + core = D*[slice(1, -1)] + hist = hist[core] + + # Normalize if normed is True + if normed: + s = hist.sum() + for i in np.arange(D): + shape = np.ones(D, int) + shape[i] = nbin[i] - 2 + hist = hist / dedges[i].reshape(shape) + hist /= s + + if (hist.shape != nbin - 2).any(): + raise RuntimeError( + "Internal Shape Error") + return hist, edges diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index ca8c10031..22b295b9d 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -497,7 +497,7 @@ def nansum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): Return the sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. - In NumPy versions <= 1.8.0 Nan is returned for slices that are all-NaN or + In NumPy versions <= 1.9.0 Nan is returned for slices that are all-NaN or empty. In later versions zero is returned. Parameters diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 8381c2465..dbab69436 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1617,518 +1617,6 @@ class TestSinc(object): assert_array_equal(y1, y3) -class TestHistogram(object): - - def setup(self): - pass - - def teardown(self): - pass - - def test_simple(self): - n = 100 - v = rand(n) - (a, b) = histogram(v) - # check if the sum of the bins equals the number of samples - assert_equal(np.sum(a, axis=0), n) - # check that the bin counts are evenly spaced when the data is from - # a linear function - (a, b) = histogram(np.linspace(0, 10, 100)) - assert_array_equal(a, 10) - - def test_one_bin(self): - # Ticket 632 - hist, edges = histogram([1, 2, 3, 4], [1, 2]) - assert_array_equal(hist, [2, ]) - assert_array_equal(edges, [1, 2]) - assert_raises(ValueError, histogram, [1, 2], bins=0) - h, e = histogram([1, 2], bins=1) - assert_equal(h, np.array([2])) - assert_allclose(e, np.array([1., 2.])) - - def test_normed(self): - # Check that the integral of the density equals 1. 
- n = 100 - v = rand(n) - a, b = histogram(v, normed=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - # Check with non-constant bin widths (buggy but backwards - # compatible) - v = np.arange(10) - bins = [0, 1, 5, 9, 10] - a, b = histogram(v, bins, normed=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - def test_density(self): - # Check that the integral of the density equals 1. - n = 100 - v = rand(n) - a, b = histogram(v, density=True) - area = np.sum(a * diff(b)) - assert_almost_equal(area, 1) - - # Check with non-constant bin widths - v = np.arange(10) - bins = [0, 1, 3, 6, 10] - a, b = histogram(v, bins, density=True) - assert_array_equal(a, .1) - assert_equal(np.sum(a * diff(b)), 1) - - # Variale bin widths are especially useful to deal with - # infinities. - v = np.arange(10) - bins = [0, 1, 3, 6, np.inf] - a, b = histogram(v, bins, density=True) - assert_array_equal(a, [.1, .1, .1, 0.]) - - # Taken from a bug report from N. Becker on the numpy-discussion - # mailing list Aug. 6, 2010. - counts, dmy = np.histogram( - [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True) - assert_equal(counts, [.25, 0]) - - def test_outliers(self): - # Check that outliers are not tallied - a = np.arange(10) + .5 - - # Lower outliers - h, b = histogram(a, range=[0, 9]) - assert_equal(h.sum(), 9) - - # Upper outliers - h, b = histogram(a, range=[1, 10]) - assert_equal(h.sum(), 9) - - # Normalization - h, b = histogram(a, range=[1, 9], normed=True) - assert_almost_equal((h * diff(b)).sum(), 1, decimal=15) - - # Weights - w = np.arange(10) + .5 - h, b = histogram(a, range=[1, 9], weights=w, normed=True) - assert_equal((h * diff(b)).sum(), 1) - - h, b = histogram(a, bins=8, range=[1, 9], weights=w) - assert_equal(h, w[1:-1]) - - def test_type(self): - # Check the type of the returned histogram - a = np.arange(10) + .5 - h, b = histogram(a) - assert_(np.issubdtype(h.dtype, np.integer)) - - h, b = histogram(a, normed=True) - assert_(np.issubdtype(h.dtype, np.floating)) - - h, b = histogram(a, weights=np.ones(10, int)) - assert_(np.issubdtype(h.dtype, np.integer)) - - h, b = histogram(a, weights=np.ones(10, float)) - assert_(np.issubdtype(h.dtype, np.floating)) - - def test_f32_rounding(self): - # gh-4799, check that the rounding of the edges works with float32 - x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32) - y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32) - counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100) - assert_equal(counts_hist.sum(), 3.) - - def test_weights(self): - v = rand(100) - w = np.ones(100) * 5 - a, b = histogram(v) - na, nb = histogram(v, normed=True) - wa, wb = histogram(v, weights=w) - nwa, nwb = histogram(v, weights=w, normed=True) - assert_array_almost_equal(a * 5, wa) - assert_array_almost_equal(na, nwa) - - # Check weights are properly applied. - v = np.linspace(0, 10, 10) - w = np.concatenate((np.zeros(5), np.ones(5))) - wa, wb = histogram(v, bins=np.arange(11), weights=w) - assert_array_almost_equal(wa, w) - - # Check with integer weights - wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1]) - assert_array_equal(wa, [4, 5, 0, 1]) - wa, wb = histogram( - [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True) - assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. 
* 4) - - # Check weights with non-uniform bin widths - a, b = histogram( - np.arange(9), [0, 1, 3, 6, 10], - weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True) - assert_almost_equal(a, [.2, .1, .1, .075]) - - def test_exotic_weights(self): - - # Test the use of weights that are not integer or floats, but e.g. - # complex numbers or object types. - - # Complex weights - values = np.array([1.3, 2.5, 2.3]) - weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2]) - - # Check with custom bins - wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) - assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) - - # Check with even bins - wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) - assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3])) - - # Decimal weights - from decimal import Decimal - values = np.array([1.3, 2.5, 2.3]) - weights = np.array([Decimal(1), Decimal(2), Decimal(3)]) - - # Check with custom bins - wa, wb = histogram(values, bins=[0, 2, 3], weights=weights) - assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) - - # Check with even bins - wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights) - assert_array_almost_equal(wa, [Decimal(1), Decimal(5)]) - - def test_no_side_effects(self): - # This is a regression test that ensures that values passed to - # ``histogram`` are unchanged. - values = np.array([1.3, 2.5, 2.3]) - np.histogram(values, range=[-10, 10], bins=100) - assert_array_almost_equal(values, [1.3, 2.5, 2.3]) - - def test_empty(self): - a, b = histogram([], bins=([0, 1])) - assert_array_equal(a, np.array([0])) - assert_array_equal(b, np.array([0, 1])) - - def test_error_binnum_type (self): - # Tests if right Error is raised if bins argument is float - vals = np.linspace(0.0, 1.0, num=100) - histogram(vals, 5) - assert_raises(TypeError, histogram, vals, 2.4) - - def test_finite_range(self): - # Normal ranges should be fine - vals = np.linspace(0.0, 1.0, num=100) - histogram(vals, range=[0.25,0.75]) - assert_raises(ValueError, histogram, vals, range=[np.nan,0.75]) - assert_raises(ValueError, histogram, vals, range=[0.25,np.inf]) - - def test_bin_edge_cases(self): - # Ensure that floating-point computations correctly place edge cases. 
- arr = np.array([337, 404, 739, 806, 1007, 1811, 2012]) - hist, edges = np.histogram(arr, bins=8296, range=(2, 2280)) - mask = hist > 0 - left_edges = edges[:-1][mask] - right_edges = edges[1:][mask] - for x, left, right in zip(arr, left_edges, right_edges): - assert_(x >= left) - assert_(x < right) - - def test_last_bin_inclusive_range(self): - arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.]) - hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5)) - assert_equal(hist[-1], 1) - - def test_unsigned_monotonicity_check(self): - # Ensures ValueError is raised if bins not increasing monotonically - # when bins contain unsigned values (see #9222) - arr = np.array([2]) - bins = np.array([1, 3, 1], dtype='uint64') - with assert_raises(ValueError): - hist, edges = np.histogram(arr, bins=bins) - - -class TestHistogramOptimBinNums(object): - """ - Provide test coverage when using provided estimators for optimal number of - bins - """ - - def test_empty(self): - estimator_list = ['fd', 'scott', 'rice', 'sturges', - 'doane', 'sqrt', 'auto'] - # check it can deal with empty data - for estimator in estimator_list: - a, b = histogram([], bins=estimator) - assert_array_equal(a, np.array([0])) - assert_array_equal(b, np.array([0, 1])) - - def test_simple(self): - """ - Straightforward testing with a mixture of linspace data (for - consistency). All test values have been precomputed and the values - shouldn't change - """ - # Some basic sanity checking, with some fixed data. - # Checking for the correct number of bins - basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7, - 'doane': 8, 'sqrt': 8, 'auto': 7}, - 500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10, - 'doane': 12, 'sqrt': 23, 'auto': 10}, - 5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14, - 'doane': 17, 'sqrt': 71, 'auto': 17}} - - for testlen, expectedResults in basic_test.items(): - # Create some sort of non uniform data to test with - # (2 peak uniform mixture) - x1 = np.linspace(-10, -1, testlen // 5 * 2) - x2 = np.linspace(1, 10, testlen // 5 * 3) - x = np.concatenate((x1, x2)) - for estimator, numbins in expectedResults.items(): - a, b = np.histogram(x, estimator) - assert_equal(len(a), numbins, err_msg="For the {0} estimator " - "with datasize of {1}".format(estimator, testlen)) - - def test_small(self): - """ - Smaller datasets have the potential to cause issues with the data - adaptive methods, especially the FD method. All bin numbers have been - precalculated. 
- """ - small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, - 'doane': 1, 'sqrt': 1}, - 2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2, - 'doane': 1, 'sqrt': 2}, - 3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3, - 'doane': 3, 'sqrt': 2}} - - for testlen, expectedResults in small_dat.items(): - testdat = np.arange(testlen) - for estimator, expbins in expectedResults.items(): - a, b = np.histogram(testdat, estimator) - assert_equal(len(a), expbins, err_msg="For the {0} estimator " - "with datasize of {1}".format(estimator, testlen)) - - def test_incorrect_methods(self): - """ - Check a Value Error is thrown when an unknown string is passed in - """ - check_list = ['mad', 'freeman', 'histograms', 'IQR'] - for estimator in check_list: - assert_raises(ValueError, histogram, [1, 2, 3], estimator) - - def test_novariance(self): - """ - Check that methods handle no variance in data - Primarily for Scott and FD as the SD and IQR are both 0 in this case - """ - novar_dataset = np.ones(100) - novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1, - 'doane': 1, 'sqrt': 1, 'auto': 1} - - for estimator, numbins in novar_resultdict.items(): - a, b = np.histogram(novar_dataset, estimator) - assert_equal(len(a), numbins, err_msg="{0} estimator, " - "No Variance test".format(estimator)) - - def test_outlier(self): - """ - Check the FD, Scott and Doane with outliers. - - The FD estimates a smaller binwidth since it's less affected by - outliers. Since the range is so (artificially) large, this means more - bins, most of which will be empty, but the data of interest usually is - unaffected. The Scott estimator is more affected and returns fewer bins, - despite most of the variance being in one area of the data. The Doane - estimator lies somewhere between the other two. - """ - xcenter = np.linspace(-10, 10, 50) - outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter)) - - outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11} - - for estimator, numbins in outlier_resultdict.items(): - a, b = np.histogram(outlier_dataset, estimator) - assert_equal(len(a), numbins) - - def test_simple_range(self): - """ - Straightforward testing with a mixture of linspace data (for - consistency). Adding in a 3rd mixture that will then be - completely ignored. All test values have been precomputed and - the shouldn't change. - """ - # some basic sanity checking, with some fixed data. 
- # Checking for the correct number of bins - basic_test = { - 50: {'fd': 8, 'scott': 8, 'rice': 15, - 'sturges': 14, 'auto': 14}, - 500: {'fd': 15, 'scott': 16, 'rice': 32, - 'sturges': 20, 'auto': 20}, - 5000: {'fd': 33, 'scott': 33, 'rice': 69, - 'sturges': 27, 'auto': 33} - } - - for testlen, expectedResults in basic_test.items(): - # create some sort of non uniform data to test with - # (3 peak uniform mixture) - x1 = np.linspace(-10, -1, testlen // 5 * 2) - x2 = np.linspace(1, 10, testlen // 5 * 3) - x3 = np.linspace(-100, -50, testlen) - x = np.hstack((x1, x2, x3)) - for estimator, numbins in expectedResults.items(): - a, b = np.histogram(x, estimator, range = (-20, 20)) - msg = "For the {0} estimator".format(estimator) - msg += " with datasize of {0}".format(testlen) - assert_equal(len(a), numbins, err_msg=msg) - - def test_simple_weighted(self): - """ - Check that weighted data raises a TypeError - """ - estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] - for estimator in estimator_list: - assert_raises(TypeError, histogram, [1, 2, 3], - estimator, weights=[1, 2, 3]) - - -class TestHistogramdd(object): - - def test_simple(self): - x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], - [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) - H, edges = histogramdd(x, (2, 3, 3), - range=[[-1, 1], [0, 3], [0, 3]]) - answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) - assert_array_equal(H, answer) - - # Check normalization - ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] - H, edges = histogramdd(x, bins=ed, normed=True) - assert_(np.all(H == answer / 12.)) - - # Check that H has the correct shape. - H, edges = histogramdd(x, (2, 3, 4), - range=[[-1, 1], [0, 3], [0, 4]], - normed=True) - answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], - [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) - assert_array_almost_equal(H, answer / 6., 4) - # Check that a sequence of arrays is accepted and H has the correct - # shape. - z = [np.squeeze(y) for y in split(x, 3, axis=1)] - H, edges = histogramdd( - z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) - answer = np.array([[[0, 0], [0, 0], [0, 0]], - [[0, 1], [0, 0], [1, 0]], - [[0, 1], [0, 0], [0, 0]], - [[0, 0], [0, 0], [0, 0]]]) - assert_array_equal(H, answer) - - Z = np.zeros((5, 5, 5)) - Z[list(range(5)), list(range(5)), list(range(5))] = 1. - H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) - assert_array_equal(H, Z) - - def test_shape_3d(self): - # All possible permutations for bins of different lengths in 3D. - bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), - (4, 5, 6)) - r = rand(10, 3) - for b in bins: - H, edges = histogramdd(r, b) - assert_(H.shape == b) - - def test_shape_4d(self): - # All possible permutations for bins of different lengths in 4D. 
- bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), - (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), - (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), - (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), - (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), - (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) - - r = rand(10, 4) - for b in bins: - H, edges = histogramdd(r, b) - assert_(H.shape == b) - - def test_weights(self): - v = rand(100, 2) - hist, edges = histogramdd(v) - n_hist, edges = histogramdd(v, normed=True) - w_hist, edges = histogramdd(v, weights=np.ones(100)) - assert_array_equal(w_hist, hist) - w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True) - assert_array_equal(w_hist, n_hist) - w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) - assert_array_equal(w_hist, 2 * hist) - - def test_identical_samples(self): - x = np.zeros((10, 2), int) - hist, edges = histogramdd(x, bins=2) - assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) - - def test_empty(self): - a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) - assert_array_max_ulp(a, np.array([[0.]])) - a, b = np.histogramdd([[], [], []], bins=2) - assert_array_max_ulp(a, np.zeros((2, 2, 2))) - - def test_bins_errors(self): - # There are two ways to specify bins. Check for the right errors - # when mixing those. - x = np.arange(8).reshape(2, 4) - assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) - assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) - assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) - assert_raises( - ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) - assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) - - def test_inf_edges(self): - # Test using +/-inf bin edges works. See #1788. - with np.errstate(invalid='ignore'): - x = np.arange(6).reshape(3, 2) - expected = np.array([[1, 0], [0, 1], [0, 1]]) - h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) - assert_allclose(h, expected) - h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) - assert_allclose(h, expected) - h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) - assert_allclose(h, expected) - - def test_rightmost_binedge(self): - # Test event very close to rightmost binedge. See Github issue #4266 - x = [0.9999999995] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) - x = [1.0] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) - x = [1.0000000001] - bins = [[0., 0.5, 1.0]] - hist, _ = histogramdd(x, bins=bins) - assert_(hist[0] == 0.0) - assert_(hist[1] == 1.) 
-        x = [1.0001]
-        bins = [[0., 0.5, 1.0]]
-        hist, _ = histogramdd(x, bins=bins)
-        assert_(hist[0] == 0.0)
-        assert_(hist[1] == 0.0)
-
-    def test_finite_range(self):
-        vals = np.random.random((100, 3))
-        histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]])
-        assert_raises(ValueError, histogramdd, vals,
-                      range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]])
-        assert_raises(ValueError, histogramdd, vals,
-                      range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]])
-
-
 class TestUnique(object):
 
     def test_simple(self):
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
new file mode 100644
index 000000000..58547dc17
--- /dev/null
+++ b/numpy/lib/tests/test_histograms.py
@@ -0,0 +1,588 @@
+from __future__ import division, absolute_import, print_function
+
+import numpy as np
+
+from numpy.lib.histograms import histogram, histogramdd
+from numpy.testing import (
+    run_module_suite, assert_, assert_equal, assert_array_equal,
+    assert_almost_equal, assert_array_almost_equal, assert_raises,
+    assert_allclose, assert_array_max_ulp, assert_warns, assert_raises_regex,
+    dec, suppress_warnings, HAS_REFCOUNT,
+    )
+
+
+class TestHistogram(object):
+
+    def setup(self):
+        pass
+
+    def teardown(self):
+        pass
+
+    def test_simple(self):
+        n = 100
+        v = np.random.rand(n)
+        (a, b) = histogram(v)
+        # check if the sum of the bins equals the number of samples
+        assert_equal(np.sum(a, axis=0), n)
+        # check that the bin counts are evenly spaced when the data is from
+        # a linear function
+        (a, b) = histogram(np.linspace(0, 10, 100))
+        assert_array_equal(a, 10)
+
+    def test_one_bin(self):
+        # Ticket 632
+        hist, edges = histogram([1, 2, 3, 4], [1, 2])
+        assert_array_equal(hist, [2, ])
+        assert_array_equal(edges, [1, 2])
+        assert_raises(ValueError, histogram, [1, 2], bins=0)
+        h, e = histogram([1, 2], bins=1)
+        assert_equal(h, np.array([2]))
+        assert_allclose(e, np.array([1., 2.]))
+
+    def test_normed(self):
+        # Check that the integral of the density equals 1.
+        n = 100
+        v = np.random.rand(n)
+        a, b = histogram(v, normed=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+        # Check with non-constant bin widths (buggy but backwards
+        # compatible)
+        v = np.arange(10)
+        bins = [0, 1, 5, 9, 10]
+        a, b = histogram(v, bins, normed=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+    def test_density(self):
+        # Check that the integral of the density equals 1.
+        n = 100
+        v = np.random.rand(n)
+        a, b = histogram(v, density=True)
+        area = np.sum(a * np.diff(b))
+        assert_almost_equal(area, 1)
+
+        # Check with non-constant bin widths
+        v = np.arange(10)
+        bins = [0, 1, 3, 6, 10]
+        a, b = histogram(v, bins, density=True)
+        assert_array_equal(a, .1)
+        assert_equal(np.sum(a * np.diff(b)), 1)
+
+        # Variable bin widths are especially useful to deal with
+        # infinities.
+        v = np.arange(10)
+        bins = [0, 1, 3, 6, np.inf]
+        a, b = histogram(v, bins, density=True)
+        assert_array_equal(a, [.1, .1, .1, 0.])
+
+        # Taken from a bug report from N. Becker on the numpy-discussion
+        # mailing list Aug. 6, 2010.
+        counts, dmy = np.histogram(
+            [1, 2, 3, 4], [0.5, 1.5, np.inf], density=True)
+        assert_equal(counts, [.25, 0])
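
A quick illustration of the ``density`` semantics exercised above (a sketch for reference, not part of the patch): with ``density=True`` each bin holds count / (total count * bin width), so the area under the histogram is 1 even when bin widths differ.

    import numpy as np

    # bins [0,1), [1,3), [3,6), [6,10] hold 1, 2, 3, 4 of the 10 samples;
    # density divides each count by (10 * bin width), giving 0.1 everywhere
    hist, edges = np.histogram(np.arange(10), bins=[0, 1, 3, 6, 10],
                               density=True)
    assert np.isclose((hist * np.diff(edges)).sum(), 1.0)
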
+    def test_outliers(self):
+        # Check that outliers are not tallied
+        a = np.arange(10) + .5
+
+        # Lower outliers
+        h, b = histogram(a, range=[0, 9])
+        assert_equal(h.sum(), 9)
+
+        # Upper outliers
+        h, b = histogram(a, range=[1, 10])
+        assert_equal(h.sum(), 9)
+
+        # Normalization
+        h, b = histogram(a, range=[1, 9], normed=True)
+        assert_almost_equal((h * np.diff(b)).sum(), 1, decimal=15)
+
+        # Weights
+        w = np.arange(10) + .5
+        h, b = histogram(a, range=[1, 9], weights=w, normed=True)
+        assert_equal((h * np.diff(b)).sum(), 1)
+
+        h, b = histogram(a, bins=8, range=[1, 9], weights=w)
+        assert_equal(h, w[1:-1])
+
+    def test_type(self):
+        # Check the type of the returned histogram
+        a = np.arange(10) + .5
+        h, b = histogram(a)
+        assert_(np.issubdtype(h.dtype, np.integer))
+
+        h, b = histogram(a, normed=True)
+        assert_(np.issubdtype(h.dtype, np.floating))
+
+        h, b = histogram(a, weights=np.ones(10, int))
+        assert_(np.issubdtype(h.dtype, np.integer))
+
+        h, b = histogram(a, weights=np.ones(10, float))
+        assert_(np.issubdtype(h.dtype, np.floating))
+
+    def test_f32_rounding(self):
+        # gh-4799, check that the rounding of the edges works with float32
+        x = np.array([276.318359, -69.593948, 21.329449], dtype=np.float32)
+        y = np.array([5005.689453, 4481.327637, 6010.369629], dtype=np.float32)
+        counts_hist, xedges, yedges = np.histogram2d(x, y, bins=100)
+        assert_equal(counts_hist.sum(), 3.)
+
+    def test_weights(self):
+        v = np.random.rand(100)
+        w = np.ones(100) * 5
+        a, b = histogram(v)
+        na, nb = histogram(v, normed=True)
+        wa, wb = histogram(v, weights=w)
+        nwa, nwb = histogram(v, weights=w, normed=True)
+        assert_array_almost_equal(a * 5, wa)
+        assert_array_almost_equal(na, nwa)
+
+        # Check weights are properly applied.
+        v = np.linspace(0, 10, 10)
+        w = np.concatenate((np.zeros(5), np.ones(5)))
+        wa, wb = histogram(v, bins=np.arange(11), weights=w)
+        assert_array_almost_equal(wa, w)
+
+        # Check with integer weights
+        wa, wb = histogram([1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1])
+        assert_array_equal(wa, [4, 5, 0, 1])
+        wa, wb = histogram(
+            [1, 2, 2, 4], bins=4, weights=[4, 3, 2, 1], normed=True)
+        assert_array_almost_equal(wa, np.array([4, 5, 0, 1]) / 10. / 3. * 4)
+
+        # Check weights with non-uniform bin widths
+        a, b = histogram(
+            np.arange(9), [0, 1, 3, 6, 10],
+            weights=[2, 1, 1, 1, 1, 1, 1, 1, 1], density=True)
+        assert_almost_equal(a, [.2, .1, .1, .075])
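
The next test covers non-float weights. As a sketch of that behaviour (illustrative, not from the patch): weights of any dtype that supports addition are simply accumulated into their bins.

    import numpy as np

    vals = np.array([1.3, 2.5, 2.3])
    w = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
    hist, _ = np.histogram(vals, bins=[0, 2, 3], weights=w)
    # 1.3 falls in [0, 2): weight 1+2j; 2.5 and 2.3 fall in [2, 3]:
    # (-1+1j) + (2+2j) == 1+3j, matching the assertions in the test below
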
+    def test_exotic_weights(self):
+
+        # Test the use of weights that are not integers or floats, but e.g.
+        # complex numbers or object types.
+
+        # Complex weights
+        values = np.array([1.3, 2.5, 2.3])
+        weights = np.array([1, -1, 2]) + 1j * np.array([2, 1, 2])
+
+        # Check with custom bins
+        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+        # Check with even bins
+        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+        assert_array_almost_equal(wa, np.array([1, 1]) + 1j * np.array([2, 3]))
+
+        # Decimal weights
+        from decimal import Decimal
+        values = np.array([1.3, 2.5, 2.3])
+        weights = np.array([Decimal(1), Decimal(2), Decimal(3)])
+
+        # Check with custom bins
+        wa, wb = histogram(values, bins=[0, 2, 3], weights=weights)
+        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+        # Check with even bins
+        wa, wb = histogram(values, bins=2, range=[1, 3], weights=weights)
+        assert_array_almost_equal(wa, [Decimal(1), Decimal(5)])
+
+    def test_no_side_effects(self):
+        # This is a regression test that ensures that values passed to
+        # ``histogram`` are unchanged.
+        values = np.array([1.3, 2.5, 2.3])
+        np.histogram(values, range=[-10, 10], bins=100)
+        assert_array_almost_equal(values, [1.3, 2.5, 2.3])
+
+    def test_empty(self):
+        a, b = histogram([], bins=([0, 1]))
+        assert_array_equal(a, np.array([0]))
+        assert_array_equal(b, np.array([0, 1]))
+
+    def test_error_binnum_type(self):
+        # Tests that the right error is raised if the bins argument is a float
+        vals = np.linspace(0.0, 1.0, num=100)
+        histogram(vals, 5)
+        assert_raises(TypeError, histogram, vals, 2.4)
+
+    def test_finite_range(self):
+        # Normal ranges should be fine
+        vals = np.linspace(0.0, 1.0, num=100)
+        histogram(vals, range=[0.25, 0.75])
+        assert_raises(ValueError, histogram, vals, range=[np.nan, 0.75])
+        assert_raises(ValueError, histogram, vals, range=[0.25, np.inf])
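
For reference, a minimal sketch (not part of the patch) of the range validation tested above: non-finite ``range`` bounds are rejected with a ValueError before any binning takes place.

    import numpy as np

    vals = np.linspace(0.0, 1.0, num=100)
    np.histogram(vals, range=(0.25, 0.75))   # fine: finite bounds
    try:
        np.histogram(vals, range=(0.25, np.inf))
    except ValueError:
        pass                                 # non-finite bound is rejected
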
+    def test_bin_edge_cases(self):
+        # Ensure that floating-point computations correctly place edge cases.
+        arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
+        hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
+        mask = hist > 0
+        left_edges = edges[:-1][mask]
+        right_edges = edges[1:][mask]
+        for x, left, right in zip(arr, left_edges, right_edges):
+            assert_(x >= left)
+            assert_(x < right)
+
+    def test_last_bin_inclusive_range(self):
+        arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
+        hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
+        assert_equal(hist[-1], 1)
+
+    def test_unsigned_monotonicity_check(self):
+        # Ensures ValueError is raised if bins not increasing monotonically
+        # when bins contain unsigned values (see #9222)
+        arr = np.array([2])
+        bins = np.array([1, 3, 1], dtype='uint64')
+        with assert_raises(ValueError):
+            hist, edges = np.histogram(arr, bins=bins)
+
+    def test_object_array_of_0d(self):
+        # gh-7864
+        assert_raises(ValueError,
+            histogram, [np.array([0.4]) for i in range(10)] + [-np.inf])
+        assert_raises(ValueError,
+            histogram, [np.array([0.4]) for i in range(10)] + [np.inf])
+
+        # these should not crash
+        np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
+        np.histogram([np.array([0.5]) for i in range(10)] + [.5])
+
+    def test_some_nan_values(self):
+        # gh-7503
+        one_nan = np.array([0, 1, np.nan])
+        all_nan = np.array([np.nan, np.nan])
+
+        # the internal comparisons with NaN give warnings
+        sup = suppress_warnings()
+        sup.filter(RuntimeWarning)
+        with sup:
+            # can't infer range with nan
+            assert_raises(ValueError, histogram, one_nan, bins='auto')
+            assert_raises(ValueError, histogram, all_nan, bins='auto')
+
+            # explicit range solves the problem
+            h, b = histogram(one_nan, bins='auto', range=(0, 1))
+            assert_equal(h.sum(), 2)  # nan is not counted
+            h, b = histogram(all_nan, bins='auto', range=(0, 1))
+            assert_equal(h.sum(), 0)  # nan is not counted
+
+            # as does an explicit set of bins
+            h, b = histogram(one_nan, bins=[0, 1])
+            assert_equal(h.sum(), 2)  # nan is not counted
+            h, b = histogram(all_nan, bins=[0, 1])
+            assert_equal(h.sum(), 0)  # nan is not counted
+
+    def test_datetime(self):
+        begin = np.datetime64('2000-01-01', 'D')
+        offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
+        bins = np.array([0, 2, 7, 20])
+        dates = begin + offsets
+        date_bins = begin + bins
+
+        td = np.dtype('timedelta64[D]')
+
+        # Results should be the same for integer offsets or datetime values.
+        # For now, only explicit bins are supported, since linspace does not
+        # work on datetimes or timedeltas
+        d_count, d_edge = histogram(dates, bins=date_bins)
+        t_count, t_edge = histogram(offsets.astype(td), bins=bins.astype(td))
+        i_count, i_edge = histogram(offsets, bins=bins)
+
+        assert_equal(d_count, i_count)
+        assert_equal(t_count, i_count)
+
+        assert_equal((d_edge - begin).astype(int), i_edge)
+        assert_equal(t_edge.astype(int), i_edge)
+
+        assert_equal(d_edge.dtype, dates.dtype)
+        assert_equal(t_edge.dtype, td)
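
A condensed sketch (not part of the patch) of the datetime behaviour tested above: given explicit edges of a matching dtype, datetimes bin exactly like their integer day offsets.

    import numpy as np

    begin = np.datetime64('2000-01-01', 'D')
    offsets = np.array([0, 0, 1, 1, 2, 3, 5, 10, 20])
    bins = np.array([0, 2, 7, 20])
    d_count, d_edge = np.histogram(begin + offsets, bins=begin + bins)
    i_count, i_edge = np.histogram(offsets, bins=bins)
    # d_count == i_count, and d_edge keeps the datetime64[D] dtype
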
+
+
+class TestHistogramOptimBinNums(object):
+    """
+    Provide test coverage when using provided estimators for optimal number of
+    bins
+    """
+
+    def test_empty(self):
+        estimator_list = ['fd', 'scott', 'rice', 'sturges',
+                          'doane', 'sqrt', 'auto']
+        # check it can deal with empty data
+        for estimator in estimator_list:
+            a, b = histogram([], bins=estimator)
+            assert_array_equal(a, np.array([0]))
+            assert_array_equal(b, np.array([0, 1]))
+
+    def test_simple(self):
+        """
+        Straightforward testing with a mixture of linspace data (for
+        consistency). All test values have been precomputed and the values
+        shouldn't change
+        """
+        # Some basic sanity checking, with some fixed data.
+        # Checking for the correct number of bins
+        basic_test = {50: {'fd': 4, 'scott': 4, 'rice': 8, 'sturges': 7,
+                           'doane': 8, 'sqrt': 8, 'auto': 7},
+                      500: {'fd': 8, 'scott': 8, 'rice': 16, 'sturges': 10,
+                            'doane': 12, 'sqrt': 23, 'auto': 10},
+                      5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
+                             'doane': 17, 'sqrt': 71, 'auto': 17}}
+
+        for testlen, expectedResults in basic_test.items():
+            # Create some sort of non uniform data to test with
+            # (2 peak uniform mixture)
+            x1 = np.linspace(-10, -1, testlen // 5 * 2)
+            x2 = np.linspace(1, 10, testlen // 5 * 3)
+            x = np.concatenate((x1, x2))
+            for estimator, numbins in expectedResults.items():
+                a, b = np.histogram(x, estimator)
+                assert_equal(len(a), numbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_small(self):
+        """
+        Smaller datasets have the potential to cause issues with the data
+        adaptive methods, especially the FD method. All bin numbers have been
+        precalculated.
+        """
+        small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                         'doane': 1, 'sqrt': 1},
+                     2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
+                         'doane': 1, 'sqrt': 2},
+                     3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
+                         'doane': 3, 'sqrt': 2}}
+
+        for testlen, expectedResults in small_dat.items():
+            testdat = np.arange(testlen)
+            for estimator, expbins in expectedResults.items():
+                a, b = np.histogram(testdat, estimator)
+                assert_equal(len(a), expbins, err_msg="For the {0} estimator "
+                             "with datasize of {1}".format(estimator, testlen))
+
+    def test_incorrect_methods(self):
+        """
+        Check a ValueError is thrown when an unknown string is passed in
+        """
+        check_list = ['mad', 'freeman', 'histograms', 'IQR']
+        for estimator in check_list:
+            assert_raises(ValueError, histogram, [1, 2, 3], estimator)
+
+    def test_novariance(self):
+        """
+        Check that methods handle no variance in data
+        Primarily for Scott and FD as the SD and IQR are both 0 in this case
+        """
+        novar_dataset = np.ones(100)
+        novar_resultdict = {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
+                            'doane': 1, 'sqrt': 1, 'auto': 1}
+
+        for estimator, numbins in novar_resultdict.items():
+            a, b = np.histogram(novar_dataset, estimator)
+            assert_equal(len(a), numbins, err_msg="{0} estimator, "
+                         "No Variance test".format(estimator))
+
+    def test_outlier(self):
+        """
+        Check the FD, Scott and Doane with outliers.
+
+        The FD estimates a smaller binwidth since it's less affected by
+        outliers. Since the range is so (artificially) large, this means more
+        bins, most of which will be empty, but the data of interest usually is
+        unaffected. The Scott estimator is more affected and returns fewer bins,
+        despite most of the variance being in one area of the data. The Doane
+        estimator lies somewhere between the other two.
+        """
+        xcenter = np.linspace(-10, 10, 50)
+        outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))
+
+        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}
+
+        for estimator, numbins in outlier_resultdict.items():
+            a, b = np.histogram(outlier_dataset, estimator)
+            assert_equal(len(a), numbins)
+
+    def test_simple_range(self):
+        """
+        Straightforward testing with a mixture of linspace data (for
+        consistency). Adding in a 3rd mixture that will then be
+        completely ignored. All test values have been precomputed and
+        they shouldn't change.
+ """ + # some basic sanity checking, with some fixed data. + # Checking for the correct number of bins + basic_test = { + 50: {'fd': 8, 'scott': 8, 'rice': 15, + 'sturges': 14, 'auto': 14}, + 500: {'fd': 15, 'scott': 16, 'rice': 32, + 'sturges': 20, 'auto': 20}, + 5000: {'fd': 33, 'scott': 33, 'rice': 69, + 'sturges': 27, 'auto': 33} + } + + for testlen, expectedResults in basic_test.items(): + # create some sort of non uniform data to test with + # (3 peak uniform mixture) + x1 = np.linspace(-10, -1, testlen // 5 * 2) + x2 = np.linspace(1, 10, testlen // 5 * 3) + x3 = np.linspace(-100, -50, testlen) + x = np.hstack((x1, x2, x3)) + for estimator, numbins in expectedResults.items(): + a, b = np.histogram(x, estimator, range = (-20, 20)) + msg = "For the {0} estimator".format(estimator) + msg += " with datasize of {0}".format(testlen) + assert_equal(len(a), numbins, err_msg=msg) + + def test_simple_weighted(self): + """ + Check that weighted data raises a TypeError + """ + estimator_list = ['fd', 'scott', 'rice', 'sturges', 'auto'] + for estimator in estimator_list: + assert_raises(TypeError, histogram, [1, 2, 3], + estimator, weights=[1, 2, 3]) + + +class TestHistogramdd(object): + + def test_simple(self): + x = np.array([[-.5, .5, 1.5], [-.5, 1.5, 2.5], [-.5, 2.5, .5], + [.5, .5, 1.5], [.5, 1.5, 2.5], [.5, 2.5, 2.5]]) + H, edges = histogramdd(x, (2, 3, 3), + range=[[-1, 1], [0, 3], [0, 3]]) + answer = np.array([[[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 1, 0], [0, 0, 1], [0, 0, 1]]]) + assert_array_equal(H, answer) + + # Check normalization + ed = [[-2, 0, 2], [0, 1, 2, 3], [0, 1, 2, 3]] + H, edges = histogramdd(x, bins=ed, normed=True) + assert_(np.all(H == answer / 12.)) + + # Check that H has the correct shape. + H, edges = histogramdd(x, (2, 3, 4), + range=[[-1, 1], [0, 3], [0, 4]], + normed=True) + answer = np.array([[[0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], + [[0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]) + assert_array_almost_equal(H, answer / 6., 4) + # Check that a sequence of arrays is accepted and H has the correct + # shape. + z = [np.squeeze(y) for y in np.split(x, 3, axis=1)] + H, edges = histogramdd( + z, bins=(4, 3, 2), range=[[-2, 2], [0, 3], [0, 2]]) + answer = np.array([[[0, 0], [0, 0], [0, 0]], + [[0, 1], [0, 0], [1, 0]], + [[0, 1], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0]]]) + assert_array_equal(H, answer) + + Z = np.zeros((5, 5, 5)) + Z[list(range(5)), list(range(5)), list(range(5))] = 1. + H, edges = histogramdd([np.arange(5), np.arange(5), np.arange(5)], 5) + assert_array_equal(H, Z) + + def test_shape_3d(self): + # All possible permutations for bins of different lengths in 3D. + bins = ((5, 4, 6), (6, 4, 5), (5, 6, 4), (4, 6, 5), (6, 5, 4), + (4, 5, 6)) + r = np.random.rand(10, 3) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_shape_4d(self): + # All possible permutations for bins of different lengths in 4D. 
+ bins = ((7, 4, 5, 6), (4, 5, 7, 6), (5, 6, 4, 7), (7, 6, 5, 4), + (5, 7, 6, 4), (4, 6, 7, 5), (6, 5, 7, 4), (7, 5, 4, 6), + (7, 4, 6, 5), (6, 4, 7, 5), (6, 7, 5, 4), (4, 6, 5, 7), + (4, 7, 5, 6), (5, 4, 6, 7), (5, 7, 4, 6), (6, 7, 4, 5), + (6, 5, 4, 7), (4, 7, 6, 5), (4, 5, 6, 7), (7, 6, 4, 5), + (5, 4, 7, 6), (5, 6, 7, 4), (6, 4, 5, 7), (7, 5, 6, 4)) + + r = np.random.rand(10, 4) + for b in bins: + H, edges = histogramdd(r, b) + assert_(H.shape == b) + + def test_weights(self): + v = np.random.rand(100, 2) + hist, edges = histogramdd(v) + n_hist, edges = histogramdd(v, normed=True) + w_hist, edges = histogramdd(v, weights=np.ones(100)) + assert_array_equal(w_hist, hist) + w_hist, edges = histogramdd(v, weights=np.ones(100) * 2, normed=True) + assert_array_equal(w_hist, n_hist) + w_hist, edges = histogramdd(v, weights=np.ones(100, int) * 2) + assert_array_equal(w_hist, 2 * hist) + + def test_identical_samples(self): + x = np.zeros((10, 2), int) + hist, edges = histogramdd(x, bins=2) + assert_array_equal(edges[0], np.array([-0.5, 0., 0.5])) + + def test_empty(self): + a, b = histogramdd([[], []], bins=([0, 1], [0, 1])) + assert_array_max_ulp(a, np.array([[0.]])) + a, b = np.histogramdd([[], [], []], bins=2) + assert_array_max_ulp(a, np.zeros((2, 2, 2))) + + def test_bins_errors(self): + # There are two ways to specify bins. Check for the right errors + # when mixing those. + x = np.arange(8).reshape(2, 4) + assert_raises(ValueError, np.histogramdd, x, bins=[-1, 2, 4, 5]) + assert_raises(ValueError, np.histogramdd, x, bins=[1, 0.99, 1, 1]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 2, 3]]) + assert_raises( + ValueError, np.histogramdd, x, bins=[1, 1, 1, [1, 2, 3, -3]]) + assert_(np.histogramdd(x, bins=[1, 1, 1, [1, 2, 3, 4]])) + + def test_inf_edges(self): + # Test using +/-inf bin edges works. See #1788. + with np.errstate(invalid='ignore'): + x = np.arange(6).reshape(3, 2) + expected = np.array([[1, 0], [0, 1], [0, 1]]) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 2, 10]]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, np.array([-1, 2, np.inf])]) + assert_allclose(h, expected) + h, e = np.histogramdd(x, bins=[3, [-np.inf, 3, np.inf]]) + assert_allclose(h, expected) + + def test_rightmost_binedge(self): + # Test event very close to rightmost binedge. See Github issue #4266 + x = [0.9999999995] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) + x = [1.0000000001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 1.) 
+ x = [1.0001] + bins = [[0., 0.5, 1.0]] + hist, _ = histogramdd(x, bins=bins) + assert_(hist[0] == 0.0) + assert_(hist[1] == 0.0) + + def test_finite_range(self): + vals = np.random.random((100, 3)) + histogramdd(vals, range=[[0.0, 1.0], [0.25, 0.75], [0.25, 0.5]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [0.25, 0.75], [0.25, np.inf]]) + assert_raises(ValueError, histogramdd, vals, + range=[[0.0, 1.0], [np.nan, 0.75], [0.25, 0.5]]) + + +if __name__ == "__main__": + run_module_suite() diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py index 0a6566bde..d1e032bbb 100644 --- a/numpy/linalg/tests/test_linalg.py +++ b/numpy/linalg/tests/test_linalg.py @@ -465,7 +465,7 @@ class TestSolve(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) assert_equal(linalg.solve(x, x).dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): class ArraySubclass(np.ndarray): @@ -532,7 +532,7 @@ class TestInv(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype) assert_equal(linalg.inv(x).dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -565,7 +565,7 @@ class TestEigvals(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): x = np.array([[1, 0.5], [-1, 1]], dtype=dtype) assert_equal(linalg.eigvals(x).dtype, get_complex_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -608,7 +608,7 @@ class TestEig(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(v.dtype, get_complex_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # Check that all kinds of 0-sized arrays work @@ -658,7 +658,7 @@ class TestSVD(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(s.dtype, get_real_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): # These raise errors currently @@ -765,7 +765,7 @@ class TestDet(LinalgSquareTestCase, LinalgGeneralizedSquareTestCase): assert_equal(s.dtype, get_real_dtype(dtype)) assert_equal(ph.dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_0_size(self): a = np.zeros((0, 0), dtype=np.complex64) @@ -861,7 +861,7 @@ class TestMatrixPower(object): assert_equal(mz, identity(M.shape[0])) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_one(self): def tz(M): @@ -869,7 +869,7 @@ class TestMatrixPower(object): assert_equal(mz, M) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_two(self): def tz(M): @@ -877,14 +877,14 @@ class TestMatrixPower(object): assert_equal(mz, dot(M, M)) assert_equal(mz.dtype, M.dtype) for M in [self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def testip_invert(self): def tz(M): mz = matrix_power(M, -1) assert_almost_equal(identity(M.shape[0]), dot(mz, M)) for M in [self.R90, self.Arb22, self.arbfloat, self.large]: - yield tz, M + tz(M) def test_invert_noninvertible(self): import numpy.linalg @@ -918,7 +918,7 @@ class TestEigvalsh(HermitianTestCase, 
HermitianGeneralizedTestCase): w = np.linalg.eigvalsh(x) assert_equal(w.dtype, get_real_dtype(dtype)) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_invalid(self): x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) @@ -995,7 +995,7 @@ class TestEigh(HermitianTestCase, HermitianGeneralizedTestCase): assert_equal(w.dtype, get_real_dtype(dtype)) assert_equal(v.dtype, dtype) for dtype in [single, double, csingle, cdouble]: - yield check, dtype + check(dtype) def test_invalid(self): x = np.array([[1, 0.5], [0.5, 1]], dtype=np.float32) @@ -1365,36 +1365,36 @@ class TestMatrixRank(object): def test_matrix_rank(self): # Full rank matrix - yield assert_equal, 4, matrix_rank(np.eye(4)) + assert_equal(4, matrix_rank(np.eye(4))) # rank deficient matrix I = np.eye(4) I[-1, -1] = 0. - yield assert_equal, matrix_rank(I), 3 + assert_equal(matrix_rank(I), 3) # All zeros - zero rank - yield assert_equal, matrix_rank(np.zeros((4, 4))), 0 + assert_equal(matrix_rank(np.zeros((4, 4))), 0) # 1 dimension - rank 1 unless all 0 - yield assert_equal, matrix_rank([1, 0, 0, 0]), 1 - yield assert_equal, matrix_rank(np.zeros((4,))), 0 + assert_equal(matrix_rank([1, 0, 0, 0]), 1) + assert_equal(matrix_rank(np.zeros((4,))), 0) # accepts array-like - yield assert_equal, matrix_rank([1]), 1 + assert_equal(matrix_rank([1]), 1) # greater than 2 dimensions treated as stacked matrices ms = np.array([I, np.eye(4), np.zeros((4,4))]) - yield assert_equal, matrix_rank(ms), np.array([3, 4, 0]) + assert_equal(matrix_rank(ms), np.array([3, 4, 0])) # works on scalar - yield assert_equal, matrix_rank(1), 1 + assert_equal(matrix_rank(1), 1) def test_symmetric_rank(self): - yield assert_equal, 4, matrix_rank(np.eye(4), hermitian=True) - yield assert_equal, 1, matrix_rank(np.ones((4, 4)), hermitian=True) - yield assert_equal, 0, matrix_rank(np.zeros((4, 4)), hermitian=True) + assert_equal(4, matrix_rank(np.eye(4), hermitian=True)) + assert_equal(1, matrix_rank(np.ones((4, 4)), hermitian=True)) + assert_equal(0, matrix_rank(np.zeros((4, 4)), hermitian=True)) # rank deficient matrix I = np.eye(4) I[-1, -1] = 0. - yield assert_equal, 3, matrix_rank(I, hermitian=True) + assert_equal(3, matrix_rank(I, hermitian=True)) # manually supplied tolerance I[-1, -1] = 1e-8 - yield assert_equal, 4, matrix_rank(I, hermitian=True, tol=0.99e-8) - yield assert_equal, 3, matrix_rank(I, hermitian=True, tol=1.01e-8) + assert_equal(4, matrix_rank(I, hermitian=True, tol=0.99e-8)) + assert_equal(3, matrix_rank(I, hermitian=True, tol=1.01e-8)) def test_reduced_rank(): diff --git a/numpy/ma/core.py b/numpy/ma/core.py index db8bc7d53..fe092f552 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -1624,7 +1624,7 @@ def make_mask(m, copy=False, shrink=True, dtype=MaskType): # Make sure the input dtype is valid. dtype = make_mask_descr(dtype) - + # legacy boolean special case: "existence of fields implies true" if isinstance(m, ndarray) and m.dtype.fields and dtype == np.bool_: return np.ones(m.shape, dtype=dtype) @@ -2245,12 +2245,14 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): Mask using floating point equality. Return a MaskedArray, masked where the data in array `x` are approximately - equal to `value`, i.e. where the following condition is True + equal to `value`, determined using `isclose`. The default tolerances for + `masked_values` are the same as those for `isclose`. 
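
The replacement below swaps the hand-rolled tolerance comparison for ``np.isclose``. A sketch of the resulting behaviour (illustrative, not from the patch):

    import numpy as np

    # float input: mask anything within atol + rtol*|value| of `value`,
    # exactly as np.isclose would report it
    x = np.ma.masked_values(np.array([1.0, 1.1, 1.0 + 1e-9]), 1.0)
    # x.mask == [True, False, True]; x.fill_value == 1.0

    # integer input: exact equality, as with masked_equal
    y = np.ma.masked_values(np.array([1, 2, 3]), 2)
    # y.mask == [False, True, False]
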
- (abs(x - value) <= atol+rtol*abs(value)) + For integer types, exact equality is used, in the same way as + `masked_equal`. The fill_value is set to `value` and the mask is set to ``nomask`` if - possible. For integers, consider using ``masked_equal``. + possible. Parameters ---------- @@ -2258,10 +2260,8 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): Array to mask. value : float Masking value. - rtol : float, optional - Tolerance parameter. - atol : float, optional - Tolerance parameter (1e-8). + rtol, atol : float, optional + Tolerance parameters passed on to `isclose` copy : bool, optional Whether to return a copy of `x`. shrink : bool, optional @@ -2309,17 +2309,13 @@ def masked_values(x, value, rtol=1e-5, atol=1e-8, copy=True, shrink=True): fill_value=999999) """ - mabs = umath.absolute xnew = filled(x, value) - if issubclass(xnew.dtype.type, np.floating): - condition = umath.less_equal( - mabs(xnew - value), atol + rtol * mabs(value)) - mask = getmask(x) + if np.issubdtype(xnew.dtype, np.floating): + mask = np.isclose(xnew, value, atol=atol, rtol=rtol) else: - condition = umath.equal(xnew, value) - mask = nomask - mask = mask_or(mask, make_mask(condition, shrink=shrink), shrink=shrink) - return masked_array(xnew, mask=mask, copy=copy, fill_value=value) + mask = umath.equal(xnew, value) + return masked_array( + xnew, mask=mask, copy=copy, fill_value=value, shrink=shrink) def masked_invalid(a, copy=True): @@ -2978,11 +2974,30 @@ class MaskedArray(ndarray): # heuristic it's not bad.) In all other cases, we make a copy of # the mask, so that future modifications to 'self' do not end up # side-effecting 'obj' as well. - if (obj.__array_interface__["data"][0] + if (_mask is not nomask and obj.__array_interface__["data"][0] != self.__array_interface__["data"][0]): - _mask = _mask.copy() + # We should make a copy. But we could get here via astype, + # in which case the mask might need a new dtype as well + # (e.g., changing to or from a structured dtype), and the + # order could have changed. So, change the mask type if + # needed and use astype instead of copy. + if self.dtype == obj.dtype: + _mask_dtype = _mask.dtype + else: + _mask_dtype = make_mask_descr(self.dtype) + + if self.flags.c_contiguous: + order = "C" + elif self.flags.f_contiguous: + order = "F" + else: + order = "K" + + _mask = _mask.astype(_mask_dtype, order) + else: _mask = nomask + self._mask = _mask # Finalize the mask if self._mask is not nomask: @@ -3140,45 +3155,6 @@ class MaskedArray(ndarray): return output view.__doc__ = ndarray.view.__doc__ - def astype(self, newtype): - """ - Returns a copy of the MaskedArray cast to given newtype. - - Returns - ------- - output : MaskedArray - A copy of self cast to input newtype. - The returned record shape matches self.shape. 
- - Examples - -------- - >>> x = np.ma.array([[1,2,3.1],[4,5,6],[7,8,9]], mask=[0] + [1,0]*4) - >>> print(x) - [[1.0 -- 3.1] - [-- 5.0 --] - [7.0 -- 9.0]] - >>> print(x.astype(int32)) - [[1 -- 3] - [-- 5 --] - [7 -- 9]] - - """ - newtype = np.dtype(newtype) - newmasktype = make_mask_descr(newtype) - - output = self._data.astype(newtype).view(type(self)) - output._update_from(self) - - if self._mask is nomask: - output._mask = nomask - else: - output._mask = self._mask.astype(newmasktype) - - # Don't check _fill_value if it's None, that'll speed things up - if self._fill_value is not None: - output._fill_value = _check_fill_value(self._fill_value, newtype) - return output - def __getitem__(self, indx): """ x.__getitem__(y) <==> x[y] @@ -4303,7 +4279,7 @@ class MaskedArray(ndarray): elif self._mask: raise MaskError('Cannot convert masked element to a Python int.') return int(self.item()) - + def __long__(self): """ Convert to long. @@ -4314,7 +4290,7 @@ class MaskedArray(ndarray): elif self._mask: raise MaskError('Cannot convert masked element to a Python long.') return long(self.item()) - + def get_imag(self): """ @@ -6961,6 +6937,7 @@ def transpose(a, axes=None): [[False False] [False True]], fill_value = 999999) + >>> ma.transpose(x) masked_array(data = [[0 2] diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 323fbce38..360d50d8a 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -19,7 +19,7 @@ __all__ = [ 'hsplit', 'hstack', 'isin', 'in1d', 'intersect1d', 'mask_cols', 'mask_rowcols', 'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_', 'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack', - 'setdiff1d', 'setxor1d', 'unique', 'union1d', 'vander', 'vstack', + 'setdiff1d', 'setxor1d', 'stack', 'unique', 'union1d', 'vander', 'vstack', ] import itertools @@ -357,6 +357,7 @@ vstack = row_stack = _fromnxfunction_seq('vstack') hstack = _fromnxfunction_seq('hstack') column_stack = _fromnxfunction_seq('column_stack') dstack = _fromnxfunction_seq('dstack') +stack = _fromnxfunction_seq('stack') hsplit = _fromnxfunction_single('hsplit') diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index cc447e37e..d5622e4bb 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -4982,6 +4982,21 @@ class TestMaskedConstant(object): assert_not_equal(repr(a), 'masked') +class TestMaskedWhereAliases(object): + + # TODO: Test masked_object, masked_equal, ... 
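
One addition from the ``numpy/ma/extras.py`` hunk above that is worth a sketch: ``stack`` is now wrapped for masked arrays (assuming this patch is applied), stacking data and mask together along the new axis.

    import numpy as np

    a = np.ma.masked_array([0, 1, 2], mask=[0, 1, 0])
    b = np.ma.masked_array([9, 8, 7], mask=[1, 0, 0])
    c = np.ma.stack([a, b], axis=0)
    # c.shape == (2, 3); c[0].mask matches a.mask and c[1].mask matches b.mask
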
+ + def test_masked_values(self): + res = masked_values(np.array([-32768.0]), np.int16(-32768)) + assert_equal(res.mask, [True]) + + res = masked_values(np.inf, np.inf) + assert_equal(res.mask, True) + + res = np.ma.masked_values(np.inf, -np.inf) + assert_equal(res.mask, False) + + def test_masked_array(): a = np.ma.array([0, 1, 2, 3], mask=[0, 0, 1, 0]) assert_equal(np.argwhere(a), [[1], [3]]) @@ -5037,10 +5052,37 @@ def test_ufunc_with_output(): y = np.add(x, 1., out=x) assert_(y is x) + def test_astype(): descr = [('v', int, 3), ('x', [('y', float)])] - x = array(([1, 2, 3], (1.0,)), dtype=descr) - assert_equal(x, x.astype(descr)) + x = array([ + [([1, 2, 3], (1.0,)), ([1, 2, 3], (2.0,))], + [([1, 2, 3], (3.0,)), ([1, 2, 3], (4.0,))]], dtype=descr) + x[0]['v'][0] = np.ma.masked + + x_a = x.astype(descr) + assert x_a.dtype.names == np.dtype(descr).names + assert x_a.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a) + + assert_(x is x.astype(x.dtype, copy=False)) + assert_equal(type(x.astype(x.dtype, subok=False)), np.ndarray) + + x_f = x.astype(x.dtype, order='F') + assert_(x_f.flags.f_contiguous) + assert_(x_f.mask.flags.f_contiguous) + + # Also test the same indirectly, via np.array + x_a2 = np.array(x, dtype=descr, subok=True) + assert x_a2.dtype.names == np.dtype(descr).names + assert x_a2.mask.dtype.names == np.dtype(descr).names + assert_equal(x, x_a2) + + assert_(x is np.array(x, dtype=descr, copy=False, subok=True)) + + x_f2 = np.array(x, dtype=x.dtype, order='F', subok=True) + assert_(x_f2.flags.f_contiguous) + assert_(x_f2.mask.flags.f_contiguous) ############################################################################### diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index af9f42c2a..7687514fa 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -29,7 +29,7 @@ from numpy.ma.extras import ( ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, - diagflat + diagflat, stack, vstack, hstack ) import numpy.ma.extras as mae @@ -1589,5 +1589,87 @@ class TestShapeBase(object): assert_equal(b.mask.shape, b.data.shape) +class TestStack(object): + + def test_stack_1d(self): + a = masked_array([0, 1, 2], mask=[0, 1, 0]) + b = masked_array([9, 8, 7], mask=[1, 0, 0]) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_masks(self): + a = masked_array([0, 1, 2], mask=True) + b = masked_array([9, 8, 7], mask=False) + + c = stack([a, b], axis=0) + assert_equal(c.shape, (2, 3)) + assert_array_equal(a.mask, c[0].mask) + assert_array_equal(b.mask, c[1].mask) + + d = vstack([a, b]) + assert_array_equal(c.data, d.data) + assert_array_equal(c.mask, d.mask) + + c = stack([a, b], axis=1) + assert_equal(c.shape, (3, 2)) + assert_array_equal(a.mask, c[:, 0].mask) + assert_array_equal(b.mask, c[:, 1].mask) + + def test_stack_nd(self): + # 2D + shp = (3, 2) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, 
shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + # 4D + shp = (3, 2, 4, 5,) + d1 = np.random.randint(0, 10, shp) + d2 = np.random.randint(0, 10, shp) + m1 = np.random.randint(0, 2, shp).astype(bool) + m2 = np.random.randint(0, 2, shp).astype(bool) + a1 = masked_array(d1, mask=m1) + a2 = masked_array(d2, mask=m2) + + c = stack([a1, a2], axis=0) + c_shp = (2,) + shp + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[0].mask) + assert_array_equal(a2.mask, c[1].mask) + + c = stack([a1, a2], axis=-1) + c_shp = shp + (2,) + assert_equal(c.shape, c_shp) + assert_array_equal(a1.mask, c[..., 0].mask) + assert_array_equal(a2.mask, c[..., 1].mask) + + if __name__ == "__main__": run_module_suite() diff --git a/numpy/testing/decorators.py b/numpy/testing/decorators.py index b63850090..21bcdd798 100644 --- a/numpy/testing/decorators.py +++ b/numpy/testing/decorators.py @@ -3,4 +3,6 @@ Back compatibility decorators module. It will import the appropriate set of tools """ +import os + from .nose_tools.decorators import * diff --git a/numpy/testing/nose_tools/decorators.py b/numpy/testing/nose_tools/decorators.py index 12531e734..243c0c8c1 100644 --- a/numpy/testing/nose_tools/decorators.py +++ b/numpy/testing/nose_tools/decorators.py @@ -19,6 +19,9 @@ import collections from .utils import SkipTest, assert_warns +__all__ = ['slow', 'setastest', 'skipif', 'knownfailureif', 'deprecated', + 'parametrize',] + def slow(t): """ diff --git a/numpy/testing/nose_tools/parameterized.py b/numpy/testing/nose_tools/parameterized.py index 372928e3d..d094f7c7f 100644 --- a/numpy/testing/nose_tools/parameterized.py +++ b/numpy/testing/nose_tools/parameterized.py @@ -252,7 +252,8 @@ def default_name_func(func, num, p): return base_name + name_suffix -_test_runner_override = None +# force nose for numpy purposes. +_test_runner_override = 'nose' _test_runner_guess = False _test_runners = set(["unittest", "unittest2", "nose", "nose2", "pytest"]) _test_runner_aliases = { diff --git a/numpy/testing/nose_tools/utils.py b/numpy/testing/nose_tools/utils.py index 6c77e5e21..2d97b5c1e 100644 --- a/numpy/testing/nose_tools/utils.py +++ b/numpy/testing/nose_tools/utils.py @@ -1849,6 +1849,7 @@ def _gen_alignment_data(dtype=float32, type='binary', max_size=24): class IgnoreException(Exception): "Ignoring this exception due to disabled feature" + pass @contextlib.contextmanager diff --git a/numpy/testing/noseclasses.py b/numpy/testing/noseclasses.py index 563ed14ea..144c4e7e4 100644 --- a/numpy/testing/noseclasses.py +++ b/numpy/testing/noseclasses.py @@ -1,6 +1,5 @@ """ Back compatibility noseclasses module. It will import the appropriate set of tools - """ -from .nose_tools.noseclasses import * +from .nose_tools.noseclasses import *
\ No newline at end of file
diff --git a/numpy/testing/nosetester.py b/numpy/testing/nosetester.py
index b726684c9..949fae03e 100644
--- a/numpy/testing/nosetester.py
+++ b/numpy/testing/nosetester.py
@@ -3,8 +3,11 @@ Back compatibility nosetester module. It will import
 the appropriate set of tools
 
 """
+import os
+
 from .nose_tools.nosetester import *
 
+
 __all__ = ['get_package_name', 'run_module_suite', 'NoseTester',
            '_numpy_tester', 'get_package_name', 'import_nose',
            'suppress_warnings']
diff --git a/numpy/testing/pytest_tools/__init__.py b/numpy/testing/pytest_tools/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/numpy/testing/pytest_tools/__init__.py
diff --git a/numpy/testing/pytest_tools/decorators.py b/numpy/testing/pytest_tools/decorators.py
new file mode 100644
index 000000000..08a39e0c0
--- /dev/null
+++ b/numpy/testing/pytest_tools/decorators.py
@@ -0,0 +1,278 @@
+"""
+Compatibility shim providing the nose decorators on top of pytest.
+
+Decorators for labeling and modifying behavior of test objects.
+
+Decorators that merely return a modified version of the original
+function object are straightforward.
+
+Decorators that return a new function will not preserve meta-data such as
+function name, setup and teardown functions and so on.
+
+"""
+from __future__ import division, absolute_import, print_function
+
+import collections
+
+from .utils import SkipTest, assert_warns
+
+__all__ = ['slow', 'setastest', 'skipif', 'knownfailureif', 'deprecated',
+           'parametrize',]
+
+
+def slow(t):
+    """
+    Label a test as 'slow'.
+
+    The exact definition of a slow test is obviously both subjective and
+    hardware-dependent, but in general any individual test that requires more
+    than a second or two should be labeled as slow (the whole suite consists
+    of thousands of tests, so even a second is significant).
+
+    Parameters
+    ----------
+    t : callable
+        The test to mark as slow.
+
+    Returns
+    -------
+    t : callable
+        The decorated test `t`.
+
+    Examples
+    --------
+    The `numpy.testing` module includes ``import decorators as dec``.
+    A test can be decorated as slow like this::
+
+      from numpy.testing import *
+
+      @dec.slow
+      def test_big(self):
+          print('Big, slow test')
+
+    """
+    import pytest
+
+    return pytest.mark.slow(t)
+
+
+def setastest(tf=True):
+    """
+    Signals to nose that this function is or is not a test.
+
+    Parameters
+    ----------
+    tf : bool
+        If True, specifies that the decorated callable is a test.
+        If False, specifies that the decorated callable is not a test.
+        Default is True.
+
+    Examples
+    --------
+    `setastest` can be used in the following way::
+
+      from numpy.testing.decorators import setastest
+
+      @setastest(False)
+      def func_with_test_in_name(arg1, arg2):
+          pass
+
+    """
+    def set_test(t):
+        t.__test__ = tf
+        return t
+    return set_test
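
A short usage sketch for the two shims above (assuming they end up exposed as ``numpy.testing.dec``, like their nose counterparts):

    from numpy.testing import dec

    @dec.slow
    def test_expensive():
        pass          # carries the pytest 'slow' mark

    @dec.setastest(False)
    def util_with_test_in_name():
        pass          # __test__ = False, so collectors skip it
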
+
+
+def skipif(skip_condition, msg=None):
+    """
+    Make function raise SkipTest exception if a given condition is true.
+
+    If the condition is a callable, it is used at runtime to dynamically
+    make the decision. This is useful for tests that may require costly
+    imports, to delay the cost until the test suite is actually executed.
+
+    Parameters
+    ----------
+    skip_condition : bool or callable
+        Flag to determine whether to skip the decorated test.
+    msg : str, optional
+        Message to give on raising a SkipTest exception. Default is None.
+
+    Returns
+    -------
+    decorator : function
+        Decorator which, when applied to a function, causes SkipTest
+        to be raised when `skip_condition` is True, and the function
+        to be called normally otherwise.
+
+    Notes
+    -----
+    Undecorated functions are returned and that may lead to some lost
+    information. Note that this function differs from the pytest decorator
+    ``pytest.mark.skipif``. The latter marks test functions on import and the
+    skip is handled during collection, hence it cannot be used for non-test
+    functions, nor does it handle callable conditions.
+
+    """
+    def skip_decorator(f):
+        # Local import to avoid a hard pytest dependency and only incur the
+        # import time overhead at actual test-time.
+        import inspect
+        import pytest
+
+        if msg is None:
+            out = 'Test skipped due to test condition'
+        else:
+            out = msg
+
+        # Allow for both boolean or callable skip conditions.
+        if isinstance(skip_condition, collections.Callable):
+            skip_val = lambda: skip_condition()
+        else:
+            skip_val = lambda: skip_condition
+
+        # We need to define *two* skippers because Python doesn't allow both
+        # return with value and yield inside the same function.
+        def get_msg(func, msg=None):
+            """Skip message with information about function being skipped."""
+            if msg is None:
+                out = 'Test skipped due to test condition'
+            else:
+                out = msg
+            return "Skipping test: %s: %s" % (func.__name__, out)
+
+        def skipper_func(*args, **kwargs):
+            """Skipper for normal test functions."""
+            if skip_val():
+                raise SkipTest(get_msg(f, msg))
+            else:
+                return f(*args, **kwargs)
+
+        def skipper_gen(*args, **kwargs):
+            """Skipper for test generators."""
+            if skip_val():
+                raise SkipTest(get_msg(f, msg))
+            else:
+                for x in f(*args, **kwargs):
+                    yield x
+
+        # Choose the right skipper to use when building the actual decorator.
+        if inspect.isgeneratorfunction(f):
+            skipper = skipper_gen
+        else:
+            skipper = skipper_func
+        return skipper
+
+    return skip_decorator
+
+
+def knownfailureif(fail_condition, msg=None):
+    """
+    Make function raise KnownFailureException if a given condition is true.
+
+    If the condition is a callable, it is used at runtime to dynamically
+    make the decision. This is useful for tests that may require costly
+    imports, to delay the cost until the test suite is actually executed.
+
+    Parameters
+    ----------
+    fail_condition : bool or callable
+        Flag to determine whether to mark the decorated test as a known
+        failure (if True) or not (if False).
+    msg : str, optional
+        Message to give on raising a KnownFailureException.
+        Default is None.
+
+    Returns
+    -------
+    decorator : function
+        Decorator, which, when applied to a function, causes
+        KnownFailureException to be raised when `fail_condition` is True,
+        and the function to be called normally otherwise.
+
+    Notes
+    -----
+    The decorator itself is not decorated in the pytest case, unlike for nose.
+
+    """
+    import pytest
+    from .utils import KnownFailureException
+
+    if msg is None:
+        msg = 'Test skipped due to known failure'
+
+    # Allow for both boolean or callable known failure conditions.
+    if isinstance(fail_condition, collections.Callable):
+        fail_val = lambda: fail_condition()
+    else:
+        fail_val = lambda: fail_condition
+
+    def knownfail_decorator(f):
+
+        def knownfailer(*args, **kwargs):
+            if fail_val():
+                raise KnownFailureException(msg)
+            return f(*args, **kwargs)
+
+        return knownfailer
+
+    return knownfail_decorator
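
Usage sketch for the two conditional decorators above (illustrative only; the conditions here are hard-coded):

    from numpy.testing import dec

    @dec.skipif(True, "not supported on this platform")
    def test_platform_specific():
        pass    # calling this raises SkipTest before the body runs

    @dec.knownfailureif(True, "known regression")
    def test_known_bad():
        pass    # calling this raises KnownFailureException instead
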
+
+
+def deprecated(conditional=True):
+    """
+    Filter deprecation warnings while running the test suite.
+
+    This decorator can be used to filter DeprecationWarnings, to avoid
+    printing them during the test suite run, while checking that the test
+    actually raises a DeprecationWarning.
+
+    Parameters
+    ----------
+    conditional : bool or callable, optional
+        Flag to determine whether to mark test as deprecated or not. If the
+        condition is a callable, it is used at runtime to dynamically make the
+        decision. Default is True.
+
+    Returns
+    -------
+    decorator : function
+        The `deprecated` decorator itself.
+
+    Notes
+    -----
+    .. versionadded:: 1.4.0
+
+    """
+    def deprecate_decorator(f):
+
+        def _deprecated_imp(*args, **kwargs):
+            # Poor man's replacement for the with statement
+            with assert_warns(DeprecationWarning):
+                f(*args, **kwargs)
+
+        if isinstance(conditional, collections.Callable):
+            cond = conditional()
+        else:
+            cond = conditional
+        if cond:
+            return _deprecated_imp
+        else:
+            return f
+    return deprecate_decorator
+
+
+def parametrize(vars, input):
+    """
+    Pytest compatibility function. This implements the simplest level of
+    pytest.mark.parametrize as an aid in making the transition from nose
+    to pytest; here it is a thin wrapper around ``pytest.mark.parametrize``.
+    It does not support variable substitution by name, nor does it support
+    nesting or classes. See the pytest documentation for usage.
+
+    """
+    import pytest
+
+    return pytest.mark.parametrize(vars, input)
diff --git a/numpy/testing/pytest_tools/noseclasses.py b/numpy/testing/pytest_tools/noseclasses.py
new file mode 100644
index 000000000..2486029fe
--- /dev/null
+++ b/numpy/testing/pytest_tools/noseclasses.py
@@ -0,0 +1,342 @@
+# These classes implement a doctest runner plugin for nose, a "known failure"
+# error class, and a customized TestProgram for NumPy.
+
+# Because this module imports nose directly, it should not
+# be used except by nosetester.py to avoid a general NumPy
+# dependency on nose.
+from __future__ import division, absolute_import, print_function + +import os +import doctest +import inspect + +import numpy +import pytest +from .utils import KnownFailureException, SkipTest +import _pytest.runner +import _pytest.skipping + + +class NpyPlugin(object): + + def pytest_runtest_makereport(self, call): + if call.excinfo: + if call.excinfo.errisinstance(KnownFailureException): + #let's substitute the excinfo with a pytest.xfail one + call2 = call.__class__( + lambda: _pytest.runner.skip(str(call.excinfo.value)), + call.when) + print() + print() + print(call.excinfo._getreprcrash()) + print() + print(call.excinfo) + print() + print(call2.excinfo) + print() + call.excinfo = call2.excinfo + if call.excinfo.errisinstance(SkipTest): + #let's substitute the excinfo with a pytest.skip one + call2 = call.__class__( + lambda: _pytest.runner.skip(str(call.excinfo.value)), + call.when) + call.excinfo = call2.excinfo + + +if False: + from nose.plugins import doctests as npd + from nose.plugins.errorclass import ErrorClass, ErrorClassPlugin + from nose.plugins.base import Plugin + from nose.util import src + from .nosetester import get_package_name + # Some of the classes in this module begin with 'Numpy' to clearly distinguish + # them from the plethora of very similar names from nose/unittest/doctest + + #----------------------------------------------------------------------------- + # Modified version of the one in the stdlib, that fixes a python bug (doctests + # not found in extension modules, http://bugs.python.org/issue3158) + class NumpyDocTestFinder(doctest.DocTestFinder): + + def _from_module(self, module, object): + """ + Return true if the given object is defined in the given + module. + """ + if module is None: + return True + elif inspect.isfunction(object): + return module.__dict__ is object.__globals__ + elif inspect.isbuiltin(object): + return module.__name__ == object.__module__ + elif inspect.isclass(object): + return module.__name__ == object.__module__ + elif inspect.ismethod(object): + # This one may be a bug in cython that fails to correctly set the + # __module__ attribute of methods, but since the same error is easy + # to make by extension code writers, having this safety in place + # isn't such a bad idea + return module.__name__ == object.__self__.__class__.__module__ + elif inspect.getmodule(object) is not None: + return module is inspect.getmodule(object) + elif hasattr(object, '__module__'): + return module.__name__ == object.__module__ + elif isinstance(object, property): + return True # [XX] no way not be sure. + else: + raise ValueError("object must be a class or function") + + def _find(self, tests, obj, name, module, source_lines, globs, seen): + """ + Find tests for the given object and any contained objects, and + add them to `tests`. + """ + + doctest.DocTestFinder._find(self, tests, obj, name, module, + source_lines, globs, seen) + + # Below we re-run pieces of the above method with manual modifications, + # because the original code is buggy and fails to correctly identify + # doctests in extension modules. + + # Local shorthands + from inspect import ( + isroutine, isclass, ismodule, isfunction, ismethod + ) + + # Look for tests in a module's contained objects. 
+ if ismodule(obj) and self._recurse: + for valname, val in obj.__dict__.items(): + valname1 = '%s.%s' % (name, valname) + if ( (isroutine(val) or isclass(val)) + and self._from_module(module, val)): + + self._find(tests, val, valname1, module, source_lines, + globs, seen) + + # Look for tests in a class's contained objects. + if isclass(obj) and self._recurse: + for valname, val in obj.__dict__.items(): + # Special handling for staticmethod/classmethod. + if isinstance(val, staticmethod): + val = getattr(obj, valname) + if isinstance(val, classmethod): + val = getattr(obj, valname).__func__ + + # Recurse to methods, properties, and nested classes. + if ((isfunction(val) or isclass(val) or + ismethod(val) or isinstance(val, property)) and + self._from_module(module, val)): + valname = '%s.%s' % (name, valname) + self._find(tests, val, valname, module, source_lines, + globs, seen) + + + # second-chance checker; if the default comparison doesn't + # pass, then see if the expected output string contains flags that + # tell us to ignore the output + class NumpyOutputChecker(doctest.OutputChecker): + def check_output(self, want, got, optionflags): + ret = doctest.OutputChecker.check_output(self, want, got, + optionflags) + if not ret: + if "#random" in want: + return True + + # it would be useful to normalize endianness so that + # bigendian machines don't fail all the tests (and there are + # actually some bigendian examples in the doctests). Let's try + # making them all little endian + got = got.replace("'>", "'<") + want = want.replace("'>", "'<") + + # try to normalize out 32 and 64 bit default int sizes + for sz in [4, 8]: + got = got.replace("'<i%d'" % sz, "int") + want = want.replace("'<i%d'" % sz, "int") + + ret = doctest.OutputChecker.check_output(self, want, + got, optionflags) + + return ret + + + # Subclass nose.plugins.doctests.DocTestCase to work around a bug in + # its constructor that blocks non-default arguments from being passed + # down into doctest.DocTestCase + class NumpyDocTestCase(npd.DocTestCase): + def __init__(self, test, optionflags=0, setUp=None, tearDown=None, + checker=None, obj=None, result_var='_'): + self._result_var = result_var + self._nose_obj = obj + doctest.DocTestCase.__init__(self, test, + optionflags=optionflags, + setUp=setUp, tearDown=tearDown, + checker=checker) + + + print_state = numpy.get_printoptions() + + class NumpyDoctest(npd.Doctest): + name = 'numpydoctest' # call nosetests with --with-numpydoctest + score = 1000 # load late, after doctest builtin + + # always use whitespace and ellipsis options for doctests + doctest_optflags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS + + # files that should be ignored for doctests + doctest_ignore = ['generate_numpy_api.py', + 'setup.py'] + + # Custom classes; class variables to allow subclassing + doctest_case_class = NumpyDocTestCase + out_check_class = NumpyOutputChecker + test_finder_class = NumpyDocTestFinder + + # Don't use the standard doctest option handler; hard-code the option values + def options(self, parser, env=os.environ): + Plugin.options(self, parser, env) + # Test doctests in 'test' files / directories. Standard plugin default + # is False + self.doctest_tests = True + # Variable name; if defined, doctest results stored in this variable in + # the top-level namespace. 
None is the standard default + self.doctest_result_var = None + + def configure(self, options, config): + # parent method sets enabled flag from command line --with-numpydoctest + Plugin.configure(self, options, config) + self.finder = self.test_finder_class() + self.parser = doctest.DocTestParser() + if self.enabled: + # Pull standard doctest out of plugin list; there's no reason to run + # both. In practice the Unplugger plugin above would cover us when + # run from a standard numpy.test() call; this is just in case + # someone wants to run our plugin outside the numpy.test() machinery + config.plugins.plugins = [p for p in config.plugins.plugins + if p.name != 'doctest'] + + def set_test_context(self, test): + """ Configure `test` object to set test context + + We set the numpy / scipy standard doctest namespace + + Parameters + ---------- + test : test object + with ``globs`` dictionary defining namespace + + Returns + ------- + None + + Notes + ----- + `test` object modified in place + """ + # set the namespace for tests + pkg_name = get_package_name(os.path.dirname(test.filename)) + + # Each doctest should execute in an environment equivalent to + # starting Python and executing "import numpy as np", and, + # for SciPy packages, an additional import of the local + # package (so that scipy.linalg.basic.py's doctests have an + # implicit "from scipy import linalg" as well. + # + # Note: __file__ allows the doctest in NoseTester to run + # without producing an error + test.globs = {'__builtins__':__builtins__, + '__file__':'__main__', + '__name__':'__main__', + 'np':numpy} + # add appropriate scipy import for SciPy tests + if 'scipy' in pkg_name: + p = pkg_name.split('.') + p2 = p[-1] + test.globs[p2] = __import__(pkg_name, test.globs, {}, [p2]) + + # Override test loading to customize test context (with set_test_context + # method), set standard docstring options, and install our own test output + # checker + def loadTestsFromModule(self, module): + if not self.matches(module.__name__): + npd.log.debug("Doctest doesn't want module %s", module) + return + try: + tests = self.finder.find(module) + except AttributeError: + # nose allows module.__test__ = False; doctest does not and + # throws AttributeError + return + if not tests: + return + tests.sort() + module_file = src(module.__file__) + for test in tests: + if not test.examples: + continue + if not test.filename: + test.filename = module_file + # Set test namespace; test altered in place + self.set_test_context(test) + yield self.doctest_case_class(test, + optionflags=self.doctest_optflags, + checker=self.out_check_class(), + result_var=self.doctest_result_var) + + # Add an afterContext method to nose.plugins.doctests.Doctest in order + # to restore print options to the original state after each doctest + def afterContext(self): + numpy.set_printoptions(**print_state) + + # Ignore NumPy-specific build files that shouldn't be searched for tests + def wantFile(self, file): + bn = os.path.basename(file) + if bn in self.doctest_ignore: + return False + return npd.Doctest.wantFile(self, file) + + + class Unplugger(object): + """ Nose plugin to remove named plugin late in loading + + By default it removes the "doctest" plugin. 
+ """ + name = 'unplugger' + enabled = True # always enabled + score = 4000 # load late in order to be after builtins + + def __init__(self, to_unplug='doctest'): + self.to_unplug = to_unplug + + def options(self, parser, env): + pass + + def configure(self, options, config): + # Pull named plugin out of plugins list + config.plugins.plugins = [p for p in config.plugins.plugins + if p.name != self.to_unplug] + + + + # Class allows us to save the results of the tests in runTests - see runTests + # method docstring for details + class NumpyTestProgram(nose.core.TestProgram): + def runTests(self): + """Run Tests. Returns true on success, false on failure, and + sets self.success to the same value. + + Because nose currently discards the test result object, but we need + to return it to the user, override TestProgram.runTests to retain + the result + """ + if self.testRunner is None: + self.testRunner = nose.core.TextTestRunner(stream=self.config.stream, + verbosity=self.config.verbosity, + config=self.config) + plug_runner = self.config.plugins.prepareTestRunner(self.testRunner) + if plug_runner is not None: + self.testRunner = plug_runner + self.result = self.testRunner.run(self.test) + self.success = self.result.wasSuccessful() + return self.success + diff --git a/numpy/testing/pytest_tools/nosetester.py b/numpy/testing/pytest_tools/nosetester.py new file mode 100644 index 000000000..46e2b9b8c --- /dev/null +++ b/numpy/testing/pytest_tools/nosetester.py @@ -0,0 +1,566 @@ +""" +Nose test running. + +This module implements ``test()`` and ``bench()`` functions for NumPy modules. + +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import warnings +from numpy.compat import basestring +import numpy as np + +from .utils import import_nose, suppress_warnings + + +__all__ = ['get_package_name', 'run_module_suite', 'NoseTester', + '_numpy_tester', 'get_package_name', 'import_nose', + 'suppress_warnings'] + + +def get_package_name(filepath): + """ + Given a path where a package is installed, determine its name. + + Parameters + ---------- + filepath : str + Path to a file. If the determination fails, "numpy" is returned. + + Examples + -------- + >>> np.testing.nosetester.get_package_name('nonsense') + 'numpy' + + """ + + fullpath = filepath[:] + pkg_name = [] + while 'site-packages' in filepath or 'dist-packages' in filepath: + filepath, p2 = os.path.split(filepath) + if p2 in ('site-packages', 'dist-packages'): + break + pkg_name.append(p2) + + # if package name determination failed, just default to numpy/scipy + if not pkg_name: + if 'scipy' in fullpath: + return 'scipy' + else: + return 'numpy' + + # otherwise, reverse to get correct order and return + pkg_name.reverse() + + # don't include the outer egg directory + if pkg_name[0].endswith('.egg'): + pkg_name.pop(0) + + return '.'.join(pkg_name) + + +def run_module_suite(file_to_run=None, argv=None): + """ + Run a test module. + + Equivalent to calling ``$ nosetests <argv> <file_to_run>`` from + the command line. This version is for pytest rather than nose. + + Parameters + ---------- + file_to_run : str, optional + Path to test module, or None. + By default, run the module from which this function is called. + argv : list of strings + Arguments to be passed to the pytest runner. ``argv[0]`` is + ignored. All command line arguments accepted by ``pytest`` + will work. If it is the default value None, sys.argv is used. + + .. 
versionadded:: 1.14.0
+
+    Examples
+    --------
+    Adding the following::
+
+        if __name__ == "__main__":
+            run_module_suite(argv=sys.argv)
+
+    at the end of a test module will run the tests when that module is
+    called in the Python interpreter.
+
+    Alternatively, calling::
+
+    >>> run_module_suite(file_to_run="numpy/tests/test_matlib.py")
+
+    from an interpreter will run all the test routines in 'test_matlib.py'.
+    """
+    import pytest
+    if file_to_run is None:
+        f = sys._getframe(1)
+        file_to_run = f.f_locals.get('__file__', None)
+        if file_to_run is None:
+            raise AssertionError
+
+    if argv is None:
+        argv = sys.argv[1:] + [file_to_run]
+    else:
+        argv = argv + [file_to_run]
+
+    pytest.main(argv)
+
+if False:
+    # disable run_module_suite and NoseTester
+    # until later
+    class NoseTester(object):
+        """
+        Nose test runner.
+
+        This class is made available as numpy.testing.Tester, and a test
+        function is typically added to a package's __init__.py like so::
+
+          from numpy.testing import Tester
+          test = Tester().test
+
+        Calling this test function finds and runs all tests associated with the
+        package and all its sub-packages.
+
+        Attributes
+        ----------
+        package_path : str
+            Full path to the package to test.
+        package_name : str
+            Name of the package to test.
+
+        Parameters
+        ----------
+        package : module, str or None, optional
+            The package to test. If a string, this should be the full path to
+            the package. If None (default), `package` is set to the module from
+            which `NoseTester` is initialized.
+        raise_warnings : None, str or sequence of warnings, optional
+            This specifies which warnings to configure as 'raise' instead
+            of being shown once during the test execution. Valid strings are:
+
+              - "develop" : equals ``(Warning,)``
+              - "release" : equals ``()``, don't raise on any warnings.
+
+            Default is "release".
+        depth : int, optional
+            If `package` is None, then this can be used to initialize from the
+            module of the caller of (the caller of (...)) the code that
+            initializes `NoseTester`. Default of 0 means the module of the
+            immediate caller; higher values are useful for utility routines
+            that want to initialize `NoseTester` objects on behalf of other
+            code.
+
+        """
+        def __init__(self, package=None, raise_warnings="release", depth=0):
+            # Back-compat: 'None' used to mean either "release" or "develop"
+            # depending on whether this was a release or develop version of
+            # numpy. Those semantics were fine for testing numpy, but not so
+            # helpful for downstream projects like scipy that use
+            # numpy.testing. (They want to set this based on whether *they* are
+            # a release or develop version, not whether numpy is.) So we
+            # continue to accept 'None' for back-compat, but it's now just an
+            # alias for the default "release".
+            if raise_warnings is None:
+                raise_warnings = "release"
+
+            package_name = None
+            if package is None:
+                f = sys._getframe(1 + depth)
+                package_path = f.f_locals.get('__file__', None)
+                if package_path is None:
+                    raise AssertionError
+                package_path = os.path.dirname(package_path)
+                package_name = f.f_locals.get('__name__', None)
+            elif isinstance(package, type(os)):
+                package_path = os.path.dirname(package.__file__)
+                package_name = getattr(package, '__name__', None)
+            else:
+                package_path = str(package)
+
+            self.package_path = package_path
+
+            # Find the package name under test; this name is used to limit
+            # coverage reporting (if enabled).
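+            # Illustrative note (hypothetical names, not in the original
+            # code): if some_pkg/__init__.py runs ``test = NoseTester().test``,
+            # the frame lookup above yields package_path == '.../some_pkg' and
+            # package_name == 'some_pkg', so the fallback below is skipped.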
+ if package_name is None: + package_name = get_package_name(package_path) + self.package_name = package_name + + # Set to "release" in constructor in maintenance branches. + self.raise_warnings = raise_warnings + + def _test_argv(self, label, verbose, extra_argv): + ''' Generate argv for nosetests command + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + see ``test`` docstring + verbose : int, optional + Integer in range 1..3, bigger means more verbose. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + argv : list + command line arguments that will be passed to nose + ''' + argv = [__file__, self.package_path, '-s'] + if label and label != 'full': + if not isinstance(label, basestring): + raise TypeError('Selection label should be a string') + if label == 'fast': + label = 'not slow' + argv += ['-A', label] + + argv += [['-q'], [''], ['-v']][min(verbose - 1, 2)] + + # FIXME is this true of pytest + # When installing with setuptools, and also in some other cases, the + # test_*.py files end up marked +x executable. Nose, by default, does + # not run files marked with +x as they might be scripts. However, in + # our case nose only looks for test_*.py files under the package + # directory, which should be safe. + # argv += ['--exe'] + if extra_argv: + argv += extra_argv + return argv + + def _show_system_info(self): + import pytest + import numpy + + print("NumPy version %s" % numpy.__version__) + relaxed_strides = numpy.ones((10, 1), order="C").flags.f_contiguous + print("NumPy relaxed strides checking option:", relaxed_strides) + npdir = os.path.dirname(numpy.__file__) + print("NumPy is installed in %s" % npdir) + + if 'scipy' in self.package_name: + import scipy + print("SciPy version %s" % scipy.__version__) + spdir = os.path.dirname(scipy.__file__) + print("SciPy is installed in %s" % spdir) + + pyversion = sys.version.replace('\n', '') + print("Python version %s" % pyversion) + print("pytest version %d.%d.%d" % pytest.__versioninfo__) + + def _get_custom_doctester(self): + """ Return instantiated plugin for doctests + + Allows subclassing of this class to override doctester + + A return value of None means use the nose builtin doctest plugin + """ + from .noseclasses import NumpyDoctest + return NumpyDoctest() + + def prepare_test_args(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, timer=False): + """ + Run tests for module using nose. + + This method does the heavy lifting for the `test` method. It takes all + the same arguments, for details see `test`. 
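+
+            It returns the ``(argv, plugins)`` pair that `test` passes on to
+            the nose runner; a minimal call (illustrative) is
+            ``self.prepare_test_args()``, which selects the default 'fast'
+            label and the default plugin set.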
+ + See Also + -------- + test + + """ + # fail with nice error message if nose is not present + import_nose() + # compile argv + argv = self._test_argv(label, verbose, extra_argv) + # our way of doing coverage + if coverage: + argv += ['--cover-package=%s' % self.package_name, '--with-coverage', + '--cover-tests', '--cover-erase'] + + if timer: + if timer is True: + argv += ['--with-timer'] + elif isinstance(timer, int): + argv += ['--with-timer', '--timer-top-n', str(timer)] + + # construct list of plugins + import nose.plugins.builtin + from nose.plugins import EntryPointPluginManager + from .noseclasses import KnownFailurePlugin, Unplugger + plugins = [KnownFailurePlugin()] + plugins += [p() for p in nose.plugins.builtin.plugins] + try: + # External plugins (like nose-timer) + entrypoint_manager = EntryPointPluginManager() + entrypoint_manager.loadPlugins() + plugins += [p for p in entrypoint_manager.plugins] + except ImportError: + # Relies on pkg_resources, not a hard dependency + pass + + # add doctesting if required + doctest_argv = '--with-doctest' in argv + if doctests == False and doctest_argv: + doctests = True + plug = self._get_custom_doctester() + if plug is None: + # use standard doctesting + if doctests and not doctest_argv: + argv += ['--with-doctest'] + else: # custom doctesting + if doctest_argv: # in fact the unplugger would take care of this + argv.remove('--with-doctest') + plugins += [Unplugger('doctest'), plug] + if doctests: + argv += ['--with-' + plug.name] + return argv, plugins + + def test(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, raise_warnings=None, + timer=False): + """ + Run tests for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the tests to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow tests as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + attribute_identifier - string passed directly to nosetests as '-A'. + verbose : int, optional + Verbosity value for test outputs, in the range 1..3. Default is 1. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + doctests : bool, optional + If True, run doctests in module. Default is False. + coverage : bool, optional + If True, report coverage of NumPy code. Default is False. + (This requires the `coverage module: + <http://nedbatchelder.com/code/modules/coverage.html>`_). + raise_warnings : None, str or sequence of warnings, optional + This specifies which warnings to configure as 'raise' instead + of being shown once during the test execution. Valid strings are: + + - "develop" : equals ``(Warning,)`` + - "release" : equals ``()``, don't raise on any warnings. + + The default is to use the class initialization value. + timer : bool or int, optional + Timing of individual tests with ``nose-timer`` (which needs to be + installed). If True, time tests and report on all of them. + If an integer (say ``N``), report timing results for ``N`` slowest + tests. + + Returns + ------- + result : object + Returns the result of running the tests as a + ``nose.result.TextTestResult`` object. + + Notes + ----- + Each NumPy module exposes `test` in its namespace to run all tests for it. 
+ For example, to run all tests for numpy.lib: + + >>> np.lib.test() #doctest: +SKIP + + Examples + -------- + >>> result = np.lib.test() #doctest: +SKIP + Running unit tests for numpy.lib + ... + Ran 976 tests in 3.933s + + OK + + >>> result.errors #doctest: +SKIP + [] + >>> result.knownfail #doctest: +SKIP + [] + """ + + # cap verbosity at 3 because nose becomes *very* verbose beyond that + verbose = min(verbose, 3) + + from . import utils + utils.verbose = verbose + + argv, plugins = self.prepare_test_args( + label, verbose, extra_argv, doctests, coverage, timer) + + if doctests: + print("Running unit tests and doctests for %s" % self.package_name) + else: + print("Running unit tests for %s" % self.package_name) + + self._show_system_info() + + # reset doctest state on every run + import doctest + doctest.master = None + + if raise_warnings is None: + raise_warnings = self.raise_warnings + + _warn_opts = dict(develop=(Warning,), + release=()) + if isinstance(raise_warnings, basestring): + raise_warnings = _warn_opts[raise_warnings] + + with suppress_warnings("location") as sup: + # Reset the warning filters to the default state, + # so that running the tests is more repeatable. + warnings.resetwarnings() + # Set all warnings to 'warn', this is because the default 'once' + # has the bad property of possibly shadowing later warnings. + warnings.filterwarnings('always') + # Force the requested warnings to raise + for warningtype in raise_warnings: + warnings.filterwarnings('error', category=warningtype) + # Filter out annoying import messages. + sup.filter(message='Not importing directory') + sup.filter(message="numpy.dtype size changed") + sup.filter(message="numpy.ufunc size changed") + sup.filter(category=np.ModuleDeprecationWarning) + # Filter out boolean '-' deprecation messages. This allows + # older versions of scipy to test without a flood of messages. + sup.filter(message=".*boolean negative.*") + sup.filter(message=".*boolean subtract.*") + # Filter out distutils cpu warnings (could be localized to + # distutils tests). ASV has problems with top level import, + # so fetch module for suppression here. + with warnings.catch_warnings(): + warnings.simplefilter("always") + from ...distutils import cpuinfo + sup.filter(category=UserWarning, module=cpuinfo) + # See #7949: Filter out deprecation warnings due to the -3 flag to + # python 2 + if sys.version_info.major == 2 and sys.py3kwarning: + # This is very specific, so using the fragile module filter + # is fine + import threading + sup.filter(DeprecationWarning, + r"sys\.exc_clear\(\) not supported in 3\.x", + module=threading) + sup.filter(DeprecationWarning, message=r"in 3\.x, __setslice__") + sup.filter(DeprecationWarning, message=r"in 3\.x, __getslice__") + sup.filter(DeprecationWarning, message=r"buffer\(\) not supported in 3\.x") + sup.filter(DeprecationWarning, message=r"CObject type is not supported in 3\.x") + sup.filter(DeprecationWarning, message=r"comparing unequal types not supported in 3\.x") + # Filter out some deprecation warnings inside nose 1.3.7 when run + # on python 3.5b2. See + # https://github.com/nose-devs/nose/issues/929 + # Note: it is hard to filter based on module for sup (lineno could + # be implemented). 
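+                # (Illustrative comparison, not part of the original code: the
+                # sup-based spelling would be
+                #     sup.filter(DeprecationWarning, message=".*getargspec.*")
+                # which, per the note above, is hard to pin to nose's module,
+                # hence the plain warnings filter below.)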
+ warnings.filterwarnings("ignore", message=".*getargspec.*", + category=DeprecationWarning, + module=r"nose\.") + + from .noseclasses import NumpyTestProgram + + t = NumpyTestProgram(argv=argv, exit=False, plugins=plugins) + + return t.result + + def bench(self, label='fast', verbose=1, extra_argv=None): + """ + Run benchmarks for module using nose. + + Parameters + ---------- + label : {'fast', 'full', '', attribute identifier}, optional + Identifies the benchmarks to run. This can be a string to pass to + the nosetests executable with the '-A' option, or one of several + special values. Special values are: + * 'fast' - the default - which corresponds to the ``nosetests -A`` + option of 'not slow'. + * 'full' - fast (as above) and slow benchmarks as in the + 'no -A' option to nosetests - this is the same as ''. + * None or '' - run all tests. + attribute_identifier - string passed directly to nosetests as '-A'. + verbose : int, optional + Integer in range 1..3, bigger means more verbose. + extra_argv : list, optional + List with any extra arguments to pass to nosetests. + + Returns + ------- + success : bool + Returns True if running the benchmarks works, False if an error + occurred. + + Notes + ----- + Benchmarks are like tests, but have names starting with "bench" instead + of "test", and can be found under the "benchmarks" sub-directory of the + module. + + Each NumPy module exposes `bench` in its namespace to run all benchmarks + for it. + + Examples + -------- + >>> success = np.lib.bench() #doctest: +SKIP + Running benchmarks for numpy.lib + ... + using 562341 items: + unique: + 0.11 + unique1d: + 0.11 + ratio: 1.0 + nUnique: 56230 == 56230 + ... + OK + + >>> success #doctest: +SKIP + True + + """ + + print("Running benchmarks for %s" % self.package_name) + self._show_system_info() + + argv = self._test_argv(label, verbose, extra_argv) + argv += ['--match', r'(?:^|[\\b_\\.%s-])[Bb]ench' % os.sep] + + # import nose or make informative error + nose = import_nose() + + # get plugin to disable doctests + from .noseclasses import Unplugger + add_plugins = [Unplugger('doctest')] + + return nose.run(argv=argv, addplugins=add_plugins) +else: + + class NoseTester(object): + def __init__(self, package=None, raise_warnings="release", depth=0): + pass + + def test(self, label='fast', verbose=1, extra_argv=None, + doctests=False, coverage=False, raise_warnings=None, + timer=False): + pass + + def bench(self, label='fast', verbose=1, extra_argv=None): + pass + + +def _numpy_tester(): + if hasattr(np, "__version__") and ".dev0" in np.__version__: + mode = "develop" + else: + mode = "release" + return NoseTester(raise_warnings=mode, depth=1) diff --git a/numpy/testing/pytest_tools/utils.py b/numpy/testing/pytest_tools/utils.py new file mode 100644 index 000000000..19982ec54 --- /dev/null +++ b/numpy/testing/pytest_tools/utils.py @@ -0,0 +1,2275 @@ +""" +Utility function to facilitate testing. 
+ +""" +from __future__ import division, absolute_import, print_function + +import os +import sys +import re +import operator +import warnings +from functools import partial, wraps +import shutil +import contextlib +from tempfile import mkdtemp, mkstemp + +from numpy.core import( + float32, empty, arange, array_repr, ndarray, isnat, array) +from numpy.lib.utils import deprecate + +if sys.version_info[0] >= 3: + from io import StringIO +else: + from StringIO import StringIO + +__all__ = [ + 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', + 'assert_array_equal', 'assert_array_less', 'assert_string_equal', + 'assert_array_almost_equal', 'assert_raises', 'build_err_msg', + 'decorate_methods', 'jiffies', 'memusage', 'print_assert_equal', + 'raises', 'rand', 'rundocs', 'runstring', 'verbose', 'measure', + 'assert_', 'assert_array_almost_equal_nulp', 'assert_raises_regex', + 'assert_array_max_ulp', 'assert_warns', 'assert_no_warnings', + 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', + 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', + 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', + '_assert_valid_refcount', '_gen_alignment_data', + ] + + +class KnownFailureException(Exception): + """Raise this exception to mark a test as a known failing test. + + """ + def __new__(cls, *args, **kwargs): + # import _pytest here to avoid hard dependency + import _pytest + return _pytest.skipping.xfail(*args, **kwargs) + + +class SkipTest(Exception): + """Raise this exception to mark a skipped test. + + """ + def __new__(cls, *args, **kwargs): + # import _pytest here to avoid hard dependency + import _pytest + return _pytest.runner.Skipped(*args, **kwargs) + + +class IgnoreException(Exception): + """Ignoring this exception due to disabled feature + + This exception seems unused and can be removed. + + """ + pass + + +KnownFailureTest = KnownFailureException # backwards compat + +verbose = 0 + +IS_PYPY = '__pypy__' in sys.modules +HAS_REFCOUNT = getattr(sys, 'getrefcount', None) is not None + + +def import_nose(): + """ Not wanted for pytest, make it a dummy function + + """ + pass + + +def assert_(val, msg=''): + """ + Assert that works in release mode. + Accepts callable msg to allow deferring evaluation until failure. + + The Python built-in ``assert`` does not work when executing code in + optimized mode (the ``-O`` flag) - no byte-code is generated for it. + + For documentation on usage, refer to the Python documentation. + + """ + __tracebackhide__ = True # Hide traceback for py.test + if not val: + try: + smsg = msg() + except TypeError: + smsg = msg + raise AssertionError(smsg) + + +def gisnan(x): + """like isnan, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isnan and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isnan + st = isnan(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isnan not supported for this type") + return st + + +def gisfinite(x): + """like isfinite, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isfinite and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. 
This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isfinite, errstate + with errstate(invalid='ignore'): + st = isfinite(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isfinite not supported for this type") + return st + + +def gisinf(x): + """like isinf, but always raise an error if type not supported instead of + returning a TypeError object. + + Notes + ----- + isinf and other ufunc sometimes return a NotImplementedType object instead + of raising any exception. This function is a wrapper to make sure an + exception is always raised. + + This should be removed once this problem is solved at the Ufunc level.""" + from numpy.core import isinf, errstate + with errstate(invalid='ignore'): + st = isinf(x) + if isinstance(st, type(NotImplemented)): + raise TypeError("isinf not supported for this type") + return st + + +@deprecate(message="numpy.testing.rand is deprecated in numpy 1.11. " + "Use numpy.random.rand instead.") +def rand(*args): + """Returns an array of random numbers with the given shape. + + This only uses the standard library, so it is useful for testing purposes. + """ + import random + from numpy.core import zeros, float64 + results = zeros(args, float64) + f = results.flat + for i in range(len(f)): + f[i] = random.random() + return results + + +if os.name == 'nt': + # Code "stolen" from enthought/debug/memusage.py + def GetPerformanceAttributes(object, counter, instance=None, + inum=-1, format=None, machine=None): + # NOTE: Many counters require 2 samples to give accurate results, + # including "% Processor Time" (as by definition, at any instant, a + # thread's CPU usage is either 0 or 100). To read counters like this, + # you should copy this function, but keep the counter open, and call + # CollectQueryData() each time you need to know. + # See http://msdn.microsoft.com/library/en-us/dnperfmo/html/perfmonpt2.asp + # My older explanation for this was that the "AddCounter" process forced + # the CPU to 100%, but the above makes more sense :) + import win32pdh + if format is None: + format = win32pdh.PDH_FMT_LONG + path = win32pdh.MakeCounterPath( (machine, object, instance, None, inum, counter)) + hq = win32pdh.OpenQuery() + try: + hc = win32pdh.AddCounter(hq, path) + try: + win32pdh.CollectQueryData(hq) + type, val = win32pdh.GetFormattedCounterValue(hc, format) + return val + finally: + win32pdh.RemoveCounter(hc) + finally: + win32pdh.CloseQuery(hq) + + def memusage(processName="python", instance=0): + # from win32pdhutil, part of the win32all package + import win32pdh + return GetPerformanceAttributes("Process", "Virtual Bytes", + processName, instance, + win32pdh.PDH_FMT_LONG, None) +elif sys.platform[:5] == 'linux': + + def memusage(_proc_pid_stat='/proc/%s/stat' % (os.getpid())): + """ + Return virtual memory size in bytes of the running python. + + """ + try: + f = open(_proc_pid_stat, 'r') + l = f.readline().split(' ') + f.close() + return int(l[22]) + except Exception: + return +else: + def memusage(): + """ + Return memory usage of running python. [Not implemented] + + """ + raise NotImplementedError + + +if sys.platform[:5] == 'linux': + def jiffies(_proc_pid_stat='/proc/%s/stat' % (os.getpid()), + _load_time=[]): + """ + Return number of jiffies elapsed. + + Return number of jiffies (1/100ths of a second) that this + process has been scheduled in user mode. See man 5 proc. 
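+
+        For example (illustrative): a loop that burns one full second of
+        user-mode CPU time should advance the returned count by about 100.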
+
+        """
+        import time
+        if not _load_time:
+            _load_time.append(time.time())
+        try:
+            f = open(_proc_pid_stat, 'r')
+            l = f.readline().split(' ')
+            f.close()
+            return int(l[13])
+        except Exception:
+            return int(100*(time.time()-_load_time[0]))
+else:
+    # os.getpid is not available on all platforms.
+    # Using time is safe but inaccurate, especially when the process
+    # was suspended or sleeping.
+    def jiffies(_load_time=[]):
+        """
+        Return number of jiffies elapsed.
+
+        Return number of jiffies (1/100ths of a second) that this
+        process has been scheduled in user mode. See man 5 proc.
+
+        """
+        import time
+        if not _load_time:
+            _load_time.append(time.time())
+        return int(100*(time.time()-_load_time[0]))
+
+
+def build_err_msg(arrays, err_msg, header='Items are not equal:',
+                  verbose=True, names=('ACTUAL', 'DESIRED'), precision=8):
+    msg = ['\n' + header]
+    if err_msg:
+        if err_msg.find('\n') == -1 and len(err_msg) < 79-len(header):
+            msg = [msg[0] + ' ' + err_msg]
+        else:
+            msg.append(err_msg)
+    if verbose:
+        for i, a in enumerate(arrays):
+
+            if isinstance(a, ndarray):
+                # precision argument is only needed if the objects are ndarrays
+                r_func = partial(array_repr, precision=precision)
+            else:
+                r_func = repr
+
+            try:
+                r = r_func(a)
+            except Exception as exc:
+                r = '[repr failed for <{}>: {}]'.format(type(a).__name__, exc)
+            if r.count('\n') > 3:
+                r = '\n'.join(r.splitlines()[:3])
+                r += '...'
+            msg.append(' %s: %s' % (names[i], r))
+    return '\n'.join(msg)
+
+
+def assert_equal(actual, desired, err_msg='', verbose=True):
+    """
+    Raises an AssertionError if two objects are not equal.
+
+    Given two objects (scalars, lists, tuples, dictionaries or numpy arrays),
+    check that all elements of these objects are equal. An exception is raised
+    at the first conflicting value.
+
+    Parameters
+    ----------
+    actual : array_like
+        The object to check.
+    desired : array_like
+        The expected object.
+    err_msg : str, optional
+        The error message to be printed in case of failure.
+    verbose : bool, optional
+        If True, the conflicting values are appended to the error message.
+
+    Raises
+    ------
+    AssertionError
+        If actual and desired are not equal.
+
+    Examples
+    --------
+    >>> np.testing.assert_equal([4,5], [4,6])
+    ...
+ <type 'exceptions.AssertionError'>: + Items are not equal: + item=1 + ACTUAL: 5 + DESIRED: 6 + + """ + __tracebackhide__ = True # Hide traceback for py.test + if isinstance(desired, dict): + if not isinstance(actual, dict): + raise AssertionError(repr(type(actual))) + assert_equal(len(actual), len(desired), err_msg, verbose) + for k, i in desired.items(): + if k not in actual: + raise AssertionError(repr(k)) + assert_equal(actual[k], desired[k], 'key=%r\n%s' % (k, err_msg), verbose) + return + if isinstance(desired, (list, tuple)) and isinstance(actual, (list, tuple)): + assert_equal(len(actual), len(desired), err_msg, verbose) + for k in range(len(desired)): + assert_equal(actual[k], desired[k], 'item=%r\n%s' % (k, err_msg), verbose) + return + from numpy.core import ndarray, isscalar, signbit + from numpy.lib import iscomplexobj, real, imag + if isinstance(actual, ndarray) or isinstance(desired, ndarray): + return assert_array_equal(actual, desired, err_msg, verbose) + msg = build_err_msg([actual, desired], err_msg, verbose=verbose) + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except ValueError: + usecomplex = False + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_equal(actualr, desiredr) + assert_equal(actuali, desiredi) + except AssertionError: + raise AssertionError(msg) + + # isscalar test to check cases such as [np.nan] != np.nan + if isscalar(desired) != isscalar(actual): + raise AssertionError(msg) + + # Inf/nan/negative zero handling + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + isdesnan = gisnan(desired) + isactnan = gisnan(actual) + if isdesnan or isactnan: + if not (isdesnan and isactnan): + raise AssertionError(msg) + else: + if not desired == actual: + raise AssertionError(msg) + return + elif desired == 0 and actual == 0: + if not signbit(desired) == signbit(actual): + raise AssertionError(msg) + # If TypeError or ValueError raised while using isnan and co, just handle + # as before + except (TypeError, ValueError, NotImplementedError): + pass + + try: + # If both are NaT (and have the same dtype -- datetime or timedelta) + # they are considered equal. + if (isnat(desired) == isnat(actual) and + array(desired).dtype.type == array(actual).dtype.type): + return + else: + raise AssertionError(msg) + + # If TypeError or ValueError raised while using isnan and co, just handle + # as before + except (TypeError, ValueError, NotImplementedError): + pass + + # Explicitly use __eq__ for comparison, ticket #2552 + if not (desired == actual): + raise AssertionError(msg) + + +def print_assert_equal(test_string, actual, desired): + """ + Test if two objects are equal, and print an error message if test fails. + + The test is performed with ``actual == desired``. + + Parameters + ---------- + test_string : str + The message supplied to AssertionError. + actual : object + The object to test for equality against `desired`. + desired : object + The expected result. 
+ + Examples + -------- + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 1]) + >>> np.testing.print_assert_equal('Test XYZ of func xyz', [0, 1], [0, 2]) + Traceback (most recent call last): + ... + AssertionError: Test XYZ of func xyz failed + ACTUAL: + [0, 1] + DESIRED: + [0, 2] + + """ + __tracebackhide__ = True # Hide traceback for py.test + import pprint + + if not (actual == desired): + msg = StringIO() + msg.write(test_string) + msg.write(' failed\nACTUAL: \n') + pprint.pprint(actual, msg) + msg.write('DESIRED: \n') + pprint.pprint(desired, msg) + raise AssertionError(msg.getvalue()) + + +def assert_almost_equal(actual,desired,decimal=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + The test verifies that the elements of ``actual`` and ``desired`` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation in `assert_array_almost_equal` did up to rounding + vagaries. An exception is raised at conflicting values. For ndarrays this + delegates to assert_array_almost_equal + + Parameters + ---------- + actual : array_like + The object to check. + desired : array_like + The expected object. + decimal : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> import numpy.testing as npt + >>> npt.assert_almost_equal(2.3333333333333, 2.33333334) + >>> npt.assert_almost_equal(2.3333333333333, 2.33333334, decimal=10) + ... + <type 'exceptions.AssertionError'>: + Items are not equal: + ACTUAL: 2.3333333333333002 + DESIRED: 2.3333333399999998 + + >>> npt.assert_almost_equal(np.array([1.0,2.3333333333333]), + ... np.array([1.0,2.33333334]), decimal=9) + ... + <type 'exceptions.AssertionError'>: + Arrays are not almost equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 2.33333333]) + y: array([ 1. 
, 2.33333334]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import ndarray + from numpy.lib import iscomplexobj, real, imag + + # Handle complex numbers: separate into real/imag to handle + # nan/inf/negative zero correctly + # XXX: catch ValueError for subclasses of ndarray where iscomplex fail + try: + usecomplex = iscomplexobj(actual) or iscomplexobj(desired) + except ValueError: + usecomplex = False + + def _build_err_msg(): + header = ('Arrays are not almost equal to %d decimals' % decimal) + return build_err_msg([actual, desired], err_msg, verbose=verbose, + header=header) + + if usecomplex: + if iscomplexobj(actual): + actualr = real(actual) + actuali = imag(actual) + else: + actualr = actual + actuali = 0 + if iscomplexobj(desired): + desiredr = real(desired) + desiredi = imag(desired) + else: + desiredr = desired + desiredi = 0 + try: + assert_almost_equal(actualr, desiredr, decimal=decimal) + assert_almost_equal(actuali, desiredi, decimal=decimal) + except AssertionError: + raise AssertionError(_build_err_msg()) + + if isinstance(actual, (ndarray, tuple, list)) \ + or isinstance(desired, (ndarray, tuple, list)): + return assert_array_almost_equal(actual, desired, decimal, err_msg) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(_build_err_msg()) + else: + if not desired == actual: + raise AssertionError(_build_err_msg()) + return + except (NotImplementedError, TypeError): + pass + if abs(desired - actual) >= 1.5 * 10.0**(-decimal): + raise AssertionError(_build_err_msg()) + + +def assert_approx_equal(actual,desired,significant=7,err_msg='',verbose=True): + """ + Raises an AssertionError if two items are not equal up to significant + digits. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. + + Given two numbers, check that they are approximately equal. + Approximately equal is defined as the number of significant digits + that agree. + + Parameters + ---------- + actual : scalar + The object to check. + desired : scalar + The expected object. + significant : int, optional + Desired precision, default is 7. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + >>> np.testing.assert_approx_equal(0.12345677777777e-20, 0.1234567e-20) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345671e-20, + significant=8) + >>> np.testing.assert_approx_equal(0.12345670e-20, 0.12345672e-20, + significant=8) + ... 
+ <type 'exceptions.AssertionError'>: + Items are not equal to 8 significant digits: + ACTUAL: 1.234567e-021 + DESIRED: 1.2345672000000001e-021 + + the evaluated condition that raises the exception is + + >>> abs(0.12345670e-20/1e-21 - 0.12345672e-20/1e-21) >= 10**-(8-1) + True + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + (actual, desired) = map(float, (actual, desired)) + if desired == actual: + return + # Normalized the numbers to be in range (-10.0,10.0) + # scale = float(pow(10,math.floor(math.log10(0.5*(abs(desired)+abs(actual)))))) + with np.errstate(invalid='ignore'): + scale = 0.5*(np.abs(desired) + np.abs(actual)) + scale = np.power(10, np.floor(np.log10(scale))) + try: + sc_desired = desired/scale + except ZeroDivisionError: + sc_desired = 0.0 + try: + sc_actual = actual/scale + except ZeroDivisionError: + sc_actual = 0.0 + msg = build_err_msg([actual, desired], err_msg, + header='Items are not equal to %d significant digits:' % + significant, + verbose=verbose) + try: + # If one of desired/actual is not finite, handle it specially here: + # check that both are nan if any is a nan, and test for equality + # otherwise + if not (gisfinite(desired) and gisfinite(actual)): + if gisnan(desired) or gisnan(actual): + if not (gisnan(desired) and gisnan(actual)): + raise AssertionError(msg) + else: + if not desired == actual: + raise AssertionError(msg) + return + except (TypeError, NotImplementedError): + pass + if np.abs(sc_desired - sc_actual) >= np.power(10., -(significant-1)): + raise AssertionError(msg) + + +def assert_array_compare(comparison, x, y, err_msg='', verbose=True, + header='', precision=6, equal_nan=True, + equal_inf=True): + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import array, isnan, isinf, any, inf + x = array(x, copy=False, subok=True) + y = array(y, copy=False, subok=True) + + def isnumber(x): + return x.dtype.char in '?bhilqpBHILQPefdgFDG' + + def istime(x): + return x.dtype.char in "Mm" + + def chk_same_position(x_id, y_id, hasval='nan'): + """Handling nan/inf: check that x and y have the nan/inf at the same + locations.""" + try: + assert_array_equal(x_id, y_id) + except AssertionError: + msg = build_err_msg([x, y], + err_msg + '\nx and y %s location mismatch:' + % (hasval), verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + + try: + cond = (x.shape == () or y.shape == ()) or x.shape == y.shape + if not cond: + msg = build_err_msg([x, y], + err_msg + + '\n(shapes %s, %s mismatch)' % (x.shape, + y.shape), + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise AssertionError(msg) + + if isnumber(x) and isnumber(y): + has_nan = has_inf = False + if equal_nan: + x_isnan, y_isnan = isnan(x), isnan(y) + # Validate that NaNs are in the same place + has_nan = any(x_isnan) or any(y_isnan) + if has_nan: + chk_same_position(x_isnan, y_isnan, hasval='nan') + + if equal_inf: + x_isinf, y_isinf = isinf(x), isinf(y) + # Validate that infinite values are in the same place + has_inf = any(x_isinf) or any(y_isinf) + if has_inf: + # Check +inf and -inf separately, since they are different + chk_same_position(x == +inf, y == +inf, hasval='+inf') + chk_same_position(x == -inf, y == -inf, hasval='-inf') + + if has_nan and has_inf: + x = x[~(x_isnan | x_isinf)] + y = y[~(y_isnan | y_isinf)] + elif has_nan: + x = x[~x_isnan] + y = y[~y_isnan] + elif has_inf: + x = x[~x_isinf] + y = y[~y_isinf] + + # Only do the comparison if actual 
values are left + if x.size == 0: + return + + elif istime(x) and istime(y): + # If one is datetime64 and the other timedelta64 there is no point + if equal_nan and x.dtype.type == y.dtype.type: + x_isnat, y_isnat = isnat(x), isnat(y) + + if any(x_isnat) or any(y_isnat): + chk_same_position(x_isnat, y_isnat, hasval="NaT") + + if any(x_isnat) or any(y_isnat): + x = x[~x_isnat] + y = y[~y_isnat] + + val = comparison(x, y) + + if isinstance(val, bool): + cond = val + reduced = [0] + else: + reduced = val.ravel() + cond = reduced.all() + reduced = reduced.tolist() + if not cond: + match = 100-100.0*reduced.count(1)/len(reduced) + msg = build_err_msg([x, y], + err_msg + + '\n(mismatch %s%%)' % (match,), + verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + if not cond: + raise AssertionError(msg) + except ValueError: + import traceback + efmt = traceback.format_exc() + header = 'error during assertion:\n\n%s\n\n%s' % (efmt, header) + + msg = build_err_msg([x, y], err_msg, verbose=verbose, header=header, + names=('x', 'y'), precision=precision) + raise ValueError(msg) + + +def assert_array_equal(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not equal. + + Given two array_like objects, check that the shape is equal and all + elements of these objects are equal. An exception is raised at + shape mismatch or conflicting values. In contrast to the standard usage + in numpy, NaNs are compared like numbers, no assertion is raised if + both objects have NaNs in the same positions. + + The usual caution for verifying equality with floating point numbers is + advised. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + The first assert does not raise an exception: + + >>> np.testing.assert_array_equal([1.0,2.33333,np.nan], + ... [np.exp(0),2.33333, np.nan]) + + Assert fails with numerical inprecision with floats: + + >>> np.testing.assert_array_equal([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan]) + ... + <type 'exceptions.ValueError'>: + AssertionError: + Arrays are not equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 3.14159265, NaN]) + y: array([ 1. , 3.14159265, NaN]) + + Use `assert_allclose` or one of the nulp (number of floating point values) + functions for these cases instead: + + >>> np.testing.assert_allclose([1.0,np.pi,np.nan], + ... [1, np.sqrt(np.pi)**2, np.nan], + ... rtol=1e-10, atol=0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + assert_array_compare(operator.__eq__, x, y, err_msg=err_msg, + verbose=verbose, header='Arrays are not equal') + + +def assert_array_almost_equal(x, y, decimal=6, err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + precision. + + .. note:: It is recommended to use one of `assert_allclose`, + `assert_array_almost_equal_nulp` or `assert_array_max_ulp` + instead of this function for more consistent floating point + comparisons. 
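+
+    (A worked illustration of the criterion stated below: with ``decimal=3``
+    the cutoff is ``1.5 * 10**-3 == 0.0015``, so 1.0 vs. 1.0014 passes while
+    1.0 vs. 1.002 fails.)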
+ + The test verifies identical shapes and that the elements of ``actual`` and + ``desired`` satisfy. + + ``abs(desired-actual) < 1.5 * 10**(-decimal)`` + + That is a looser test than originally documented, but agrees with what the + actual implementation did up to rounding vagaries. An exception is raised + at shape mismatch or conflicting values. In contrast to the standard usage + in numpy, NaNs are compared like numbers, no assertion is raised if both + objects have NaNs in the same positions. + + Parameters + ---------- + x : array_like + The actual object to check. + y : array_like + The desired, expected object. + decimal : int, optional + Desired precision, default is 6. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_allclose: Compare two array_like objects for equality with desired + relative and/or absolute precision. + assert_array_almost_equal_nulp, assert_array_max_ulp, assert_equal + + Examples + -------- + the first assert does not raise an exception + + >>> np.testing.assert_array_almost_equal([1.0,2.333,np.nan], + [1.0,2.333,np.nan]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33339,np.nan], decimal=5) + ... + <type 'exceptions.AssertionError'>: + AssertionError: + Arrays are not almost equal + <BLANKLINE> + (mismatch 50.0%) + x: array([ 1. , 2.33333, NaN]) + y: array([ 1. , 2.33339, NaN]) + + >>> np.testing.assert_array_almost_equal([1.0,2.33333,np.nan], + ... [1.0,2.33333, 5], decimal=5) + <type 'exceptions.ValueError'>: + ValueError: + Arrays are not almost equal + x: array([ 1. , 2.33333, NaN]) + y: array([ 1. , 2.33333, 5. ]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + from numpy.core import around, number, float_, result_type, array + from numpy.core.numerictypes import issubdtype + from numpy.core.fromnumeric import any as npany + + def compare(x, y): + try: + if npany(gisinf(x)) or npany( gisinf(y)): + xinfid = gisinf(x) + yinfid = gisinf(y) + if not (xinfid == yinfid).all(): + return False + # if one item, x and y is +- inf + if x.size == y.size == 1: + return x == y + x = x[~xinfid] + y = y[~yinfid] + except (TypeError, NotImplementedError): + pass + + # make sure y is an inexact type to avoid abs(MIN_INT); will cause + # casting of x later. + dtype = result_type(y, 1.) + y = array(y, dtype=dtype, copy=False, subok=True) + z = abs(x - y) + + if not issubdtype(z.dtype, number): + z = z.astype(float_) # handle object arrays + + return z < 1.5 * 10.0**(-decimal) + + assert_array_compare(compare, x, y, err_msg=err_msg, verbose=verbose, + header=('Arrays are not almost equal to %d decimals' % decimal), + precision=decimal) + + +def assert_array_less(x, y, err_msg='', verbose=True): + """ + Raises an AssertionError if two array_like objects are not ordered by less + than. + + Given two array_like objects, check that the shape is equal and all + elements of the first object are strictly smaller than those of the + second object. An exception is raised at shape mismatch or incorrectly + ordered values. Shape mismatch does not raise if an object has zero + dimension. In contrast to the standard usage in numpy, NaNs are + compared, no assertion is raised if both objects have NaNs in the same + positions. 
+ + + + Parameters + ---------- + x : array_like + The smaller object to check. + y : array_like + The larger object to compare. + err_msg : string + The error message to be printed in case of failure. + verbose : bool + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired objects are not equal. + + See Also + -------- + assert_array_equal: tests objects for equality + assert_array_almost_equal: test objects for equality up to precision + + + + Examples + -------- + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1.1, 2.0, np.nan]) + >>> np.testing.assert_array_less([1.0, 1.0, np.nan], [1, 2.0, np.nan]) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (mismatch 50.0%) + x: array([ 1., 1., NaN]) + y: array([ 1., 2., NaN]) + + >>> np.testing.assert_array_less([1.0, 4.0], 3) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (mismatch 50.0%) + x: array([ 1., 4.]) + y: array(3) + + >>> np.testing.assert_array_less([1.0, 2.0, 3.0], [4]) + ... + <type 'exceptions.ValueError'>: + Arrays are not less-ordered + (shapes (3,), (1,) mismatch) + x: array([ 1., 2., 3.]) + y: array([4]) + + """ + __tracebackhide__ = True # Hide traceback for py.test + assert_array_compare(operator.__lt__, x, y, err_msg=err_msg, + verbose=verbose, + header='Arrays are not less-ordered', + equal_inf=False) + + +def runstring(astr, dict): + exec(astr, dict) + + +def assert_string_equal(actual, desired): + """ + Test if two strings are equal. + + If the given strings are equal, `assert_string_equal` does nothing. + If they are not equal, an AssertionError is raised, and the diff + between the strings is shown. + + Parameters + ---------- + actual : str + The string to test for equality against the expected string. + desired : str + The expected string. + + Examples + -------- + >>> np.testing.assert_string_equal('abc', 'abc') + >>> np.testing.assert_string_equal('abc', 'abcd') + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + ... + AssertionError: Differences in strings: + - abc+ abcd? + + + """ + # delay import of difflib to reduce startup time + __tracebackhide__ = True # Hide traceback for py.test + import difflib + + if not isinstance(actual, str): + raise AssertionError(repr(type(actual))) + if not isinstance(desired, str): + raise AssertionError(repr(type(desired))) + if re.match(r'\A'+desired+r'\Z', actual, re.M): + return + + diff = list(difflib.Differ().compare(actual.splitlines(1), desired.splitlines(1))) + diff_list = [] + while diff: + d1 = diff.pop(0) + if d1.startswith(' '): + continue + if d1.startswith('- '): + l = [d1] + d2 = diff.pop(0) + if d2.startswith('? '): + l.append(d2) + d2 = diff.pop(0) + if not d2.startswith('+ '): + raise AssertionError(repr(d2)) + l.append(d2) + if diff: + d3 = diff.pop(0) + if d3.startswith('? '): + l.append(d3) + else: + diff.insert(0, d3) + if re.match(r'\A'+d2[2:]+r'\Z', d1[2:]): + continue + diff_list.extend(l) + continue + raise AssertionError(repr(d1)) + if not diff_list: + return + msg = 'Differences in strings:\n%s' % (''.join(diff_list)).rstrip() + if actual != desired: + raise AssertionError(msg) + + +def rundocs(filename=None, raise_on_error=True): + """ + Run doctests found in the given file. + + By default `rundocs` raises an AssertionError on failure. + + Parameters + ---------- + filename : str + The path to the file for which the doctests are run. 
+ raise_on_error : bool + Whether to raise an AssertionError when a doctest fails. Default is + True. + + Notes + ----- + The doctests can be run by the user/developer by adding the ``doctests`` + argument to the ``test()`` call. For example, to run all tests (including + doctests) for `numpy.lib`: + + >>> np.lib.test(doctests=True) #doctest: +SKIP + """ + from numpy.compat import npy_load_module + import doctest + if filename is None: + f = sys._getframe(1) + filename = f.f_globals['__file__'] + name = os.path.splitext(os.path.basename(filename))[0] + m = npy_load_module(name, filename) + + tests = doctest.DocTestFinder().find(m) + runner = doctest.DocTestRunner(verbose=False) + + msg = [] + if raise_on_error: + out = lambda s: msg.append(s) + else: + out = None + + for test in tests: + runner.run(test, out=out) + + if runner.failures > 0 and raise_on_error: + raise AssertionError("Some doctests failed:\n%s" % "\n".join(msg)) + + +def raises(*exceptions): + """ + This is actually a decorator and belongs in decorators.py. + + """ + import pytest + + def raises_decorator(f): + + def raiser(*args, **kwargs): + try: + f(*args, **kwargs) + except exceptions: + return + raise AssertionError() + + return raiser + + + return raises_decorator + + +def assert_raises(exception_class, fn=None, *args, **kwargs): + """ + assert_raises(exception_class, callable, *args, **kwargs) + assert_raises(exception_class) + + Fail unless an exception of class exception_class is thrown + by callable when invoked with arguments args and keyword + arguments kwargs. If a different type of exception is + thrown, it will not be caught, and the test case will be + deemed to have suffered an error, exactly as for an + unexpected exception. + + Alternatively, `assert_raises` can be used as a context manager: + + >>> from numpy.testing import assert_raises + >>> with assert_raises(ZeroDivisionError): + ... 1 / 0 + + is equivalent to + + >>> def div(x, y): + ... return x / y + >>> assert_raises(ZeroDivisionError, div, 1, 0) + + """ + import pytest + + __tracebackhide__ = True # Hide traceback for py.test + + if fn is not None: + pytest.raises(exception_class, fn, *args,**kwargs) + else: + @contextlib.contextmanager + def assert_raises_context(): + try: + yield + except BaseException as raised_exception: + assert isinstance(raised_exception, exception_class) + else: + raise ValueError('Function did not raise an exception') + + return assert_raises_context() + + +def assert_raises_regex(exception_class, expected_regexp, *args, **kwargs): + """ + assert_raises_regex(exception_class, expected_regexp, callable, *args, + **kwargs) + assert_raises_regex(exception_class, expected_regexp) + + Fail unless an exception of class exception_class and with message that + matches expected_regexp is thrown by callable when invoked with arguments + args and keyword arguments kwargs. + + Alternatively, can be used as a context manager like `assert_raises`. + + Name of this function adheres to Python 3.2+ reference, but should work in + all versions down to 2.6. + + Notes + ----- + .. 
versionadded:: 1.9.0 + + """ + import pytest + import unittest + + class Dummy(unittest.TestCase): + def do_nothing(self): + pass + + tmp = Dummy('do_nothing') + + __tracebackhide__ = True # Hide traceback for py.test + res = pytest.raises(exception_class, *args, **kwargs) + + if sys.version_info.major >= 3: + funcname = tmp.assertRaisesRegex + else: + # Only present in Python 2.7, missing from unittest in 2.6 + funcname = tmp.assertRaisesRegexp + + return funcname(exception_class, expected_regexp, *args, **kwargs) + + +def decorate_methods(cls, decorator, testmatch=None): + """ + Apply a decorator to all methods in a class matching a regular expression. + + The given decorator is applied to all public methods of `cls` that are + matched by the regular expression `testmatch` + (``testmatch.search(methodname)``). Methods that are private, i.e. start + with an underscore, are ignored. + + Parameters + ---------- + cls : class + Class whose methods to decorate. + decorator : function + Decorator to apply to methods + testmatch : compiled regexp or str, optional + The regular expression. Default value is None, in which case the + nose default (``re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep)``) + is used. + If `testmatch` is a string, it is compiled to a regular expression + first. + + """ + if testmatch is None: + testmatch = re.compile(r'(?:^|[\\b_\\.%s-])[Tt]est' % os.sep) + else: + testmatch = re.compile(testmatch) + cls_attr = cls.__dict__ + + # delayed import to reduce startup time + from inspect import isfunction + + methods = [_m for _m in cls_attr.values() if isfunction(_m)] + for function in methods: + try: + if hasattr(function, 'compat_func_name'): + funcname = function.compat_func_name + else: + funcname = function.__name__ + except AttributeError: + # not a function + continue + if testmatch.search(funcname) and not funcname.startswith('_'): + setattr(cls, funcname, decorator(function)) + return + + +def measure(code_str,times=1,label=None): + """ + Return elapsed time for executing code in the namespace of the caller. + + The supplied code string is compiled with the Python builtin ``compile``. + The precision of the timing is 10 milli-seconds. If the code will execute + fast on this timescale, it can be executed many times to get reasonable + timing accuracy. + + Parameters + ---------- + code_str : str + The code to be timed. + times : int, optional + The number of times the code is executed. Default is 1. The code is + only compiled once. + label : str, optional + A label to identify `code_str` with. This is passed into ``compile`` + as the second argument (for run-time error messages). + + Returns + ------- + elapsed : float + Total elapsed time in seconds for executing `code_str` `times` times. + + Examples + -------- + >>> etime = np.testing.measure('for i in range(1000): np.sqrt(i**2)', + ... times=times) + >>> print("Time for a single execution : ", etime / times, "s") + Time for a single execution : 0.005 s + + """ + frame = sys._getframe(1) + locs, globs = frame.f_locals, frame.f_globals + + code = compile(code_str, + 'Test name: %s ' % label, + 'exec') + i = 0 + elapsed = jiffies() + while i < times: + i += 1 + exec(code, globs, locs) + elapsed = jiffies() - elapsed + return 0.01*elapsed + + +def _assert_valid_refcount(op): + """ + Check that ufuncs don't mishandle refcount of object `1`. + Used in a few regression tests. 
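+
+    A typical call (illustrative) is ``_assert_valid_refcount(np.add)``; it
+    checks that repeated ``np.add(b, c)`` calls do not erroneously decref
+    the cached small integer ``1``.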
+ """ + if not HAS_REFCOUNT: + return True + import numpy as np + + b = np.arange(100*100).reshape(100, 100) + c = b + i = 1 + + rc = sys.getrefcount(i) + for j in range(15): + d = op(b, c) + assert_(sys.getrefcount(i) >= rc) + del d # for pyflakes + + +def assert_allclose(actual, desired, rtol=1e-7, atol=0, equal_nan=True, + err_msg='', verbose=True): + """ + Raises an AssertionError if two objects are not equal up to desired + tolerance. + + The test is equivalent to ``allclose(actual, desired, rtol, atol)``. + It compares the difference between `actual` and `desired` to + ``atol + rtol * abs(desired)``. + + .. versionadded:: 1.5.0 + + Parameters + ---------- + actual : array_like + Array obtained. + desired : array_like + Array desired. + rtol : float, optional + Relative tolerance. + atol : float, optional + Absolute tolerance. + equal_nan : bool, optional. + If True, NaNs will compare equal. + err_msg : str, optional + The error message to be printed in case of failure. + verbose : bool, optional + If True, the conflicting values are appended to the error message. + + Raises + ------ + AssertionError + If actual and desired are not equal up to specified precision. + + See Also + -------- + assert_array_almost_equal_nulp, assert_array_max_ulp + + Examples + -------- + >>> x = [1e-5, 1e-3, 1e-1] + >>> y = np.arccos(np.cos(x)) + >>> assert_allclose(x, y, rtol=1e-5, atol=0) + + """ + __tracebackhide__ = True # Hide traceback for py.test + import numpy as np + + def compare(x, y): + return np.core.numeric.isclose(x, y, rtol=rtol, atol=atol, + equal_nan=equal_nan) + + actual, desired = np.asanyarray(actual), np.asanyarray(desired) + header = 'Not equal to tolerance rtol=%g, atol=%g' % (rtol, atol) + assert_array_compare(compare, actual, desired, err_msg=str(err_msg), + verbose=verbose, header=header, equal_nan=equal_nan) + + +def assert_array_almost_equal_nulp(x, y, nulp=1): + """ + Compare two arrays relatively to their spacing. + + This is a relatively robust method to compare two arrays whose amplitude + is variable. + + Parameters + ---------- + x, y : array_like + Input arrays. + nulp : int, optional + The maximum number of unit in the last place for tolerance (see Notes). + Default is 1. + + Returns + ------- + None + + Raises + ------ + AssertionError + If the spacing between `x` and `y` for one or more elements is larger + than `nulp`. + + See Also + -------- + assert_array_max_ulp : Check that all items of arrays differ in at most + N Units in the Last Place. + spacing : Return the distance between x and the nearest adjacent number. + + Notes + ----- + An assertion is raised if the following condition is not met:: + + abs(x - y) <= nulps * spacing(maximum(abs(x), abs(y))) + + Examples + -------- + >>> x = np.array([1., 1e-10, 1e-20]) + >>> eps = np.finfo(x.dtype).eps + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps/2 + x) + + >>> np.testing.assert_array_almost_equal_nulp(x, x*eps + x) + Traceback (most recent call last): + ... 
+def assert_array_almost_equal_nulp(x, y, nulp=1):
+    """
+    Compare two arrays relatively to their spacing.
+
+    This is a relatively robust method to compare two arrays whose amplitude
+    is variable.
+
+    Parameters
+    ----------
+    x, y : array_like
+        Input arrays.
+    nulp : int, optional
+        The maximum number of units in the last place for tolerance (see
+        Notes). Default is 1.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    AssertionError
+        If the spacing between `x` and `y` for one or more elements is larger
+        than `nulp`.
+
+    See Also
+    --------
+    assert_array_max_ulp : Check that all items of arrays differ in at most
+        N Units in the Last Place.
+    spacing : Return the distance between x and the nearest adjacent number.
+
+    Notes
+    -----
+    An assertion is raised if the following condition is not met::
+
+        abs(x - y) <= nulp * spacing(maximum(abs(x), abs(y)))
+
+    Examples
+    --------
+    >>> x = np.array([1., 1e-10, 1e-20])
+    >>> eps = np.finfo(x.dtype).eps
+    >>> np.testing.assert_array_almost_equal_nulp(x, x*eps/2 + x)
+
+    >>> np.testing.assert_array_almost_equal_nulp(x, x*eps + x)
+    Traceback (most recent call last):
+      ...
+    AssertionError: X and Y are not equal to 1 ULP (max is 2)
+
+    """
+    __tracebackhide__ = True  # Hide traceback for py.test
+    import numpy as np
+    ax = np.abs(x)
+    ay = np.abs(y)
+    ref = nulp * np.spacing(np.where(ax > ay, ax, ay))
+    if not np.all(np.abs(x-y) <= ref):
+        if np.iscomplexobj(x) or np.iscomplexobj(y):
+            msg = "X and Y are not equal to %d ULP" % nulp
+        else:
+            max_nulp = np.max(nulp_diff(x, y))
+            msg = "X and Y are not equal to %d ULP (max is %g)" % (nulp,
+                                                                   max_nulp)
+        raise AssertionError(msg)
+
+
+def assert_array_max_ulp(a, b, maxulp=1, dtype=None):
+    """
+    Check that all items of arrays differ in at most N Units in the Last Place.
+
+    Parameters
+    ----------
+    a, b : array_like
+        Input arrays to be compared.
+    maxulp : int, optional
+        The maximum number of units in the last place that elements of `a` and
+        `b` can differ. Default is 1.
+    dtype : dtype, optional
+        Data-type to convert `a` and `b` to if given. Default is None.
+
+    Returns
+    -------
+    ret : ndarray
+        Array containing number of representable floating point numbers between
+        items in `a` and `b`.
+
+    Raises
+    ------
+    AssertionError
+        If one or more elements differ by more than `maxulp`.
+
+    See Also
+    --------
+    assert_array_almost_equal_nulp : Compare two arrays relatively to their
+        spacing.
+
+    Examples
+    --------
+    >>> a = np.linspace(0., 1., 100)
+    >>> res = np.testing.assert_array_max_ulp(a, np.arcsin(np.sin(a)))
+
+    """
+    __tracebackhide__ = True  # Hide traceback for py.test
+    import numpy as np
+    ret = nulp_diff(a, b, dtype)
+    if not np.all(ret <= maxulp):
+        raise AssertionError("Arrays are not almost equal up to %g ULP" %
+                             maxulp)
+    return ret
+
+
+def nulp_diff(x, y, dtype=None):
+    """For each item in x and y, return the number of representable floating
+    point numbers between them.
+
+    Parameters
+    ----------
+    x : array_like
+        first input array
+    y : array_like
+        second input array
+    dtype : dtype, optional
+        Data-type to convert `x` and `y` to if given. Default is None.
+
+    Returns
+    -------
+    nulp : array_like
+        number of representable floating point numbers between each item in x
+        and y.
+
+    Examples
+    --------
+    By definition, epsilon is the smallest number such that 1 + eps != 1, so
+    there should be exactly one ULP between 1 and 1 + eps:
+
+    >>> nulp_diff(1, 1 + np.finfo(np.float64).eps)
+    1.0
+    """
+    import numpy as np
+    if dtype:
+        x = np.array(x, dtype=dtype)
+        y = np.array(y, dtype=dtype)
+    else:
+        x = np.array(x)
+        y = np.array(y)
+
+    t = np.common_type(x, y)
+    if np.iscomplexobj(x) or np.iscomplexobj(y):
+        raise NotImplementedError("_nulp not implemented for complex array")
+
+    x = np.array(x, dtype=t)
+    y = np.array(y, dtype=t)
+
+    if x.shape != y.shape:
+        raise ValueError("x and y do not have the same shape: %s - %s" %
+                         (x.shape, y.shape))
+
+    def _diff(rx, ry, vdt):
+        diff = np.array(rx-ry, dtype=vdt)
+        return np.abs(diff)
+
+    rx = integer_repr(x)
+    ry = integer_repr(y)
+    return _diff(rx, ry, t)
+def _integer_repr(x, vdt, comp):
+    # Reinterpret the binary representation of the float as sign-magnitude:
+    # take the two's-complement representation into account.
+    # See also
+    # http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
+    rx = x.view(vdt)
+    if rx.size > 1:
+        rx[rx < 0] = comp - rx[rx < 0]
+    else:
+        if rx < 0:
+            rx = comp - rx
+
+    return rx
+
+
+def integer_repr(x):
+    """Return the signed-magnitude interpretation of the binary representation
+    of x."""
+    import numpy as np
+    if x.dtype == np.float32:
+        return _integer_repr(x, np.int32, np.int32(-2**31))
+    elif x.dtype == np.float64:
+        return _integer_repr(x, np.int64, np.int64(-2**63))
+    else:
+        raise ValueError("Unsupported dtype %s" % x.dtype)
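+# Illustrative check of the sign-magnitude mapping above: consecutive floats
+# map to consecutive integers, so a ULP distance is a plain integer
+# difference.
+#
+#     >>> import numpy as np
+#     >>> x = np.float64(1.0)
+#     >>> y = x + np.spacing(x)  # the very next representable float
+#     >>> int(integer_repr(y) - integer_repr(x))
+#     1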
+ + """ + + def __init__(self, record=False, module=None): + self._record = record + if module is None: + self._module = sys.modules['warnings'] + else: + self._module = module + self._entered = False + + def __enter__(self): + if self._entered: + raise RuntimeError("Cannot enter %r twice" % self) + self._entered = True + self._filters = self._module.filters + self._module.filters = self._filters[:] + self._showwarning = self._module.showwarning + if self._record: + log = [] + + def showwarning(*args, **kwargs): + log.append(WarningMessage(*args, **kwargs)) + self._module.showwarning = showwarning + return log + else: + return None + + def __exit__(self): + if not self._entered: + raise RuntimeError("Cannot exit %r without entering first" % self) + self._module.filters = self._filters + self._module.showwarning = self._showwarning + + +@contextlib.contextmanager +def _assert_warns_context(warning_class, name=None): + __tracebackhide__ = True # Hide traceback for py.test + with suppress_warnings() as sup: + l = sup.record(warning_class) + yield + if not len(l) > 0: + name_str = " when calling %s" % name if name is not None else "" + raise AssertionError("No warning raised" + name_str) + + +def assert_warns(warning_class, *args, **kwargs): + """ + Fail unless the given callable throws the specified warning. + + A warning of class warning_class should be thrown by the callable when + invoked with arguments args and keyword arguments kwargs. + If a different type of warning is thrown, it will not be caught. + + If called with all arguments other than the warning class omitted, may be + used as a context manager: + + with assert_warns(SomeWarning): + do_something() + + The ability to be used as a context manager is new in NumPy v1.11.0. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + warning_class : class + The class defining the warning that `func` is expected to throw. + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + The value returned by `func`. + + """ + if not args: + return _assert_warns_context(warning_class) + + func = args[0] + args = args[1:] + with _assert_warns_context(warning_class, name=func.__name__): + return func(*args, **kwargs) + + +@contextlib.contextmanager +def _assert_no_warnings_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + with warnings.catch_warnings(record=True) as l: + warnings.simplefilter('always') + yield + if len(l) > 0: + name_str = " when calling %s" % name if name is not None else "" + raise AssertionError("Got warnings%s: %s" % (name_str, l)) + + +def assert_no_warnings(*args, **kwargs): + """ + Fail if the given callable produces any warnings. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_warnings(): + do_something() + + The ability to be used as a context manager is new in NumPy v1.11.0. + + .. versionadded:: 1.7.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + The value returned by `func`. 
+ + """ + if not args: + return _assert_no_warnings_context() + + func = args[0] + args = args[1:] + with _assert_no_warnings_context(name=func.__name__): + return func(*args, **kwargs) + + +def _gen_alignment_data(dtype=float32, type='binary', max_size=24): + """ + generator producing data with different alignment and offsets + to test simd vectorization + + Parameters + ---------- + dtype : dtype + data type to produce + type : string + 'unary': create data for unary operations, creates one input + and output array + 'binary': create data for unary operations, creates two input + and output array + max_size : integer + maximum size of data to produce + + Returns + ------- + if type is 'unary' yields one output, one input array and a message + containing information on the data + if type is 'binary' yields one output array, two input array and a message + containing information on the data + + """ + ufmt = 'unary offset=(%d, %d), size=%d, dtype=%r, %s' + bfmt = 'binary offset=(%d, %d, %d), size=%d, dtype=%r, %s' + for o in range(3): + for s in range(o + 2, max(o + 3, max_size)): + if type == 'unary': + inp = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp(), ufmt % (o, o, s, dtype, 'out of place') + d = inp() + yield d, d, ufmt % (o, o, s, dtype, 'in place') + yield out[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'out of place') + yield inp()[:-1], inp()[1:], ufmt % \ + (o, o + 1, s - 1, dtype, 'aliased') + yield inp()[1:], inp()[:-1], ufmt % \ + (o + 1, o, s - 1, dtype, 'aliased') + if type == 'binary': + inp1 = lambda: arange(s, dtype=dtype)[o:] + inp2 = lambda: arange(s, dtype=dtype)[o:] + out = empty((s,), dtype=dtype)[o:] + yield out, inp1(), inp2(), bfmt % \ + (o, o, o, s, dtype, 'out of place') + d = inp1() + yield d, d, inp2(), bfmt % \ + (o, o, o, s, dtype, 'in place1') + d = inp2() + yield d, inp1(), d, bfmt % \ + (o, o, o, s, dtype, 'in place2') + yield out[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'out of place') + yield out[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'out of place') + yield inp1()[1:], inp1()[:-1], inp2()[:-1], bfmt % \ + (o + 1, o, o, s - 1, dtype, 'aliased') + yield inp1()[:-1], inp1()[1:], inp2()[:-1], bfmt % \ + (o, o + 1, o, s - 1, dtype, 'aliased') + yield inp1()[:-1], inp1()[:-1], inp2()[1:], bfmt % \ + (o, o, o + 1, s - 1, dtype, 'aliased') + + + +@contextlib.contextmanager +def tempdir(*args, **kwargs): + """Context manager to provide a temporary test folder. + + All arguments are passed as this to the underlying tempfile.mkdtemp + function. + + """ + tmpdir = mkdtemp(*args, **kwargs) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir) + + +@contextlib.contextmanager +def temppath(*args, **kwargs): + """Context manager for temporary files. + + Context manager that returns the path to a closed temporary file. Its + parameters are the same as for tempfile.mkstemp and are passed directly + to that function. The underlying file is removed when the context is + exited, so it should be closed at that time. + + Windows does not allow a temporary file to be opened if it is already + open, so the underlying file must be closed after opening before it + can be opened again. 
+ + """ + fd, path = mkstemp(*args, **kwargs) + os.close(fd) + try: + yield path + finally: + os.remove(path) + + +class clear_and_catch_warnings(warnings.catch_warnings): + """ Context manager that resets warning registry for catching warnings + + Warnings can be slippery, because, whenever a warning is triggered, Python + adds a ``__warningregistry__`` member to the *calling* module. This makes + it impossible to retrigger the warning in this module, whatever you put in + the warnings filters. This context manager accepts a sequence of `modules` + as a keyword argument to its constructor and: + + * stores and removes any ``__warningregistry__`` entries in given `modules` + on entry; + * resets ``__warningregistry__`` to its previous state on exit. + + This makes it possible to trigger any warning afresh inside the context + manager without disturbing the state of warnings outside. + + For compatibility with Python 3.0, please consider all arguments to be + keyword-only. + + Parameters + ---------- + record : bool, optional + Specifies whether warnings should be captured by a custom + implementation of ``warnings.showwarning()`` and be appended to a list + returned by the context manager. Otherwise None is returned by the + context manager. The objects appended to the list are arguments whose + attributes mirror the arguments to ``showwarning()``. + modules : sequence, optional + Sequence of modules for which to reset warnings registry on entry and + restore on exit. To work correctly, all 'ignore' filters should + filter by one of these modules. + + Examples + -------- + >>> import warnings + >>> with clear_and_catch_warnings(modules=[np.core.fromnumeric]): + ... warnings.simplefilter('always') + ... warnings.filterwarnings('ignore', module='np.core.fromnumeric') + ... # do something that raises a warning but ignore those in + ... # np.core.fromnumeric + """ + class_modules = () + + def __init__(self, record=False, modules=()): + self.modules = set(modules).union(self.class_modules) + self._warnreg_copies = {} + super(clear_and_catch_warnings, self).__init__(record=record) + + def __enter__(self): + for mod in self.modules: + if hasattr(mod, '__warningregistry__'): + mod_reg = mod.__warningregistry__ + self._warnreg_copies[mod] = mod_reg.copy() + mod_reg.clear() + return super(clear_and_catch_warnings, self).__enter__() + + def __exit__(self, *exc_info): + super(clear_and_catch_warnings, self).__exit__(*exc_info) + for mod in self.modules: + if hasattr(mod, '__warningregistry__'): + mod.__warningregistry__.clear() + if mod in self._warnreg_copies: + mod.__warningregistry__.update(self._warnreg_copies[mod]) + + +class suppress_warnings(object): + """ + Context manager and decorator doing much the same as + ``warnings.catch_warnings``. + + However, it also provides a filter mechanism to work around + http://bugs.python.org/issue4180. + + This bug causes Python before 3.4 to not reliably show warnings again + after they have been ignored once (even within catch_warnings). It + means that no "ignore" filter can be used easily, since following + tests might need to see the warning. Additionally it allows easier + specificity for testing warnings and can be nested. + + Parameters + ---------- + forwarding_rule : str, optional + One of "always", "once", "module", or "location". Analogous to + the usual warnings module filter mode, it is useful to reduce + noise mostly on the outmost level. Unsuppressed and unrecorded + warnings will be forwarded based on this rule. Defaults to "always". 
+ "location" is equivalent to the warnings "default", match by exact + location the warning warning originated from. + + Notes + ----- + Filters added inside the context manager will be discarded again + when leaving it. Upon entering all filters defined outside a + context will be applied automatically. + + When a recording filter is added, matching warnings are stored in the + ``log`` attribute as well as in the list returned by ``record``. + + If filters are added and the ``module`` keyword is given, the + warning registry of this module will additionally be cleared when + applying it, entering the context, or exiting it. This could cause + warnings to appear a second time after leaving the context if they + were configured to be printed once (default) and were already + printed before the context was entered. + + Nesting this context manager will work as expected when the + forwarding rule is "always" (default). Unfiltered and unrecorded + warnings will be passed out and be matched by the outer level. + On the outmost level they will be printed (or caught by another + warnings context). The forwarding rule argument can modify this + behaviour. + + Like ``catch_warnings`` this context manager is not threadsafe. + + Examples + -------- + >>> with suppress_warnings() as sup: + ... sup.filter(DeprecationWarning, "Some text") + ... sup.filter(module=np.ma.core) + ... log = sup.record(FutureWarning, "Does this occur?") + ... command_giving_warnings() + ... # The FutureWarning was given once, the filtered warnings were + ... # ignored. All other warnings abide outside settings (may be + ... # printed/error) + ... assert_(len(log) == 1) + ... assert_(len(sup.log) == 1) # also stored in log attribute + + Or as a decorator: + + >>> sup = suppress_warnings() + >>> sup.filter(module=np.ma.core) # module must match exact + >>> @sup + >>> def some_function(): + ... # do something which causes a warning in np.ma.core + ... pass + """ + def __init__(self, forwarding_rule="always"): + self._entered = False + + # Suppressions are either instance or defined inside one with block: + self._suppressions = [] + + if forwarding_rule not in {"always", "module", "once", "location"}: + raise ValueError("unsupported forwarding rule.") + self._forwarding_rule = forwarding_rule + + def _clear_registries(self): + if hasattr(warnings, "_filters_mutated"): + # clearing the registry should not be necessary on new pythons, + # instead the filters should be mutated. + warnings._filters_mutated() + return + # Simply clear the registry, this should normally be harmless, + # note that on new pythons it would be invalidated anyway. 
+        for module in self._tmp_modules:
+            if hasattr(module, "__warningregistry__"):
+                module.__warningregistry__.clear()
+
+    def _filter(self, category=Warning, message="", module=None, record=False):
+        if record:
+            record = []  # list in which to store the matching warnings
+        else:
+            record = None
+        if self._entered:
+            if module is None:
+                warnings.filterwarnings(
+                    "always", category=category, message=message)
+            else:
+                module_regex = module.__name__.replace('.', r'\.') + '$'
+                warnings.filterwarnings(
+                    "always", category=category, message=message,
+                    module=module_regex)
+                self._tmp_modules.add(module)
+                self._clear_registries()
+
+            self._tmp_suppressions.append(
+                (category, message, re.compile(message, re.I), module, record))
+        else:
+            self._suppressions.append(
+                (category, message, re.compile(message, re.I), module, record))
+
+        return record
+
+    def filter(self, category=Warning, message="", module=None):
+        """
+        Add a new suppressing filter or apply it if the state is entered.
+
+        Parameters
+        ----------
+        category : class, optional
+            Warning class to filter.
+        message : string, optional
+            Regular expression matching the warning message.
+        module : module, optional
+            Module to filter for. Note that the module (and its file)
+            must match exactly and cannot be a submodule. This may make
+            it unreliable for external modules.
+
+        Notes
+        -----
+        When added within a context, filters are only added inside
+        the context and will be forgotten when the context is exited.
+        """
+        self._filter(category=category, message=message, module=module,
+                     record=False)
+
+    def record(self, category=Warning, message="", module=None):
+        """
+        Append a new recording filter or apply it if the state is entered.
+
+        All warnings matching will be appended to the ``log`` attribute.
+
+        Parameters
+        ----------
+        category : class, optional
+            Warning class to filter.
+        message : string, optional
+            Regular expression matching the warning message.
+        module : module, optional
+            Module to filter for. Note that the module (and its file)
+            must match exactly and cannot be a submodule. This may make
+            it unreliable for external modules.
+
+        Returns
+        -------
+        log : list
+            A list which will be filled with all matched warnings.
+
+        Notes
+        -----
+        When added within a context, filters are only added inside
+        the context and will be forgotten when the context is exited.
+ """ + return self._filter(category=category, message=message, module=module, + record=True) + + def __enter__(self): + if self._entered: + raise RuntimeError("cannot enter suppress_warnings twice.") + + self._orig_show = warnings.showwarning + self._filters = warnings.filters + warnings.filters = self._filters[:] + + self._entered = True + self._tmp_suppressions = [] + self._tmp_modules = set() + self._forwarded = set() + + self.log = [] # reset global log (no need to keep same list) + + for cat, mess, _, mod, log in self._suppressions: + if log is not None: + del log[:] # clear the log + if mod is None: + warnings.filterwarnings( + "always", category=cat, message=mess) + else: + module_regex = mod.__name__.replace('.', r'\.') + '$' + warnings.filterwarnings( + "always", category=cat, message=mess, + module=module_regex) + self._tmp_modules.add(mod) + warnings.showwarning = self._showwarning + self._clear_registries() + + return self + + def __exit__(self, *exc_info): + warnings.showwarning = self._orig_show + warnings.filters = self._filters + self._clear_registries() + self._entered = False + del self._orig_show + del self._filters + + def _showwarning(self, message, category, filename, lineno, + *args, **kwargs): + use_warnmsg = kwargs.pop("use_warnmsg", None) + for cat, _, pattern, mod, rec in ( + self._suppressions + self._tmp_suppressions)[::-1]: + if (issubclass(category, cat) and + pattern.match(message.args[0]) is not None): + if mod is None: + # Message and category match, either recorded or ignored + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + # Use startswith, because warnings strips the c or o from + # .pyc/.pyo files. + elif mod.__file__.startswith(filename): + # The message and module (filename) match + if rec is not None: + msg = WarningMessage(message, category, filename, + lineno, **kwargs) + self.log.append(msg) + rec.append(msg) + return + + # There is no filter in place, so pass to the outside handler + # unless we should only pass it once + if self._forwarding_rule == "always": + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, + *args, **kwargs) + else: + self._orig_showmsg(use_warnmsg) + return + + if self._forwarding_rule == "once": + signature = (message.args, category) + elif self._forwarding_rule == "module": + signature = (message.args, category, filename) + elif self._forwarding_rule == "location": + signature = (message.args, category, filename, lineno) + + if signature in self._forwarded: + return + self._forwarded.add(signature) + if use_warnmsg is None: + self._orig_show(message, category, filename, lineno, *args, + **kwargs) + else: + self._orig_showmsg(use_warnmsg) + + def __call__(self, func): + """ + Function decorator to apply certain suppressions to a whole + function. 
+ """ + @wraps(func) + def new_func(*args, **kwargs): + with self: + return func(*args, **kwargs) + + return new_func diff --git a/numpy/testing/setup.py b/numpy/testing/setup.py index a5e9656a3..5a0f977d9 100755 --- a/numpy/testing/setup.py +++ b/numpy/testing/setup.py @@ -7,6 +7,7 @@ def configuration(parent_package='',top_path=None): config = Configuration('testing', parent_package, top_path) config.add_subpackage('nose_tools') + config.add_subpackage('pytest_tools') config.add_data_dir('tests') return config diff --git a/numpy/testing/tests/test_decorators.py b/numpy/testing/tests/test_decorators.py index 1258a9296..62329ab7d 100644 --- a/numpy/testing/tests/test_decorators.py +++ b/numpy/testing/tests/test_decorators.py @@ -48,7 +48,7 @@ def test_skip_functions_hardcoded(): f1('a') except DidntSkipException: raise Exception('Failed to skip') - except SkipTest: + except SkipTest().__class__: pass @dec.skipif(False) @@ -59,7 +59,7 @@ def test_skip_functions_hardcoded(): f2('a') except DidntSkipException: pass - except SkipTest: + except SkipTest().__class__: raise Exception('Skipped when not expected to') @@ -76,7 +76,7 @@ def test_skip_functions_callable(): f1('a') except DidntSkipException: raise Exception('Failed to skip') - except SkipTest: + except SkipTest().__class__: pass @dec.skipif(skip_tester) @@ -88,7 +88,7 @@ def test_skip_functions_callable(): f2('a') except DidntSkipException: pass - except SkipTest: + except SkipTest().__class__: raise Exception('Skipped when not expected to') @@ -101,7 +101,7 @@ def test_skip_generators_hardcoded(): try: for j in g1(10): pass - except KnownFailureException: + except KnownFailureException().__class__: pass else: raise Exception('Failed to mark as known failure') @@ -115,7 +115,7 @@ def test_skip_generators_hardcoded(): try: for j in g2(10): pass - except KnownFailureException: + except KnownFailureException().__class__: raise Exception('Marked incorrectly as known failure') except DidntSkipException: pass @@ -134,7 +134,7 @@ def test_skip_generators_callable(): skip_flag = 'skip me!' for j in g1(10): pass - except KnownFailureException: + except KnownFailureException().__class__: pass else: raise Exception('Failed to mark as known failure') @@ -149,7 +149,7 @@ def test_skip_generators_callable(): skip_flag = 'do not skip' for j in g2(10): pass - except KnownFailureException: + except KnownFailureException().__class__: raise Exception('Marked incorrectly as known failure') except DidntSkipException: pass diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index 7ecb68f47..a0218c4e6 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -3,6 +3,10 @@ Back compatibility utils module. It will import the appropriate set of tools """ +import os + +from .nose_tools.utils import * + __all__ = [ 'assert_equal', 'assert_almost_equal', 'assert_approx_equal', 'assert_array_equal', 'assert_array_less', 'assert_string_equal', @@ -16,5 +20,3 @@ __all__ = [ 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', '_assert_valid_refcount', '_gen_alignment_data', ] - -from .nose_tools.utils import * |
