diff options
37 files changed, 961 insertions, 268 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst index d076074ce..a6b23b892 100644 --- a/doc/release/1.15.0-notes.rst +++ b/doc/release/1.15.0-notes.rst @@ -16,6 +16,9 @@ New functions common multiple. * `np.ma.stack`, the `np.stack` array-joining function generalized to masked arrays. +* ``quantile`` function, an interface to ``percentile`` without factors of 100 +* ``nanquantile`` function, an interface to ``nanpercentile`` without factors + of 100 * `np.printoptions`, the context manager which sets print options temporarily for the scope of the ``with`` block:: @@ -141,6 +144,13 @@ Creating a full iOS-compatible NumPy package requires building for the 5 architectures supported by iOS (i386, x86_64, armv7, armv7s and arm64), and combining these 5 compiled builds products into a single "fat" binary. +``np.quantile`` and ``np.nanquantile`` +-------------------------------------- +Like ``np.percentile`` and ``np.nanpercentile``, but takes quantiles in [0, 1] +rather than percentiles in [0, 100]. ``np.percentile`` is now a thin wrapper +around ``np.quantile`` with the extra step of dividing by 100. + + Build system ------------ Added experimental support for the 64-bit RISC-V architecture. @@ -148,6 +158,12 @@ Added experimental support for the 64-bit RISC-V architecture. Improvements ============ +``np.ufunc.reduce`` and related functions now accept an initial value +--------------------------------------------------------------------- +``np.ufunc.reduce``, ``np.sum``, ``np.prod``, ``np.min`` and ``np.max`` all +now accept an ``initial`` keyword argument that specifies the value to start +the reduction with. + ``np.flip`` can operate over multiple axes ------------------------------------------ ``np.flip`` now accepts None, or tuples of int, in its ``axis`` argument. 
If diff --git a/doc/source/reference/arrays.classes.rst b/doc/source/reference/arrays.classes.rst index 2719f9239..f17cb932a 100644 --- a/doc/source/reference/arrays.classes.rst +++ b/doc/source/reference/arrays.classes.rst @@ -215,6 +215,13 @@ Matrix objects .. index:: single: matrix +.. note:: + It is strongly advised *not* to use the matrix subclass. As described + below, it makes writing functions that deal consistently with matrices + and regular arrays very difficult. Currently, they are mainly used for + interacting with ``scipy.sparse``. We hope to provide an alternative + for this use, however, and eventually remove the ``matrix`` subclass. + :class:`matrix` objects inherit from the ndarray and therefore, they have the same attributes and methods of ndarrays. There are six important differences of matrix objects, however, that may lead to diff --git a/doc/source/reference/arrays.nditer.rst b/doc/source/reference/arrays.nditer.rst index 911ff6671..acad29b11 100644 --- a/doc/source/reference/arrays.nditer.rst +++ b/doc/source/reference/arrays.nditer.rst @@ -394,10 +394,10 @@ parameter support. .. admonition:: Example >>> def square(a): - ... it = np.nditer([a, None]) - ... for x, y in it: - ... y[...] = x*x - ... return it.operands[1] + ... with np.nditer([a, None]) as it: + ... for x, y in it: + ... y[...] = x*x + ... return it.operands[1] ... >>> square([1,2,3]) array([1, 4, 9]) @@ -490,10 +490,12 @@ Everything to do with the outer product is handled by the iterator setup. >>> b = np.arange(8).reshape(2,4) >>> it = np.nditer([a, b, None], flags=['external_loop'], ... op_axes=[[0, -1, -1], [-1, 0, 1], None]) - >>> for x, y, z in it: - ... z[...] = x*y + >>> with it: + ... for x, y, z in it: + ... z[...] = x*y + ... result = it.operands[2] # same as z ... - >>> it.operands[2] + >>> result array([[[ 0, 0, 0, 0], [ 0, 0, 0, 0]], [[ 0, 1, 2, 3], @@ -501,6 +503,9 @@ Everything to do with the outer product is handled by the iterator setup. 
[[ 0, 2, 4, 6], [ 8, 10, 12, 14]]]) +Note that once the iterator is closed we can not access :func:`operands <nditer.operands>` +and must use a reference created inside the context manager. + Reduction Iteration ------------------- @@ -540,8 +545,9 @@ sums along the last axis of `a`. ... it.operands[1][...] = 0 ... for x, y in it: ... y[...] += x + ... result = it.operands[1] ... - ... it.operands[1] + >>> result array([[ 6, 22, 38], [54, 70, 86]]) >>> np.sum(a, axis=2) @@ -575,8 +581,9 @@ buffering. ... it.reset() ... for x, y in it: ... y[...] += x + ... result = it.operands[1] ... - ... it.operands[1] + >>> result array([[ 6, 22, 38], [54, 70, 86]]) diff --git a/doc/source/reference/c-api.array.rst b/doc/source/reference/c-api.array.rst index ad7c725a8..5ea7bfcfc 100644 --- a/doc/source/reference/c-api.array.rst +++ b/doc/source/reference/c-api.array.rst @@ -1360,7 +1360,7 @@ Special functions for NPY_OBJECT .. c:function:: int PyArray_SetWritebackIfCopyBase(PyArrayObject* arr, PyArrayObject* base) Precondition: ``arr`` is a copy of ``base`` (though possibly with different - strides, ordering, etc.) Sets the :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` flag + strides, ordering, etc.) Sets the :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` flag and ``arr->base``, and set ``base`` to READONLY. Call :c:func:`PyArray_ResolveWritebackIfCopy` before calling `Py_DECREF`` in order copy any changes back to ``base`` and @@ -3260,12 +3260,14 @@ Memory management .. c:function:: int PyArray_ResolveWritebackIfCopy(PyArrayObject* obj) If ``obj.flags`` has :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` or (deprecated) - :c:data:`NPY_ARRAY_UPDATEIFCOPY`, this function copies ``obj->data`` to - `obj->base->data`, clears the flags, `DECREF` s `obj->base` and makes it - writeable, and sets ``obj->base`` to NULL. This is the opposite of + :c:data:`NPY_ARRAY_UPDATEIFCOPY`, this function clears the flags, `DECREF` s + `obj->base` and makes it writeable, and sets ``obj->base`` to NULL. 
It then + copies ``obj->data`` to `obj->base->data`, and returns the error state of + the copy operation. This is the opposite of :c:func:`PyArray_SetWritebackIfCopyBase`. Usually this is called once you are finished with ``obj``, just before ``Py_DECREF(obj)``. It may be called - multiple times, or with ``NULL`` input. + multiple times, or with ``NULL`` input. See also + :c:func:`PyArray_DiscardWritebackIfCopy`. Returns 0 if nothing was done, -1 on error, and 1 if action was taken. @@ -3487,12 +3489,14 @@ Miscellaneous Macros .. c:function:: PyArray_DiscardWritebackIfCopy(PyObject* obj) - Reset the :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` and deprecated - :c:data:`NPY_ARRAY_UPDATEIFCOPY` flag. Resets the - :c:data:`NPY_ARRAY_WRITEABLE` flag on the base object. It also - discards pending changes to the base object. This is - useful for recovering from an error condition when - writeback semantics are used. + If ``obj.flags`` has :c:data:`NPY_ARRAY_WRITEBACKIFCOPY` or (deprecated) + :c:data:`NPY_ARRAY_UPDATEIFCOPY`, this function clears the flags, `DECREF` s + `obj->base` and makes it writeable, and sets ``obj->base`` to NULL. In + contrast to :c:func:`PyArray_ResolveWritebackIfCopy` it makes no attempt + to copy the data from `obj->base`. This undoes + :c:func:`PyArray_SetWritebackIfCopyBase`. Usually this is called after an + error when you are finished with ``obj``, just before ``Py_DECREF(obj)``. + It may be called multiple times, or with ``NULL`` input. .. c:function:: PyArray_XDECREF_ERR(PyObject* obj) diff --git a/doc/source/reference/c-api.iterator.rst b/doc/source/reference/c-api.iterator.rst index 314b62a16..17f1c45f2 100644 --- a/doc/source/reference/c-api.iterator.rst +++ b/doc/source/reference/c-api.iterator.rst @@ -773,7 +773,7 @@ Construction and Destruction `NpyIter_Close` should be called before this.
If not, and if writeback is needed, it will be performed at this point in order to maintain backward-compatibility with older code, and a deprecation warning will be - emmitted. Old code shuold be updated to call `NpyIter_Close` beforehand. + emitted. Old code should be updated to call `NpyIter_Close` beforehand. Returns ``NPY_SUCCEED`` or ``NPY_FAIL``. diff --git a/doc/source/reference/routines.statistics.rst b/doc/source/reference/routines.statistics.rst index d359541aa..e287fe9c8 100644 --- a/doc/source/reference/routines.statistics.rst +++ b/doc/source/reference/routines.statistics.rst @@ -17,6 +17,8 @@ Order statistics ptp percentile nanpercentile + quantile + nanquantile Averages and variances ---------------------- diff --git a/doc/source/user/numpy-for-matlab-users.rst b/doc/source/user/numpy-for-matlab-users.rst index ae379624e..475c68c04 100644 --- a/doc/source/user/numpy-for-matlab-users.rst +++ b/doc/source/user/numpy-for-matlab-users.rst @@ -32,9 +32,9 @@ Some Key Differences in linear algebra. - In NumPy the basic type is a multidimensional ``array``. Operations on these arrays in all dimensionalities including 2D are element-wise - operations. However, there is a special ``matrix`` type for doing - linear algebra, which is just a subclass of the ``array`` class. - Operations on matrix-class arrays are linear algebra operations. + operations. One needs to use specific functions for linear algebra + (though for matrix multiplication, one can use the ``@`` operator + in python 3.5 and above). * - MATLABĀ® uses 1 (one) based indexing. The initial element of a sequence is found using a(1). @@ -50,8 +50,8 @@ Some Key Differences an excellent general-purpose programming language. While Matlab's syntax for some array manipulations is more compact than NumPy's, NumPy (by virtue of being an add-on to Python) can do many - things that Matlab just cannot, for instance subclassing the main - array type to do both array and matrix math cleanly. 
+ things that Matlab just cannot, for instance dealing properly with + stacks of matrices. * - In MATLABĀ®, arrays have pass-by-value semantics, with a lazy copy-on-write scheme to prevent actually creating copies until they @@ -63,8 +63,10 @@ Some Key Differences 'array' or 'matrix'? Which should I use? ======================================== -NumPy provides, in addition to ``np.ndarray``, an additional matrix type -that you may see used in some existing code. Which one to use? +Historically, NumPy has provided a special matrix type, `np.matrix`, which +is a subclass of ndarray which makes binary operations linear algebra +operations. You may see it used in some existing code instead of `np.array`. +So, which one to use? Short answer ------------ @@ -82,6 +84,8 @@ had to use ``dot`` instead of ``*`` to multiply (reduce) two tensors (scalar product, matrix vector multiplication etc.). Since Python 3.5 you can use the matrix multiplication ``@`` operator. +Given the above, we intend to deprecate ``matrix`` eventually. + Long answer ----------- @@ -91,12 +95,14 @@ for many kinds of numerical computing, while ``matrix`` is intended to facilitate linear algebra computations specifically. In practice there are only a handful of key differences between the two. -- Operator ``*``, ``dot()``, and ``multiply()``: +- Operators ``*`` and ``@``, functions ``dot()``, and ``multiply()``: - - For ``array``, **'``*``\ ' means element-wise multiplication**, - and the ``dot()`` function is used for matrix multiplication. - - For ``matrix``, **'``*``\ ' means matrix multiplication**, and the - ``multiply()`` function is used for element-wise multiplication. + - For ``array``, **``*`` means element-wise multiplication**, while + **``@`` means matrix multiplication**; they have associated functions + ``multiply()`` and ``dot()``. (Before python 3.5, ``@`` did not exist + and one had to use ``dot()`` for matrix multiplication). 
+ - For ``matrix``, **``*`` means matrix multiplication**, and for + element-wise multiplication one has to use the ``multiply()`` function. - Handling of vectors (one-dimensional arrays) @@ -132,15 +138,13 @@ There are pros and cons to using both: - ``array`` + - ``:)`` Element-wise multiplication is easy: ``A*B``. + - ``:(`` You have to remember that matrix multiplication has its own + operator, ``@``. - ``:)`` You can treat one-dimensional arrays as *either* row or column - vectors. ``dot(A,v)`` treats ``v`` as a column vector, while - ``dot(v,A)`` treats ``v`` as a row vector. This can save you having to + vectors. ``A @ v`` treats ``v`` as a column vector, while + ``v @ A`` treats ``v`` as a row vector. This can save you having to type a lot of transposes. - - ``<:(`` Having to use the ``dot()`` function for matrix-multiply is - messy -- ``dot(dot(A,B),C)`` vs. ``A*B*C``. This isn't an issue with - Python >= 3.5 because the ``@`` operator allows it to be written as - ``A @ B @ C``. - - ``:)`` Element-wise multiplication is easy: ``A*B``. - ``:)`` ``array`` is the "default" NumPy type, so it gets the most testing, and is the type most likely to be returned by 3rd party code that uses NumPy. @@ -149,6 +153,8 @@ There are pros and cons to using both: with that. - ``:)`` *All* operations (``*``, ``/``, ``+``, ``-`` etc.) are element-wise. + - ``:(`` Sparse matrices from ``scipy.sparse`` do not interact as well + with arrays. - ``matrix`` @@ -162,35 +168,17 @@ There are pros and cons to using both: argument. This shouldn't happen with NumPy functions (if it does it's a bug), but 3rd party code based on NumPy may not honor type preservation like NumPy does. - - ``:)`` ``A*B`` is matrix multiplication, so more convenient for - linear algebra (For Python >= 3.5 plain arrays have the same convenience - with the ``@`` operator). 
+ - ``:)`` ``A*B`` is matrix multiplication, so it looks just like you write + it in linear algebra (For Python >= 3.5 plain arrays have the same + convenience with the ``@`` operator). - ``<:(`` Element-wise multiplication requires calling a function, ``multiply(A,B)``. - ``<:(`` The use of operator overloading is a bit illogical: ``*`` does not work element-wise but ``/`` does. + - Interaction with ``scipy.sparse`` is a bit cleaner. -The ``array`` is thus much more advisable to use. - -Facilities for Matrix Users -=========================== - -NumPy has some features that facilitate the use of the ``matrix`` type, -which hopefully make things easier for Matlab converts. - -- A ``matlib`` module has been added that contains matrix versions of - common array constructors like ``ones()``, ``zeros()``, ``empty()``, - ``eye()``, ``rand()``, ``repmat()``, etc. Normally these functions - return ``array``\ s, but the ``matlib`` versions return ``matrix`` - objects. -- ``mat`` has been changed to be a synonym for ``asmatrix``, rather - than ``matrix``, thus making it a concise way to convert an ``array`` - to a ``matrix`` without copying the data. -- Some top-level functions have been removed. For example - ``numpy.rand()`` now needs to be accessed as ``numpy.random.rand()``. - Or use the ``rand()`` from the ``matlib`` module. But the - "numpythonic" way is to use ``numpy.random.random()``, which takes a - tuple for the shape, like other numpy functions. +The ``array`` is thus much more advisable to use. Indeed, we intend to +deprecate ``matrix`` eventually. Table of Rough MATLAB-NumPy Equivalents ======================================= @@ -200,23 +188,6 @@ expressions. **These are not exact equivalents**, but rather should be taken as hints to get you going in the right direction. For more detail read the built-in documentation on the NumPy functions. 
-Some care is necessary when writing functions that take arrays or -matrices as arguments --- if you are expecting an ``array`` and are -given a ``matrix``, or vice versa, then '\*' (multiplication) will give -you unexpected results. You can convert back and forth between arrays -and matrices using - -- ``asarray``: always returns an object of type ``array`` -- ``asmatrix`` or ``mat``: always return an object of type - ``matrix`` -- ``asanyarray``: always returns an ``array`` object or a subclass - derived from it, depending on the input. For instance if you pass in - a ``matrix`` it returns a ``matrix``. - -These functions all accept both arrays and matrices (among other things -like Python lists), and thus are useful when writing functions that -should accept any array-like object. - In the table below, it is assumed that you have executed the following commands in Python: @@ -309,8 +280,7 @@ Linear Algebra Equivalents - 2x3 matrix literal * - ``[ a b; c d ]`` - - ``vstack([hstack([a,b]), hstack([c,d])])`` or - ``block([[a, b], [c, d])`` + - ``block([[a,b], [c,d]])`` - construct a matrix from blocks ``a``, ``b``, ``c``, and ``d`` * - ``a(end)`` @@ -369,7 +339,7 @@ Linear Algebra Equivalents - conjugate transpose of ``a`` * - ``a * b`` - - ``a.dot(b)`` or ``a@b`` (Python 3.5 or newer) + - ``a @ b`` - matrix multiply * - ``a .* b`` @@ -520,7 +490,7 @@ Linear Algebra Equivalents from each pair * - ``norm(v)`` - - ``sqrt(dot(v,v))`` or ``np.linalg.norm(v)`` + - ``sqrt(v @ v)`` or ``np.linalg.norm(v)`` - L2 norm of vector ``v`` * - ``a & b`` diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst index de4079080..57a7004cc 100644 --- a/doc/source/user/quickstart.rst +++ b/doc/source/user/quickstart.rst @@ -297,19 +297,19 @@ created and filled with the result. Unlike in many matrix languages, the product operator ``*`` operates elementwise in NumPy arrays. 
The matrix product can be performed using -the ``dot`` function or method:: +the ``@`` operator (in python >=3.5) or the ``dot`` function or method:: >>> A = np.array( [[1,1], ... [0,1]] ) >>> B = np.array( [[2,0], ... [3,4]] ) - >>> A*B # elementwise product + >>> A * B # elementwise product array([[2, 0], [0, 4]]) - >>> A.dot(B) # matrix product + >>> A @ B # matrix product array([[5, 4], [3, 4]]) - >>> np.dot(A, B) # another matrix product + >>> A.dot(B) # another matrix product array([[5, 4], [3, 4]]) @@ -1357,7 +1357,7 @@ See linalg.py in numpy folder for more. [ 0., 1.]]) >>> j = np.array([[0.0, -1.0], [1.0, 0.0]]) - >>> np.dot (j, j) # matrix product + >>> j @ j # matrix product array([[-1., 0.], [ 0., -1.]]) diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py index 8e8339355..c187f8e31 100644 --- a/numpy/add_newdocs.py +++ b/numpy/add_newdocs.py @@ -257,6 +257,7 @@ add_newdoc('numpy.core', 'nditer', dtypes : tuple of dtype(s) The data types of the values provided in `value`. This may be different from the operand data types if buffering is enabled. + Valid only before the iterator is closed. finished : bool Whether the iteration over the operands is finished or not. has_delayed_bufalloc : bool @@ -282,7 +283,8 @@ add_newdoc('numpy.core', 'nditer', Size of the iterator. itviews Structured view(s) of `operands` in memory, matching the reordered - and optimized iterator access pattern. + and optimized iterator access pattern. Valid only before the iterator + is closed. multi_index When the "multi_index" flag was used, this property provides access to the index. Raises a ValueError if accessed @@ -292,7 +294,8 @@ add_newdoc('numpy.core', 'nditer', nop : int The number of iterator operands. operands : tuple of operand(s) - The array(s) to be iterated over. + The array(s) to be iterated over. Valid only before the iterator is + closed. shape : tuple of ints Shape tuple, the shape of the iterator. 
value @@ -331,12 +334,12 @@ add_newdoc('numpy.core', 'nditer', it = np.nditer([x, y, out], [], [['readonly'], ['readonly'], ['writeonly','allocate']]) + with it: + while not it.finished: + addop(it[0], it[1], out=it[2]) + it.iternext() - while not it.finished: - addop(it[0], it[1], out=it[2]) - it.iternext() - - return it.operands[2] + return it.operands[2] Here is an example outer product function:: @@ -351,7 +354,7 @@ add_newdoc('numpy.core', 'nditer', with it: for (a, b, c) in it: mulop(a, b, out=c) - return it.operands[2] + return it.operands[2] >>> a = np.arange(2)+1 >>> b = np.arange(3)+1 @@ -374,7 +377,7 @@ add_newdoc('numpy.core', 'nditer', while not it.finished: it[0] = lamdaexpr(*it[1:]) it.iternext() - return it.operands[0] + return it.operands[0] >>> a = np.arange(5) >>> b = np.ones(5) @@ -430,6 +433,13 @@ add_newdoc('numpy.core', 'nditer', ('copy', """)) +add_newdoc('numpy.core', 'nditer', ('operands', + """ + operands[`Slice`] + + The array(s) to be iterated over. Valid only before the iterator is closed. + """)) + add_newdoc('numpy.core', 'nditer', ('debug_print', """ debug_print() @@ -5840,7 +5850,7 @@ add_newdoc('numpy.core', 'ufunc', ('signature', add_newdoc('numpy.core', 'ufunc', ('reduce', """ - reduce(a, axis=0, dtype=None, out=None, keepdims=False) + reduce(a, axis=0, dtype=None, out=None, keepdims=False, initial) Reduces `a`'s dimension by one, by applying ufunc along one axis. @@ -5896,6 +5906,14 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', the result will broadcast correctly against the original `arr`. .. versionadded:: 1.7.0 + initial : scalar, optional + The value with which to start the reduction. + If the ufunc has no identity or the dtype is object, this defaults + to None - otherwise it defaults to ufunc.identity. + If ``None`` is given, the first element of the reduction is used, + and an error is thrown if the reduction is empty. + + .. 
versionadded:: 1.15.0 Returns ------- @@ -5927,7 +5945,24 @@ add_newdoc('numpy.core', 'ufunc', ('reduce', >>> np.add.reduce(X, 2) array([[ 1, 5], [ 9, 13]]) - + + You can use the ``initial`` keyword argument to initialize the reduction with a + different value. + + >>> np.add.reduce([10], initial=5) + 15 + >>> np.add.reduce(np.ones((2, 2, 2)), axis=(0, 2), initial=10) + array([14., 14.]) + + Allows reductions of empty arrays where they would normally fail, i.e. + for ufuncs without an identity. + + >>> np.minimum.reduce([], initial=np.inf) + inf + >>> np.minimum.reduce([]) + Traceback (most recent call last): + ... + ValueError: zero-size array to reduction operation minimum which has no identity """)) add_newdoc('numpy.core', 'ufunc', ('accumulate', diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 0f928676b..33f6d01a8 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -11,6 +11,7 @@ from numpy.core import multiarray as mu from numpy.core import umath as um from numpy.core.numeric import asanyarray from numpy.core import numerictypes as nt +from numpy._globals import _NoValue # save those O(100) nanoseconds!
umr_maximum = um.maximum.reduce @@ -22,17 +23,21 @@ umr_all = um.logical_and.reduce # avoid keyword arguments to speed up parsing, saves about 15%-20% for very # small reductions -def _amax(a, axis=None, out=None, keepdims=False): - return umr_maximum(a, axis, None, out, keepdims) +def _amax(a, axis=None, out=None, keepdims=False, + initial=_NoValue): + return umr_maximum(a, axis, None, out, keepdims, initial) -def _amin(a, axis=None, out=None, keepdims=False): - return umr_minimum(a, axis, None, out, keepdims) +def _amin(a, axis=None, out=None, keepdims=False, + initial=_NoValue): + return umr_minimum(a, axis, None, out, keepdims, initial) -def _sum(a, axis=None, dtype=None, out=None, keepdims=False): - return umr_sum(a, axis, dtype, out, keepdims) +def _sum(a, axis=None, dtype=None, out=None, keepdims=False, + initial=_NoValue): + return umr_sum(a, axis, dtype, out, keepdims, initial) -def _prod(a, axis=None, dtype=None, out=None, keepdims=False): - return umr_prod(a, axis, dtype, out, keepdims) +def _prod(a, axis=None, dtype=None, out=None, keepdims=False, + initial=_NoValue): + return umr_prod(a, axis, dtype, out, keepdims, initial) def _any(a, axis=None, dtype=None, out=None, keepdims=False): return umr_any(a, axis, dtype, out, keepdims) diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index adbbab6ed..f39248bd0 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -647,6 +647,9 @@ def array2string(a, max_line_width=None, precision=None, options.update(overrides) if options['legacy'] == '1.13': + if style is np._NoValue: + style = repr + if a.shape == () and not a.dtype.names: return style(a.item()) elif style is not np._NoValue: diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 948c2139d..75bcedd81 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -1812,7 +1812,7 @@ def clip(a, a_min, a_max, out=None): return _wrapfunc(a, 'clip', a_min, a_max, out=out) -def sum(a, axis=None, 
dtype=None, out=None, keepdims=np._NoValue): +def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue): """ Sum of array elements over a given axis. @@ -1851,6 +1851,10 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): `ndarray`, however any non-default value will be. If the sub-class' method does not implement `keepdims` any exceptions will be raised. + initial : scalar, optional + Starting value for the sum. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 Returns ------- @@ -1898,6 +1902,10 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8) -128 + You can also start the sum with a value other than zero: + + >>> np.sum([10], initial=5) + 15 """ if isinstance(a, _gentype): # 2018-02-25, 1.15.0 @@ -1912,7 +1920,8 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): return out return res - return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims) + return _wrapreduction(a, np.add, 'sum', axis, dtype, out, keepdims=keepdims, + initial=initial) def any(a, axis=None, out=None, keepdims=np._NoValue): @@ -2209,7 +2218,7 @@ def ptp(a, axis=None, out=None, keepdims=np._NoValue): return _methods._ptp(a, axis=axis, out=out, **kwargs) -def amax(a, axis=None, out=None, keepdims=np._NoValue): +def amax(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue): """ Return the maximum of an array or maximum along an axis. @@ -2241,6 +2250,13 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue): sub-class' method does not implement `keepdims` any exceptions will be raised. + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. 
versionadded:: 1.15.0 + + Returns ------- amax : ndarray or scalar @@ -2293,11 +2309,26 @@ def amax(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmax(b) 4.0 + You can use an initial value to compute the maximum of an empty slice, or + to initialize it to a different value: + + >>> np.max([[-50], [10]], axis=-1, initial=0) + array([ 0, 10]) + + Notice that the initial value is used as one of the elements for which the + maximum is determined, unlike for the default argument Python's max + function, which is only used for empty iterables. + + >>> np.max([5], initial=6) + 6 + >>> max([5], default=6) + 5 """ - return _wrapreduction(a, np.maximum, 'max', axis, None, out, keepdims=keepdims) + return _wrapreduction(a, np.maximum, 'max', axis, None, out, keepdims=keepdims, + initial=initial) -def amin(a, axis=None, out=None, keepdims=np._NoValue): +def amin(a, axis=None, out=None, keepdims=np._NoValue, initial=np._NoValue): """ Return the minimum of an array or minimum along an axis. @@ -2329,6 +2360,12 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue): sub-class' method does not implement `keepdims` any exceptions will be raised. + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on empty slice. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 + Returns ------- amin : ndarray or scalar @@ -2381,8 +2418,22 @@ def amin(a, axis=None, out=None, keepdims=np._NoValue): >>> np.nanmin(b) 0.0 + >>> np.min([[-50], [10]], axis=-1, initial=0) + array([-50, 0]) + + Notice that the initial value is used as one of the elements for which the + minimum is determined, unlike for the default argument Python's min + function, which is only used for empty iterables. + + Notice that this isn't the same as Python's ``default`` argument.
+ + >>> np.min([6], initial=5) + 5 + >>> min([6], default=5) + 6 """ - return _wrapreduction(a, np.minimum, 'min', axis, None, out, keepdims=keepdims) + return _wrapreduction(a, np.minimum, 'min', axis, None, out, keepdims=keepdims, + initial=initial) def alen(a): @@ -2418,7 +2469,7 @@ def alen(a): return len(array(a, ndmin=1)) -def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): +def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue): """ Return the product of array elements over a given axis. @@ -2458,6 +2509,10 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): `ndarray`, however any non-default value will be. If the sub-class' method does not implement `keepdims` any exceptions will be raised. + initial : scalar, optional + The starting value for this product. See `~numpy.ufunc.reduce` for details. + + .. versionadded:: 1.15.0 Returns ------- @@ -2515,8 +2570,13 @@ def prod(a, axis=None, dtype=None, out=None, keepdims=np._NoValue): >>> np.prod(x).dtype == int True + You can also start the product with a value other than one: + + >>> np.prod([1, 2], initial=5) + 10 """ - return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out, keepdims=keepdims) + return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out, keepdims=keepdims, + initial=initial) def cumprod(a, axis=None, dtype=None, out=None): diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h index ec0fd1ee9..12fc7098c 100644 --- a/numpy/core/include/numpy/ndarrayobject.h +++ b/numpy/core/include/numpy/ndarrayobject.h @@ -170,14 +170,17 @@ extern "C" CONFUSE_EMACS (k)*PyArray_STRIDES(obj)[2] + \ (l)*PyArray_STRIDES(obj)[3])) +/* Move to arrayobject.c once PyArray_XDECREF_ERR is removed */ static NPY_INLINE void PyArray_DiscardWritebackIfCopy(PyArrayObject *arr) { - if (arr != NULL) { - if ((PyArray_FLAGS(arr) & NPY_ARRAY_WRITEBACKIFCOPY) || - (PyArray_FLAGS(arr) & NPY_ARRAY_UPDATEIFCOPY)) 
{ - PyArrayObject *base = (PyArrayObject *)PyArray_BASE(arr); - PyArray_ENABLEFLAGS(base, NPY_ARRAY_WRITEABLE); + PyArrayObject_fields *fa = (PyArrayObject_fields *)arr; + if (fa && fa->base) { + if ((fa->flags & NPY_ARRAY_UPDATEIFCOPY) || + (fa->flags & NPY_ARRAY_WRITEBACKIFCOPY)) { + PyArray_ENABLEFLAGS((PyArrayObject*)fa->base, NPY_ARRAY_WRITEABLE); + Py_DECREF(fa->base); + fa->base = NULL; PyArray_CLEARFLAGS(arr, NPY_ARRAY_WRITEBACKIFCOPY); PyArray_CLEARFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY); } diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 38698887a..0299f1a1b 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -687,6 +687,18 @@ npy_resolve(PyObject* NPY_UNUSED(self), PyObject* args) Py_RETURN_NONE; } +/* resolve WRITEBACKIFCOPY */ +static PyObject* +npy_discard(PyObject* NPY_UNUSED(self), PyObject* args) +{ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + PyArray_DiscardWritebackIfCopy((PyArrayObject*)args); + Py_RETURN_NONE; +} + #if !defined(NPY_PY3K) static PyObject * int_subclass(PyObject *dummy, PyObject *args) @@ -1857,6 +1869,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"npy_resolve", npy_resolve, METH_O, NULL}, + {"npy_discard", + npy_discard, + METH_O, NULL}, #if !defined(NPY_PY3K) {"test_int_subclass", int_subclass, diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 59eb2457c..5d3cee647 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1276,42 +1276,31 @@ PyArray_New(PyTypeObject *subtype, int nd, npy_intp *dims, int type_num, } -NPY_NO_EXPORT int -_array_from_buffer_3118(PyObject *obj, PyObject **out) +/* Steals a reference to the memory view */ +NPY_NO_EXPORT PyObject * +_array_from_buffer_3118(PyObject *memoryview) { /* PEP 3118 */ - PyObject *memoryview; 
Py_buffer *view; PyArray_Descr *descr = NULL; - PyObject *r; - int nd, flags, k; + PyObject *r = NULL; + int nd, flags; Py_ssize_t d; npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; - memoryview = PyMemoryView_FromObject(obj); - if (memoryview == NULL) { - PyErr_Clear(); - return -1; - } - view = PyMemoryView_GET_BUFFER(memoryview); if (view->format != NULL) { descr = _descriptor_from_pep3118_format(view->format); if (descr == NULL) { - PyObject *msg; - msg = PyBytes_FromFormat("Invalid PEP 3118 format string: '%s'", - view->format); - PyErr_WarnEx(PyExc_RuntimeWarning, PyBytes_AS_STRING(msg), 0); - Py_DECREF(msg); goto fail; } /* Sanity check */ if (descr->elsize != view->itemsize) { - PyErr_WarnEx(PyExc_RuntimeWarning, - "Item size computed from the PEP 3118 buffer format " - "string does not match the actual item size.", - 0); + PyErr_SetString( + PyExc_RuntimeError, + "Item size computed from the PEP 3118 buffer format " + "string does not match the actual item size."); goto fail; } } @@ -1322,13 +1311,13 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) nd = view->ndim; if (view->shape != NULL) { - if (nd >= NPY_MAXDIMS || nd < 0) { + int k; + if (nd > NPY_MAXDIMS || nd < 0) { + PyErr_Format(PyExc_RuntimeError, + "PEP3118 dimensions do not satisfy 0 <= ndim <= NPY_MAXDIMS"); goto fail; } for (k = 0; k < nd; ++k) { - if (k >= NPY_MAXDIMS) { - goto fail; - } shape[k] = view->shape[k]; } if (view->strides != NULL) { @@ -1352,10 +1341,9 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) strides[0] = view->itemsize; } else if (nd > 1) { - PyErr_WarnEx(PyExc_RuntimeWarning, - "ndim computed from the PEP 3118 buffer format " - "is greater than 1, but shape is NULL.", - 0); + PyErr_SetString(PyExc_RuntimeError, + "ndim computed from the PEP 3118 buffer format " + "is greater than 1, but shape is NULL."); goto fail; } } @@ -1364,21 +1352,21 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) r = PyArray_NewFromDescr(&PyArray_Type, descr, nd, 
shape, strides, view->buf, flags, NULL); - if (r == NULL || - PyArray_SetBaseObject((PyArrayObject *)r, memoryview) < 0) { - Py_XDECREF(r); - Py_DECREF(memoryview); - return -1; + if (r == NULL) { + goto fail; + } + if (PyArray_SetBaseObject((PyArrayObject *)r, memoryview) < 0) { + goto fail; } PyArray_UpdateFlags((PyArrayObject *)r, NPY_ARRAY_UPDATE_ALL); - *out = r; - return 0; + return r; fail: + Py_XDECREF(r); Py_XDECREF(descr); Py_DECREF(memoryview); - return -1; + return NULL; } @@ -1490,14 +1478,25 @@ PyArray_GetArrayParamsFromObject(PyObject *op, } /* If op supports the PEP 3118 buffer interface */ - if (!PyBytes_Check(op) && !PyUnicode_Check(op) && - _array_from_buffer_3118(op, (PyObject **)out_arr) == 0) { - if (writeable - && PyArray_FailUnlessWriteable(*out_arr, "PEP 3118 buffer") < 0) { - Py_DECREF(*out_arr); - return -1; + if (!PyBytes_Check(op) && !PyUnicode_Check(op)) { + + PyObject *memoryview = PyMemoryView_FromObject(op); + if (memoryview == NULL) { + PyErr_Clear(); + } + else { + PyObject *arr = _array_from_buffer_3118(memoryview); + if (arr == NULL) { + return -1; + } + if (writeable + && PyArray_FailUnlessWriteable((PyArrayObject *)arr, "PEP 3118 buffer") < 0) { + Py_DECREF(arr); + return -1; + } + *out_arr = (PyArrayObject *)arr; + return 0; } - return (*out_arr) == NULL ? -1 : 0; } /* If op supports the __array_struct__ or __array_interface__ interface */ diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c index c1c1ce568..bb3cc9d4e 100644 --- a/numpy/core/src/multiarray/descriptor.c +++ b/numpy/core/src/multiarray/descriptor.c @@ -18,6 +18,7 @@ #include "templ_common.h" /* for npy_mul_with_overflow_intp */ #include "descriptor.h" #include "alloc.h" +#include "assert.h" /* * offset: A starting offset. 
@@ -1938,33 +1939,26 @@ arraydescr_shape_get(PyArray_Descr *self) if (!PyDataType_HASSUBARRAY(self)) { return PyTuple_New(0); } - /*TODO - * self->subarray->shape should always be a tuple, - * so this check should be unnecessary - */ - if (PyTuple_Check(self->subarray->shape)) { - Py_INCREF(self->subarray->shape); - return (PyObject *)(self->subarray->shape); - } - return Py_BuildValue("(O)", self->subarray->shape); + assert(PyTuple_Check(self->subarray->shape)); + Py_INCREF(self->subarray->shape); + return self->subarray->shape; } static PyObject * arraydescr_ndim_get(PyArray_Descr *self) { + Py_ssize_t ndim; + if (!PyDataType_HASSUBARRAY(self)) { return PyInt_FromLong(0); } - /*TODO - * self->subarray->shape should always be a tuple, - * so this check should be unnecessary + + /* + * PyTuple_Size has built in check + * for tuple argument */ - if (PyTuple_Check(self->subarray->shape)) { - Py_ssize_t ndim = PyTuple_Size(self->subarray->shape); - return PyInt_FromLong(ndim); - } - /* consistent with arraydescr_shape_get */ - return PyInt_FromLong(1); + ndim = PyTuple_Size(self->subarray->shape); + return PyInt_FromLong(ndim); } diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 5dbc30aa9..470a5fff9 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -591,7 +591,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]) * @from@(data1[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(accum); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum); return; } @@ -749,7 +749,7 @@ finish_after_unrolled_loop: accum += @from@(data1[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(value0 * accum); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + value0 * accum); return; } @@ -848,7 +848,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(accum * 
value1); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum * value1); return; } diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c index d36be61f5..4505e645b 100644 --- a/numpy/core/src/multiarray/nditer_pywrap.c +++ b/numpy/core/src/multiarray/nditer_pywrap.c @@ -20,16 +20,14 @@ typedef struct NewNpyArrayIterObject_tag NewNpyArrayIterObject; -enum NPYITER_CONTEXT {CONTEXT_NOTENTERED, CONTEXT_INSIDE, CONTEXT_EXITED}; - struct NewNpyArrayIterObject_tag { PyObject_HEAD /* The iterator */ NpyIter *iter; /* Flag indicating iteration started/stopped */ char started, finished; - /* iter must used as a context manager if writebackifcopy semantics used */ - char managed; + /* iter operands cannot be referenced if iter is closed */ + npy_bool is_closed; /* Child to update for nested iteration */ NewNpyArrayIterObject *nested_child; /* Cached values from the iterator */ @@ -89,7 +87,7 @@ npyiter_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) if (self != NULL) { self->iter = NULL; self->nested_child = NULL; - self->managed = CONTEXT_NOTENTERED; + self->is_closed = 0; } return (PyObject *)self; @@ -1419,7 +1417,7 @@ static PyObject *npyiter_value_get(NewNpyArrayIterObject *self) ret = npyiter_seq_item(self, 0); } else { - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -1454,7 +1452,7 @@ static PyObject *npyiter_operands_get(NewNpyArrayIterObject *self) "Iterator is invalid"); return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -1489,7 +1487,7 @@ static PyObject *npyiter_itviews_get(NewNpyArrayIterObject *self) return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -1517,7 +1515,8 @@ static PyObject 
*npyiter_itviews_get(NewNpyArrayIterObject *self) static PyObject * npyiter_next(NewNpyArrayIterObject *self) { - if (self->iter == NULL || self->iternext == NULL || self->finished) { + if (self->iter == NULL || self->iternext == NULL || + self->finished || self->is_closed) { return NULL; } @@ -1912,7 +1911,7 @@ static PyObject *npyiter_dtypes_get(NewNpyArrayIterObject *self) return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -2014,7 +2013,7 @@ npyiter_seq_item(NewNpyArrayIterObject *self, Py_ssize_t i) return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -2104,7 +2103,7 @@ npyiter_seq_slice(NewNpyArrayIterObject *self, return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -2170,7 +2169,7 @@ npyiter_seq_ass_item(NewNpyArrayIterObject *self, Py_ssize_t i, PyObject *v) return -1; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return -1; @@ -2250,7 +2249,7 @@ npyiter_seq_ass_slice(NewNpyArrayIterObject *self, Py_ssize_t ilow, return -1; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return -1; @@ -2307,7 +2306,7 @@ npyiter_subscript(NewNpyArrayIterObject *self, PyObject *op) return NULL; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return NULL; @@ -2362,7 +2361,7 @@ npyiter_ass_subscript(NewNpyArrayIterObject *self, PyObject *op, return -1; } - if (self->managed == CONTEXT_EXITED) { + if (self->is_closed) { PyErr_SetString(PyExc_ValueError, "Iterator is closed"); return -1; @@ -2402,11 +2401,10 @@ npyiter_enter(NewNpyArrayIterObject 
*self) PyErr_SetString(PyExc_RuntimeError, "operation on non-initialized iterator"); return NULL; } - if (self->managed == CONTEXT_EXITED) { - PyErr_SetString(PyExc_ValueError, "cannot reuse iterator after exit"); + if (self->is_closed) { + PyErr_SetString(PyExc_ValueError, "cannot reuse closed iterator"); return NULL; } - self->managed = CONTEXT_INSIDE; Py_INCREF(self); return (PyObject *)self; } @@ -2420,6 +2418,7 @@ npyiter_close(NewNpyArrayIterObject *self) Py_RETURN_NONE; } ret = NpyIter_Close(iter); + self->is_closed = 1; if (ret < 0) { return NULL; } @@ -2429,7 +2428,6 @@ npyiter_close(NewNpyArrayIterObject *self) static PyObject * npyiter_exit(NewNpyArrayIterObject *self, PyObject *args) { - self->managed = CONTEXT_EXITED; /* even if called via exception handling, writeback any data */ return npyiter_close(self); } diff --git a/numpy/core/src/umath/override.c b/numpy/core/src/umath/override.c index 0aef093b0..123d9af87 100644 --- a/numpy/core/src/umath/override.c +++ b/numpy/core/src/umath/override.c @@ -123,11 +123,16 @@ normalize_reduce_args(PyUFuncObject *ufunc, PyObject *args, npy_intp nargs = PyTuple_GET_SIZE(args); npy_intp i; PyObject *obj; - static char *kwlist[] = {"array", "axis", "dtype", "out", "keepdims"}; + static PyObject *NoValue = NULL; + static char *kwlist[] = {"array", "axis", "dtype", "out", "keepdims", + "initial"}; + + npy_cache_import("numpy", "_NoValue", &NoValue); + if (NoValue == NULL) return -1; - if (nargs < 1 || nargs > 5) { + if (nargs < 1 || nargs > 6) { PyErr_Format(PyExc_TypeError, - "ufunc.reduce() takes from 1 to 5 positional " + "ufunc.reduce() takes from 1 to 6 positional " "arguments but %"NPY_INTP_FMT" were given", nargs); return -1; } @@ -151,6 +156,10 @@ normalize_reduce_args(PyUFuncObject *ufunc, PyObject *args, } obj = PyTuple_GetSlice(args, 3, 4); } + /* Remove initial=np._NoValue */ + if (i == 5 && obj == NoValue) { + continue; + } PyDict_SetItemString(*normal_kwds, kwlist[i], obj); if (i == 3) { Py_DECREF(obj); 
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c index 95daa2d2d..36b77ef03 100644 --- a/numpy/core/src/umath/ufunc_object.c +++ b/numpy/core/src/umath/ufunc_object.c @@ -3096,20 +3096,25 @@ finish_loop: */ static PyArrayObject * PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, - int naxes, int *axes, PyArray_Descr *odtype, int keepdims) + int naxes, int *axes, PyArray_Descr *odtype, int keepdims, + PyObject *initial) { int iaxes, ndim; npy_bool reorderable; npy_bool axis_flags[NPY_MAXDIMS]; PyArray_Descr *dtype; PyArrayObject *result; - PyObject *identity = NULL; + PyObject *identity; const char *ufunc_name = ufunc_get_name_cstr(ufunc); /* These parameters come from a TLS global */ int buffersize = 0, errormask = 0; + static PyObject *NoValue = NULL; NPY_UF_DBG_PRINT1("\nEvaluating ufunc %s.reduce\n", ufunc_name); + npy_cache_import("numpy", "_NoValue", &NoValue); + if (NoValue == NULL) return NULL; + ndim = PyArray_NDIM(arr); /* Create an array of flags for reduction */ @@ -3133,19 +3138,28 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, if (identity == NULL) { return NULL; } - /* - * The identity for a dynamic dtype like - * object arrays can't be used in general - */ - if (identity != Py_None && PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { + + /* Get the initial value */ + if (initial == NULL || initial == NoValue) { + initial = identity; + + /* + * The identity for a dynamic dtype like + * object arrays can't be used in general + */ + if (initial != Py_None && PyArray_ISOBJECT(arr) && PyArray_SIZE(arr) != 0) { + Py_DECREF(initial); + initial = Py_None; + Py_INCREF(initial); + } + } else { Py_DECREF(identity); - identity = Py_None; - Py_INCREF(identity); + Py_INCREF(initial); /* match the reference count in the if above */ } /* Get the reduction dtype */ if (reduce_type_resolver(ufunc, arr, odtype, &dtype) < 0) { - Py_DECREF(identity); + Py_DECREF(initial); 
return NULL; } @@ -3153,12 +3167,12 @@ PyUFunc_Reduce(PyUFuncObject *ufunc, PyArrayObject *arr, PyArrayObject *out, NPY_UNSAFE_CASTING, axis_flags, reorderable, keepdims, 0, - identity, + initial, reduce_loop, ufunc, buffersize, ufunc_name, errormask); Py_DECREF(dtype); - Py_DECREF(identity); + Py_DECREF(initial); return result; } @@ -3922,8 +3936,9 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, PyArray_Descr *otype = NULL; PyArrayObject *out = NULL; int keepdims = 0; + PyObject *initial = NULL; static char *reduce_kwlist[] = { - "array", "axis", "dtype", "out", "keepdims", NULL}; + "array", "axis", "dtype", "out", "keepdims", "initial", NULL}; static char *accumulate_kwlist[] = { "array", "axis", "dtype", "out", NULL}; static char *reduceat_kwlist[] = { @@ -3995,13 +4010,13 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, } } else { - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&i:reduce", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO&O&iO:reduce", reduce_kwlist, &op, &axes_in, PyArray_DescrConverter2, &otype, PyArray_OutputConverter, &out, - &keepdims)) { + &keepdims, &initial)) { goto fail; } } @@ -4132,7 +4147,7 @@ PyUFunc_GenericReduction(PyUFuncObject *ufunc, PyObject *args, switch(operation) { case UFUNC_REDUCE: ret = PyUFunc_Reduce(ufunc, mp, out, naxes, axes, - otype, keepdims); + otype, keepdims, initial); break; case UFUNC_ACCUMULATE: if (naxes != 1) { diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index 2c142f82b..6214e325c 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -491,6 +491,8 @@ class TestPrintOptions(object): np.array(1.), style=repr) # but not in legacy mode np.array2string(np.array(1.), style=repr, legacy='1.13') + # gh-10934 style was broken in legacy mode, check it works + np.array2string(np.array(1.), legacy='1.13') def test_float_spacing(self): x = np.array([1., 2., 3.]) diff --git 
a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index 792b9e0a2..104dd1986 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -502,6 +502,16 @@ class TestEinSum(object): optimize=optimize), np.full((1, 5), 5)) + # Cases which were failing (gh-10899) + x = np.eye(2, dtype=dtype) + y = np.ones(2, dtype=dtype) + assert_array_equal(np.einsum("ji,i->", x, y, optimize=optimize), + [2.]) # contig_contig_outstride0_two + assert_array_equal(np.einsum("i,ij->", y, x, optimize=optimize), + [2.]) # stride0_contig_outstride0_two + assert_array_equal(np.einsum("ij,i->", x, y, optimize=optimize), + [2.]) # contig_stride0_outstride0_two + def test_einsum_sums_int8(self): self.check_einsum_sums('i1') diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 806a3b083..3c5f90cfc 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -6204,7 +6204,10 @@ class TestPEP3118Dtype(object): self._check('i', 'i') self._check('i:f0:', [('f0', 'i')]) + class TestNewBufferProtocol(object): + """ Test PEP3118 buffers """ + def _check_roundtrip(self, obj): obj = np.asarray(obj) x = memoryview(obj) @@ -6515,6 +6518,35 @@ class TestNewBufferProtocol(object): with assert_raises(ValueError): memoryview(arr) + def test_max_dims(self): + a = np.empty((1,) * 32) + self._check_roundtrip(a) + + def test_error_too_many_dims(self): + def make_ctype(shape, scalar_type): + t = scalar_type + for dim in shape[::-1]: + t = dim * t + return t + + # construct a memoryview with 33 dimensions + c_u8_33d = make_ctype((1,)*33, ctypes.c_uint8) + m = memoryview(c_u8_33d()) + assert_equal(m.ndim, 33) + + assert_raises_regex( + RuntimeError, "ndim", + np.array, m) + + def test_error_pointer_type(self): + # gh-6741 + m = memoryview(ctypes.pointer(ctypes.c_uint8())) + assert_('&' in m.format) + + assert_raises_regex( + ValueError, "format string", + np.array, m) + class 
TestArrayAttributeDeletion(object): @@ -7246,16 +7278,20 @@ class TestWritebackIfCopy(object): def test_view_assign(self): from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_resolve + arr = np.arange(9).reshape(3, 3).T arr_wb = npy_create_writebackifcopy(arr) assert_(arr_wb.flags.writebackifcopy) assert_(arr_wb.base is arr) - arr_wb[:] = -100 + arr_wb[...] = -100 npy_resolve(arr_wb) + # arr changes after resolve, even though we assigned to arr_wb assert_equal(arr, -100) # after resolve, the two arrays no longer reference each other - assert_(not arr_wb.ctypes.data == 0) - arr_wb[:] = 100 + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + # assigning to arr_wb does not get transfered to arr + arr_wb[...] = 100 assert_equal(arr, -100) def test_dealloc_warning(self): @@ -7266,6 +7302,30 @@ class TestWritebackIfCopy(object): _multiarray_tests.npy_abuse_writebackifcopy(v) assert len(sup.log) == 1 + def test_view_discard_refcount(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_discard + + arr = np.arange(9).reshape(3, 3).T + orig = arr.copy() + if HAS_REFCOUNT: + arr_cnt = sys.getrefcount(arr) + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_discard(arr_wb) + # arr remains unchanged after discard + assert_equal(arr, orig) + # after discard, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + if HAS_REFCOUNT: + assert_equal(arr_cnt, sys.getrefcount(arr)) + # assigning to arr_wb does not get transfered to arr + arr_wb[...] 
= 100 + assert_equal(arr, orig) + + class TestArange(object): def test_infinite(self): assert_raises_regex( diff --git a/numpy/core/tests/test_nditer.py b/numpy/core/tests/test_nditer.py index bc9456536..77c26eacf 100644 --- a/numpy/core/tests/test_nditer.py +++ b/numpy/core/tests/test_nditer.py @@ -2847,7 +2847,7 @@ def test_writebacks(): enter = it.__enter__ assert_raises(ValueError, enter) -def test_close(): +def test_close_equivalent(): ''' using a context amanger and using nditer.close are equivalent ''' def add_close(x, y, out=None): @@ -2856,8 +2856,10 @@ def test_close(): [['readonly'], ['readonly'], ['writeonly','allocate']]) for (a, b, c) in it: addop(a, b, out=c) + ret = it.operands[2] it.close() - return it.operands[2] + return ret + def add_context(x, y, out=None): addop = np.add it = np.nditer([x, y, out], [], @@ -2871,6 +2873,13 @@ def test_close(): z = add_context(range(5), range(5)) assert_equal(z, range(0, 10, 2)) +def test_close_raises(): + it = np.nditer(np.arange(3)) + assert_equal (next(it), 0) + it.close() + assert_raises(StopIteration, next, it) + assert_raises(ValueError, getattr, it, 'operands') + def test_warn_noclose(): a = np.arange(6, dtype='f4') au = a.byteswap().newbyteorder() diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py index 7a276c04d..fe40456d5 100644 --- a/numpy/core/tests/test_ufunc.py +++ b/numpy/core/tests/test_ufunc.py @@ -494,6 +494,17 @@ class TestUfunc(object): d += d assert_almost_equal(d, 2. 
+ 2j) + def test_sum_initial(self): + # Integer, single axis + assert_equal(np.sum([3], initial=2), 5) + + # Floating point + assert_almost_equal(np.sum([0.2], initial=0.1), 0.3) + + # Multiple non-adjacent axes + assert_equal(np.sum(np.ones((2, 3, 5), dtype=np.int64), axis=(0, 2), initial=2), + [12, 12, 12]) + def test_inner1d(self): a = np.arange(6).reshape((2, 3)) assert_array_equal(umt.inner1d(a, a), np.sum(a*a, axis=-1)) @@ -844,6 +855,7 @@ class TestUfunc(object): assert_equal(np.min(a), False) assert_equal(np.array([[1]], dtype=object).sum(), 1) assert_equal(np.array([[[1, 2]]], dtype=object).sum((0, 1)), [1, 2]) + assert_equal(np.array([1], dtype=object).sum(initial=1), 2) def test_object_array_accumulate_inplace(self): # Checks that in-place accumulates work, see also gh-7402 @@ -987,7 +999,7 @@ class TestUfunc(object): assert_equal(np.sqrt(a, where=m), [1]) def check_identityless_reduction(self, a): - # np.minimum.reduce is a identityless reduction + # np.minimum.reduce is an identityless reduction # Verify that it sees the zero at various positions a[...] = 1 @@ -1056,6 +1068,35 @@ class TestUfunc(object): a = a[1:, 1:, 1:] self.check_identityless_reduction(a) + def test_initial_reduction(self): + # np.minimum.reduce is an identityless reduction + + # For cases like np.maximum(np.abs(...), initial=0) + # More generally, a supremum over non-negative numbers. + assert_equal(np.maximum.reduce([], initial=0), 0) + + # For cases like reduction of an empty array over the reals. 
+ assert_equal(np.minimum.reduce([], initial=np.inf), np.inf) + assert_equal(np.maximum.reduce([], initial=-np.inf), -np.inf) + + # Random tests + assert_equal(np.minimum.reduce([5], initial=4), 4) + assert_equal(np.maximum.reduce([4], initial=5), 5) + assert_equal(np.maximum.reduce([5], initial=4), 5) + assert_equal(np.minimum.reduce([4], initial=5), 4) + + # Check initial=None raises ValueError for both types of ufunc reductions + assert_raises(ValueError, np.minimum.reduce, [], initial=None) + assert_raises(ValueError, np.add.reduce, [], initial=None) + + # Check that np._NoValue gives default behavior. + assert_equal(np.add.reduce([], initial=np._NoValue), 0) + + # Check that initial kwarg behaves as intended for dtype=object + a = np.array([10], dtype=object) + res = np.add.reduce(a, initial=5) + assert_equal(res, 15) + def test_identityless_reduction_nonreorderable(self): a = np.array([[8.0, 2.0, 2.0], [1.0, 0.5, 0.25]]) @@ -1407,15 +1448,18 @@ class TestUfunc(object): assert_equal(f(d, 0, None, None), r) assert_equal(f(d, 0, None, None, keepdims=False), r) assert_equal(f(d, 0, None, None, True), r.reshape((1,) + r.shape)) + assert_equal(f(d, 0, None, None, False, 0), r) + assert_equal(f(d, 0, None, None, False, initial=0), r) # multiple keywords assert_equal(f(d, axis=0, dtype=None, out=None, keepdims=False), r) assert_equal(f(d, 0, dtype=None, out=None, keepdims=False), r) assert_equal(f(d, 0, None, out=None, keepdims=False), r) + assert_equal(f(d, 0, None, out=None, keepdims=False, initial=0), r) # too little assert_raises(TypeError, f) # too much - assert_raises(TypeError, f, d, 0, None, None, False, 1) + assert_raises(TypeError, f, d, 0, None, None, False, 0, 1) # invalid axis assert_raises(TypeError, f, d, "invalid") assert_raises(TypeError, f, d, axis="invalid") diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index ba567e68b..ea0be1892 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ 
-1810,7 +1810,7 @@ class TestSpecialMethods(object): # reduce, kwargs res = np.multiply.reduce(a, axis='axis0', dtype='dtype0', out='out0', - keepdims='keep0') + keepdims='keep0', initial='init0') assert_equal(res[0], a) assert_equal(res[1], np.multiply) assert_equal(res[2], 'reduce') @@ -1818,7 +1818,8 @@ class TestSpecialMethods(object): assert_equal(res[4], {'dtype':'dtype0', 'out': ('out0',), 'keepdims': 'keep0', - 'axis': 'axis0'}) + 'axis': 'axis0', + 'initial': 'init0'}) # reduce, output equal to None removed, but not other explicit ones, # even if they are at their default value. @@ -1828,6 +1829,14 @@ class TestSpecialMethods(object): assert_equal(res[4], {'axis': 0, 'keepdims': True}) res = np.multiply.reduce(a, None, out=(None,), dtype=None) assert_equal(res[4], {'axis': None, 'dtype': None}) + res = np.multiply.reduce(a, 0, None, None, False, 2) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, 'initial': 2}) + # np._NoValue ignored for initial. + res = np.multiply.reduce(a, 0, None, None, False, np._NoValue) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False}) + # None kept for initial. 
+ res = np.multiply.reduce(a, 0, None, None, False, None) + assert_equal(res[4], {'axis': 0, 'dtype': None, 'keepdims': False, 'initial': None}) # reduce, wrong args assert_raises(ValueError, np.multiply.reduce, a, out=()) diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 099b63c40..72beef471 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -54,7 +54,8 @@ __all__ = [ 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', - 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc' + 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc', + 'quantile' ] @@ -3427,7 +3428,7 @@ def percentile(a, q, axis=None, out=None, interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -3463,6 +3464,7 @@ def percentile(a, q, axis=None, out=None, mean median : equivalent to ``percentile(..., 50)`` nanpercentile + quantile : equivalent to percentile, except with q in the range [0, 1]. Notes ----- @@ -3539,6 +3541,110 @@ def percentile(a, q, axis=None, out=None, a, q, axis, out, overwrite_input, interpolation, keepdims) +def quantile(a, q, axis=None, out=None, + overwrite_input=False, interpolation='linear', keepdims=False): + """ + Compute the `q`th quantile of the data along the specified axis. + ..versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. 
+ axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single quantile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + mean + percentile : equivalent to quantile, but with q in the range [0, 100]. 
+ median : equivalent to ``quantile(..., 0.5)`` + nanquantile + + Notes + ----- + Given a vector ``V`` of length ``N``, the ``q``-th quantile of + ``V`` is the value ``q`` of the way from the minimum to the + maximum in a sorted copy of ``V``. The values and distances of + the two nearest neighbors as well as the `interpolation` parameter + will determine the quantile if the normalized ranking does not + match the location of ``q`` exactly. This function is the same as + the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the + same as the maximum if ``q=1.0``. + + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.quantile(a, 0.5) + 3.5 + >>> np.quantile(a, 0.5, axis=0) + array([[ 6.5, 4.5, 2.5]]) + >>> np.quantile(a, 0.5, axis=1) + array([ 7., 2.]) + >>> np.quantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.quantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.quantile(a, 0.5, axis=0, out=out) + array([[ 6.5, 4.5, 2.5]]) + >>> m + array([[ 6.5, 4.5, 2.5]]) + >>> b = a.copy() + >>> np.quantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a == b) + """ + q = np.asanyarray(q) + if not _quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _quantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index d2a398a0a..90e19769e 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -911,10 +911,6 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): nbin[i] = len(edges[i]) + 1 # includes an outlier on each end dedges[i] = np.diff(edges[i]) - # Handle empty input. 
- if N == 0: - return np.zeros(nbin-2), edges - # Compute the bin number each sample falls into. Ncount = tuple( np.digitize(sample[:, i], edges[i]) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index dddc0e5b8..abd2da1a2 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -16,6 +16,7 @@ Functions - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values - `nanmedian` -- median of non-NaN values +- `nanquantile` -- qth quantile of non-NaN values - `nanpercentile` -- qth percentile of non-NaN values """ @@ -29,7 +30,7 @@ from numpy.lib import function_base __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', - 'nancumsum', 'nancumprod' + 'nancumsum', 'nancumprod', 'nanquantile' ] @@ -1057,7 +1058,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, `a` after this function completes is undefined. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -1095,6 +1096,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean + nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. Notes ----- @@ -1144,6 +1146,110 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) +def nanquantile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + """ + Compute the qth quantile of the data along the specified axis, + while ignoring nan values. + Returns the qth quantile(s) of the array elements. 
+ .. versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. 
+ + Returns + ------- + quantile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + quantile + nanmean, nanmedian + nanmedian : equivalent to ``nanquantile(..., 0.5)`` + nanpercentile : same as nanquantile, but with q in the range [0, 100]. + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.quantile(a, 0.5) + nan + >>> np.nanquantile(a, 0.5) + 3.5 + >>> np.nanquantile(a, 0.5, axis=0) + array([ 6.5, 2., 2.5]) + >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.nanquantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanquantile(a, 0.5, axis=0, out=out) + array([ 6.5, 2., 2.5]) + >>> m + array([ 6.5, 2. 
, 2.5]) + >>> b = a.copy() + >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + """ + a = np.asanyarray(a) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 59379bdda..67585443b 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -936,7 +936,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if encoding is not None: fencoding = encoding # we must assume local encoding - # TOOD emit portability warning? + # TODO emit portability warning? elif fencoding is None: import locale fencoding = locale.getpreferredencoding() diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 6653b5ba1..43d62a7ff 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -2749,6 +2749,28 @@ class TestPercentile(object): a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) +class TestQuantile(object): + # most of this is already tested by TestPercentile + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) 
+ assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + class TestMedian(object): def test_basic(self): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 84aca9915..f58c9e33d 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -23,7 +23,7 @@ from numpy.ma.testutils import assert_equal from numpy.testing import ( assert_warns, assert_, SkipTest, assert_raises_regex, assert_raises, assert_allclose, assert_array_equal, temppath, tempdir, IS_PYPY, - HAS_REFCOUNT, suppress_warnings, + HAS_REFCOUNT, suppress_warnings, assert_no_gc_cycles, ) @@ -937,7 +937,7 @@ class TestLoadTxt(LoadTxtBase): assert_equal(res, tgt) def test_complex_misformatted(self): - # test for backward compatability + # test for backward compatibility # some complex formats used to generate x+-yj a = np.zeros((2, 2), dtype=np.complex128) re = np.pi @@ -2416,14 +2416,5 @@ def test_load_refcount(): np.savez(f, [1, 2, 3]) f.seek(0) - assert_(gc.isenabled()) - gc.disable() - try: - gc.collect() + with assert_no_gc_cycles(): np.load(f) - # gc.collect returns the number of unreachable objects in cycles that - # were found -- we are checking that no cycles were created by np.load - n_objects_in_cycles = gc.collect() - finally: - gc.enable() - assert_equal(n_objects_in_cycles, 0) diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 1f403f7b8..e69d9dd7d 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -886,3 +886,39 @@ class TestNanFunctions_Percentile(object): megamat = 
np.ones((3, 4, 5, 6)) assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + + +class TestNanFunctions_Quantile(object): + # most of this is already tested by TestPercentile + + def test_regression(self): + ar = np.arange(24).reshape(2, 3, 4).astype(float) + ar[0][1] = np.nan + + assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50)) + assert_equal(np.nanquantile(ar, q=0.5, axis=0), + np.nanpercentile(ar, q=50, axis=0)) + assert_equal(np.nanquantile(ar, q=0.5, axis=1), + np.nanpercentile(ar, q=50, axis=1)) + assert_equal(np.nanquantile(ar, q=[0.5], axis=1), + np.nanpercentile(ar, q=[50], axis=1)) + assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1), + np.nanpercentile(ar, q=[25, 50, 75], axis=1)) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.nanquantile(x, 0), 0.) + assert_equal(np.nanquantile(x, 1), 3.5) + assert_equal(np.nanquantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, because quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index 4dabaa093..8ef153c15 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -3817,7 +3817,7 @@ cdef class RandomState: Draw samples from a negative binomial distribution. Samples are drawn from a negative binomial distribution with specified - parameters, `n` trials and `p` probability of success where `n` is an + parameters, `n` successes and `p` probability of success where `n` is an integer > 0 and `p` is in the interval [0, 1]. 
Parameters @@ -3837,21 +3837,19 @@ cdef class RandomState: ------- out : ndarray or scalar Drawn samples from the parameterized negative binomial distribution, - where each sample is equal to N, the number of trials it took to - achieve n - 1 successes, N - (n - 1) failures, and a success on the, - (N + n)th trial. + where each sample is equal to N, the number of failures that + occurred before a total of n successes was reached. Notes ----- The probability density for the negative binomial distribution is - .. math:: P(N;n,p) = \\binom{N+n-1}{n-1}p^{n}(1-p)^{N}, + .. math:: P(N;n,p) = \\binom{N+n-1}{N}p^{n}(1-p)^{N}, - where :math:`n-1` is the number of successes, :math:`p` is the - probability of success, and :math:`N+n-1` is the number of trials. - The negative binomial distribution gives the probability of n-1 - successes and N failures in N+n-1 trials, and success on the (N+n)th - trial. + where :math:`n` is the number of successes, :math:`p` is the + probability of success, and :math:`N+n` is the number of trials. + The negative binomial distribution gives the probability of N + failures given n successes, with a success on the last trial. 
If one throws a die repeatedly until the third time a "1" appears, then the probability distribution of the number of non-"1"s that diff --git a/numpy/testing/_private/utils.py b/numpy/testing/_private/utils.py index 507ecb1e2..b0c0b0c48 100644 --- a/numpy/testing/_private/utils.py +++ b/numpy/testing/_private/utils.py @@ -7,6 +7,7 @@ from __future__ import division, absolute_import, print_function import os import sys import re +import gc import operator import warnings from functools import partial, wraps @@ -14,6 +15,7 @@ import shutil import contextlib from tempfile import mkdtemp, mkstemp from unittest.case import SkipTest +import pprint from numpy.core import( float32, empty, arange, array_repr, ndarray, isnat, array) @@ -35,7 +37,7 @@ __all__ = [ 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', - '_assert_valid_refcount', '_gen_alignment_data', + '_assert_valid_refcount', '_gen_alignment_data', 'assert_no_gc_cycles', ] @@ -2272,3 +2274,89 @@ class suppress_warnings(object): return func(*args, **kwargs) return new_func + + +@contextlib.contextmanager +def _assert_no_gc_cycles_context(name=None): + __tracebackhide__ = True # Hide traceback for py.test + + # not meaningful to test if there is no refcounting + if not HAS_REFCOUNT: + return + + assert_(gc.isenabled()) + gc.disable() + gc_debug = gc.get_debug() + try: + for i in range(100): + if gc.collect() == 0: + break + else: + raise RuntimeError( + "Unable to fully collect garbage - perhaps a __del__ method is " + "creating more reference cycles?") + + gc.set_debug(gc.DEBUG_SAVEALL) + yield + # gc.collect returns the number of unreachable objects in cycles that + # were found -- we are checking that no cycles were created in the context + n_objects_in_cycles = gc.collect() + objects_in_cycles = gc.garbage[:] + finally: + del gc.garbage[:] + gc.set_debug(gc_debug) 
+ gc.enable() + + if n_objects_in_cycles: + name_str = " when calling %s" % name if name is not None else "" + raise AssertionError( + "Reference cycles were found{}: {} objects were collected, " + "of which {} are shown below:{}" + .format( + name_str, + n_objects_in_cycles, + len(objects_in_cycles), + ''.join( + "\n {} object with id={}:\n {}".format( + type(o).__name__, + id(o), + pprint.pformat(o).replace('\n', '\n ') + ) for o in objects_in_cycles + ) + ) + ) + + +def assert_no_gc_cycles(*args, **kwargs): + """ + Fail if the given callable produces any reference cycles. + + If called with all arguments omitted, may be used as a context manager: + + with assert_no_gc_cycles(): + do_something() + + .. versionadded:: 1.15.0 + + Parameters + ---------- + func : callable + The callable to test. + \\*args : Arguments + Arguments passed to `func`. + \\*\\*kwargs : Kwargs + Keyword arguments passed to `func`. + + Returns + ------- + Nothing. The result is deliberately discarded to ensure that all cycles + are found. 
+ + """ + if not args: + return _assert_no_gc_cycles_context() + + func = args[0] + args = args[1:] + with _assert_no_gc_cycles_context(name=func.__name__): + func(*args, **kwargs) diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index 35f81d8a7..0592e62f8 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -6,6 +6,7 @@ import os import itertools import textwrap import pytest +import weakref import numpy as np from numpy.testing import ( @@ -14,7 +15,7 @@ from numpy.testing import ( assert_raises, assert_warns, assert_no_warnings, assert_allclose, assert_approx_equal, assert_array_almost_equal_nulp, assert_array_max_ulp, clear_and_catch_warnings, suppress_warnings, assert_string_equal, assert_, - tempdir, temppath, + tempdir, temppath, assert_no_gc_cycles, HAS_REFCOUNT ) @@ -1360,3 +1361,76 @@ def test_clear_and_catch_warnings_inherit(): warnings.simplefilter('ignore') warnings.warn('Some warning') assert_equal(my_mod.__warningregistry__, {}) + + +@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts") +class TestAssertNoGcCycles(object): + """ Test assert_no_gc_cycles """ + def test_passes(self): + def no_cycle(): + b = [] + b.append([]) + return b + + with assert_no_gc_cycles(): + no_cycle() + + assert_no_gc_cycles(no_cycle) + + + def test_asserts(self): + def make_cycle(): + a = [] + a.append(a) + a.append(a) + return a + + with assert_raises(AssertionError): + with assert_no_gc_cycles(): + make_cycle() + + with assert_raises(AssertionError): + assert_no_gc_cycles(make_cycle) + + + def test_fails(self): + """ + Test that in cases where the garbage cannot be collected, we raise an + error, instead of hanging forever trying to clear it. 
+ """ + + class ReferenceCycleInDel(object): + """ + An object that not only contains a reference cycle, but creates new + cycles whenever it's garbage-collected and its __del__ runs + """ + make_cycle = True + + def __init__(self): + self.cycle = self + + def __del__(self): + # break the current cycle so that `self` can be freed + self.cycle = None + + if ReferenceCycleInDel.make_cycle: + # but create a new one so that the garbage collector has more + # work to do. + ReferenceCycleInDel() + + try: + w = weakref.ref(ReferenceCycleInDel()) + try: + with assert_raises(RuntimeError): + # this will be unable to get a baseline empty garbage + assert_no_gc_cycles(lambda: None) + except AssertionError: + # the above test is only necessary if the GC actually tried to free + # our object anyway, which python 2.7 does not. + if w() is not None: + pytest.skip("GC does not call __del__ on cyclic objects") + raise + + finally: + # make sure that we stop creating reference cycles + ReferenceCycleInDel.make_cycle = False diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index 78cf405cf..184adcc74 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -25,5 +25,5 @@ __all__ = [ 'assert_allclose', 'IgnoreException', 'clear_and_catch_warnings', 'SkipTest', 'KnownFailureException', 'temppath', 'tempdir', 'IS_PYPY', 'HAS_REFCOUNT', 'suppress_warnings', 'assert_array_compare', - '_assert_valid_refcount', '_gen_alignment_data', + '_assert_valid_refcount', '_gen_alignment_data', 'assert_no_gc_cycles' ] |
