Diffstat (limited to 'numpy')
-rw-r--r--   numpy/core/arrayprint.py                              3
-rw-r--r--   numpy/core/include/numpy/ndarrayobject.h             13
-rw-r--r--   numpy/core/src/multiarray/_multiarray_tests.c.src     15
-rw-r--r--   numpy/core/src/multiarray/ctors.c                     85
-rw-r--r--   numpy/core/src/multiarray/einsum.c.src                 6
-rw-r--r--   numpy/core/tests/test_arrayprint.py                    2
-rw-r--r--   numpy/core/tests/test_einsum.py                       10
-rw-r--r--   numpy/core/tests/test_multiarray.py                   66
-rw-r--r--   numpy/lib/function_base.py                           110
-rw-r--r--   numpy/lib/histograms.py                                4
-rw-r--r--   numpy/lib/nanfunctions.py                            110
-rw-r--r--   numpy/lib/tests/test_function_base.py                 22
-rw-r--r--   numpy/lib/tests/test_nanfunctions.py                  36
-rw-r--r--   numpy/random/mtrand/mtrand.pyx                        18
14 files changed, 428 insertions, 72 deletions
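
The largest user-facing addition in the hunks below is np.quantile together with np.nanquantile (numpy/lib/function_base.py and numpy/lib/nanfunctions.py): the same machinery as percentile/nanpercentile, but with q given in the range [0, 1]. A minimal usage sketch in doctest style, assuming NumPy 1.15.0 or later; the exact repr spacing depends on the active print options:

    >>> import numpy as np
    >>> a = np.array([[10, 7, 4], [3, 2, 1]])
    >>> np.quantile(a, 0.5)            # same result as np.percentile(a, 50)
    3.5
    >>> np.quantile(a, 0.5, axis=0)    # median of each column
    array([6.5, 4.5, 2.5])
    >>> np.quantile(a, 0.5, axis=1)    # median of each row
    array([7., 2.])
    >>> b = np.array([[10., np.nan, 4.], [3., 2., 1.]])
    >>> np.quantile(b, 0.5)            # a NaN in the input propagates
    nan
    >>> np.nanquantile(b, 0.5)         # NaN entries are ignored
    3.0
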
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py index adbbab6ed..f39248bd0 100644 --- a/numpy/core/arrayprint.py +++ b/numpy/core/arrayprint.py @@ -647,6 +647,9 @@ def array2string(a, max_line_width=None, precision=None, options.update(overrides) if options['legacy'] == '1.13': + if style is np._NoValue: + style = repr + if a.shape == () and not a.dtype.names: return style(a.item()) elif style is not np._NoValue: diff --git a/numpy/core/include/numpy/ndarrayobject.h b/numpy/core/include/numpy/ndarrayobject.h index ec0fd1ee9..12fc7098c 100644 --- a/numpy/core/include/numpy/ndarrayobject.h +++ b/numpy/core/include/numpy/ndarrayobject.h @@ -170,14 +170,17 @@ extern "C" CONFUSE_EMACS (k)*PyArray_STRIDES(obj)[2] + \ (l)*PyArray_STRIDES(obj)[3])) +/* Move to arrayobject.c once PyArray_XDECREF_ERR is removed */ static NPY_INLINE void PyArray_DiscardWritebackIfCopy(PyArrayObject *arr) { - if (arr != NULL) { - if ((PyArray_FLAGS(arr) & NPY_ARRAY_WRITEBACKIFCOPY) || - (PyArray_FLAGS(arr) & NPY_ARRAY_UPDATEIFCOPY)) { - PyArrayObject *base = (PyArrayObject *)PyArray_BASE(arr); - PyArray_ENABLEFLAGS(base, NPY_ARRAY_WRITEABLE); + PyArrayObject_fields *fa = (PyArrayObject_fields *)arr; + if (fa && fa->base) { + if ((fa->flags & NPY_ARRAY_UPDATEIFCOPY) || + (fa->flags & NPY_ARRAY_WRITEBACKIFCOPY)) { + PyArray_ENABLEFLAGS((PyArrayObject*)fa->base, NPY_ARRAY_WRITEABLE); + Py_DECREF(fa->base); + fa->base = NULL; PyArray_CLEARFLAGS(arr, NPY_ARRAY_WRITEBACKIFCOPY); PyArray_CLEARFLAGS(arr, NPY_ARRAY_UPDATEIFCOPY); } diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src index 38698887a..0299f1a1b 100644 --- a/numpy/core/src/multiarray/_multiarray_tests.c.src +++ b/numpy/core/src/multiarray/_multiarray_tests.c.src @@ -687,6 +687,18 @@ npy_resolve(PyObject* NPY_UNUSED(self), PyObject* args) Py_RETURN_NONE; } +/* resolve WRITEBACKIFCOPY */ +static PyObject* +npy_discard(PyObject* NPY_UNUSED(self), PyObject* args) +{ + if (!PyArray_Check(args)) { + PyErr_SetString(PyExc_TypeError, "test needs ndarray input"); + return NULL; + } + PyArray_DiscardWritebackIfCopy((PyArrayObject*)args); + Py_RETURN_NONE; +} + #if !defined(NPY_PY3K) static PyObject * int_subclass(PyObject *dummy, PyObject *args) @@ -1857,6 +1869,9 @@ static PyMethodDef Multiarray_TestsMethods[] = { {"npy_resolve", npy_resolve, METH_O, NULL}, + {"npy_discard", + npy_discard, + METH_O, NULL}, #if !defined(NPY_PY3K) {"test_int_subclass", int_subclass, diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index 59eb2457c..5d3cee647 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1276,42 +1276,31 @@ PyArray_New(PyTypeObject *subtype, int nd, npy_intp *dims, int type_num, } -NPY_NO_EXPORT int -_array_from_buffer_3118(PyObject *obj, PyObject **out) +/* Steals a reference to the memory view */ +NPY_NO_EXPORT PyObject * +_array_from_buffer_3118(PyObject *memoryview) { /* PEP 3118 */ - PyObject *memoryview; Py_buffer *view; PyArray_Descr *descr = NULL; - PyObject *r; - int nd, flags, k; + PyObject *r = NULL; + int nd, flags; Py_ssize_t d; npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS]; - memoryview = PyMemoryView_FromObject(obj); - if (memoryview == NULL) { - PyErr_Clear(); - return -1; - } - view = PyMemoryView_GET_BUFFER(memoryview); if (view->format != NULL) { descr = _descriptor_from_pep3118_format(view->format); if (descr == NULL) { - PyObject *msg; - msg = PyBytes_FromFormat("Invalid PEP 3118 format string: 
'%s'", - view->format); - PyErr_WarnEx(PyExc_RuntimeWarning, PyBytes_AS_STRING(msg), 0); - Py_DECREF(msg); goto fail; } /* Sanity check */ if (descr->elsize != view->itemsize) { - PyErr_WarnEx(PyExc_RuntimeWarning, - "Item size computed from the PEP 3118 buffer format " - "string does not match the actual item size.", - 0); + PyErr_SetString( + PyExc_RuntimeError, + "Item size computed from the PEP 3118 buffer format " + "string does not match the actual item size."); goto fail; } } @@ -1322,13 +1311,13 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) nd = view->ndim; if (view->shape != NULL) { - if (nd >= NPY_MAXDIMS || nd < 0) { + int k; + if (nd > NPY_MAXDIMS || nd < 0) { + PyErr_Format(PyExc_RuntimeError, + "PEP3118 dimensions do not satisfy 0 <= ndim <= NPY_MAXDIMS"); goto fail; } for (k = 0; k < nd; ++k) { - if (k >= NPY_MAXDIMS) { - goto fail; - } shape[k] = view->shape[k]; } if (view->strides != NULL) { @@ -1352,10 +1341,9 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) strides[0] = view->itemsize; } else if (nd > 1) { - PyErr_WarnEx(PyExc_RuntimeWarning, - "ndim computed from the PEP 3118 buffer format " - "is greater than 1, but shape is NULL.", - 0); + PyErr_SetString(PyExc_RuntimeError, + "ndim computed from the PEP 3118 buffer format " + "is greater than 1, but shape is NULL."); goto fail; } } @@ -1364,21 +1352,21 @@ _array_from_buffer_3118(PyObject *obj, PyObject **out) r = PyArray_NewFromDescr(&PyArray_Type, descr, nd, shape, strides, view->buf, flags, NULL); - if (r == NULL || - PyArray_SetBaseObject((PyArrayObject *)r, memoryview) < 0) { - Py_XDECREF(r); - Py_DECREF(memoryview); - return -1; + if (r == NULL) { + goto fail; + } + if (PyArray_SetBaseObject((PyArrayObject *)r, memoryview) < 0) { + goto fail; } PyArray_UpdateFlags((PyArrayObject *)r, NPY_ARRAY_UPDATE_ALL); - *out = r; - return 0; + return r; fail: + Py_XDECREF(r); Py_XDECREF(descr); Py_DECREF(memoryview); - return -1; + return NULL; } @@ -1490,14 +1478,25 @@ PyArray_GetArrayParamsFromObject(PyObject *op, } /* If op supports the PEP 3118 buffer interface */ - if (!PyBytes_Check(op) && !PyUnicode_Check(op) && - _array_from_buffer_3118(op, (PyObject **)out_arr) == 0) { - if (writeable - && PyArray_FailUnlessWriteable(*out_arr, "PEP 3118 buffer") < 0) { - Py_DECREF(*out_arr); - return -1; + if (!PyBytes_Check(op) && !PyUnicode_Check(op)) { + + PyObject *memoryview = PyMemoryView_FromObject(op); + if (memoryview == NULL) { + PyErr_Clear(); + } + else { + PyObject *arr = _array_from_buffer_3118(memoryview); + if (arr == NULL) { + return -1; + } + if (writeable + && PyArray_FailUnlessWriteable((PyArrayObject *)arr, "PEP 3118 buffer") < 0) { + Py_DECREF(arr); + return -1; + } + *out_arr = (PyArrayObject *)arr; + return 0; } - return (*out_arr) == NULL ? 
-1 : 0; } /* If op supports the __array_struct__ or __array_interface__ interface */ diff --git a/numpy/core/src/multiarray/einsum.c.src b/numpy/core/src/multiarray/einsum.c.src index 5dbc30aa9..470a5fff9 100644 --- a/numpy/core/src/multiarray/einsum.c.src +++ b/numpy/core/src/multiarray/einsum.c.src @@ -591,7 +591,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]) * @from@(data1[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(accum); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum); return; } @@ -749,7 +749,7 @@ finish_after_unrolled_loop: accum += @from@(data1[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(value0 * accum); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + value0 * accum); return; } @@ -848,7 +848,7 @@ finish_after_unrolled_loop: accum += @from@(data0[@i@]); /**end repeat2**/ case 0: - *(@type@ *)dataptr[2] += @to@(accum * value1); + *(@type@ *)dataptr[2] = @to@(@from@(*(@type@ *)dataptr[2]) + accum * value1); return; } diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py index 2c142f82b..6214e325c 100644 --- a/numpy/core/tests/test_arrayprint.py +++ b/numpy/core/tests/test_arrayprint.py @@ -491,6 +491,8 @@ class TestPrintOptions(object): np.array(1.), style=repr) # but not in legacy mode np.array2string(np.array(1.), style=repr, legacy='1.13') + # gh-10934 style was broken in legacy mode, check it works + np.array2string(np.array(1.), legacy='1.13') def test_float_spacing(self): x = np.array([1., 2., 3.]) diff --git a/numpy/core/tests/test_einsum.py b/numpy/core/tests/test_einsum.py index 792b9e0a2..104dd1986 100644 --- a/numpy/core/tests/test_einsum.py +++ b/numpy/core/tests/test_einsum.py @@ -502,6 +502,16 @@ class TestEinSum(object): optimize=optimize), np.full((1, 5), 5)) + # Cases which were failing (gh-10899) + x = np.eye(2, dtype=dtype) + y = np.ones(2, dtype=dtype) + assert_array_equal(np.einsum("ji,i->", x, y, optimize=optimize), + [2.]) # contig_contig_outstride0_two + assert_array_equal(np.einsum("i,ij->", y, x, optimize=optimize), + [2.]) # stride0_contig_outstride0_two + assert_array_equal(np.einsum("ij,i->", x, y, optimize=optimize), + [2.]) # contig_stride0_outstride0_two + def test_einsum_sums_int8(self): self.check_einsum_sums('i1') diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py index 806a3b083..3c5f90cfc 100644 --- a/numpy/core/tests/test_multiarray.py +++ b/numpy/core/tests/test_multiarray.py @@ -6204,7 +6204,10 @@ class TestPEP3118Dtype(object): self._check('i', 'i') self._check('i:f0:', [('f0', 'i')]) + class TestNewBufferProtocol(object): + """ Test PEP3118 buffers """ + def _check_roundtrip(self, obj): obj = np.asarray(obj) x = memoryview(obj) @@ -6515,6 +6518,35 @@ class TestNewBufferProtocol(object): with assert_raises(ValueError): memoryview(arr) + def test_max_dims(self): + a = np.empty((1,) * 32) + self._check_roundtrip(a) + + def test_error_too_many_dims(self): + def make_ctype(shape, scalar_type): + t = scalar_type + for dim in shape[::-1]: + t = dim * t + return t + + # construct a memoryview with 33 dimensions + c_u8_33d = make_ctype((1,)*33, ctypes.c_uint8) + m = memoryview(c_u8_33d()) + assert_equal(m.ndim, 33) + + assert_raises_regex( + RuntimeError, "ndim", + np.array, m) + + def test_error_pointer_type(self): + # gh-6741 + m = memoryview(ctypes.pointer(ctypes.c_uint8())) + assert_('&' in m.format) + + assert_raises_regex( + ValueError, "format string", + np.array, m) + class 
TestArrayAttributeDeletion(object): @@ -7246,16 +7278,20 @@ class TestWritebackIfCopy(object): def test_view_assign(self): from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_resolve + arr = np.arange(9).reshape(3, 3).T arr_wb = npy_create_writebackifcopy(arr) assert_(arr_wb.flags.writebackifcopy) assert_(arr_wb.base is arr) - arr_wb[:] = -100 + arr_wb[...] = -100 npy_resolve(arr_wb) + # arr changes after resolve, even though we assigned to arr_wb assert_equal(arr, -100) # after resolve, the two arrays no longer reference each other - assert_(not arr_wb.ctypes.data == 0) - arr_wb[:] = 100 + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + # assigning to arr_wb does not get transfered to arr + arr_wb[...] = 100 assert_equal(arr, -100) def test_dealloc_warning(self): @@ -7266,6 +7302,30 @@ class TestWritebackIfCopy(object): _multiarray_tests.npy_abuse_writebackifcopy(v) assert len(sup.log) == 1 + def test_view_discard_refcount(self): + from numpy.core._multiarray_tests import npy_create_writebackifcopy, npy_discard + + arr = np.arange(9).reshape(3, 3).T + orig = arr.copy() + if HAS_REFCOUNT: + arr_cnt = sys.getrefcount(arr) + arr_wb = npy_create_writebackifcopy(arr) + assert_(arr_wb.flags.writebackifcopy) + assert_(arr_wb.base is arr) + arr_wb[...] = -100 + npy_discard(arr_wb) + # arr remains unchanged after discard + assert_equal(arr, orig) + # after discard, the two arrays no longer reference each other + assert_(arr_wb.ctypes.data != 0) + assert_equal(arr_wb.base, None) + if HAS_REFCOUNT: + assert_equal(arr_cnt, sys.getrefcount(arr)) + # assigning to arr_wb does not get transfered to arr + arr_wb[...] = 100 + assert_equal(arr, orig) + + class TestArange(object): def test_infinite(self): assert_raises_regex( diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 099b63c40..72beef471 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -54,7 +54,8 @@ __all__ = [ 'bincount', 'digitize', 'cov', 'corrcoef', 'msort', 'median', 'sinc', 'hamming', 'hanning', 'bartlett', 'blackman', 'kaiser', 'trapz', 'i0', 'add_newdoc', 'add_docstring', - 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc' + 'meshgrid', 'delete', 'insert', 'append', 'interp', 'add_newdoc_ufunc', + 'quantile' ] @@ -3427,7 +3428,7 @@ def percentile(a, q, axis=None, out=None, interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -3463,6 +3464,7 @@ def percentile(a, q, axis=None, out=None, mean median : equivalent to ``percentile(..., 50)`` nanpercentile + quantile : equivalent to percentile, except with q in the range [0, 1]. Notes ----- @@ -3539,6 +3541,110 @@ def percentile(a, q, axis=None, out=None, a, q, axis, out, overwrite_input, interpolation, keepdims) +def quantile(a, q, axis=None, out=None, + overwrite_input=False, interpolation='linear', keepdims=False): + """ + Compute the `q`th quantile of the data along the specified axis. + ..versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. 
+ axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single quantile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + mean + percentile : equivalent to quantile, but with q in the range [0, 100]. + median : equivalent to ``quantile(..., 0.5)`` + nanquantile + + Notes + ----- + Given a vector ``V`` of length ``N``, the ``q``-th quantile of + ``V`` is the value ``q`` of the way from the minimum to the + maximum in a sorted copy of ``V``. The values and distances of + the two nearest neighbors as well as the `interpolation` parameter + will determine the quantile if the normalized ranking does not + match the location of ``q`` exactly. This function is the same as + the median if ``q=0.5``, the same as the minimum if ``q=0.0`` and the + same as the maximum if ``q=1.0``. 
+ + Examples + -------- + >>> a = np.array([[10, 7, 4], [3, 2, 1]]) + >>> a + array([[10, 7, 4], + [ 3, 2, 1]]) + >>> np.quantile(a, 0.5) + 3.5 + >>> np.quantile(a, 0.5, axis=0) + array([[ 6.5, 4.5, 2.5]]) + >>> np.quantile(a, 0.5, axis=1) + array([ 7., 2.]) + >>> np.quantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.quantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.quantile(a, 0.5, axis=0, out=out) + array([[ 6.5, 4.5, 2.5]]) + >>> m + array([[ 6.5, 4.5, 2.5]]) + >>> b = a.copy() + >>> np.quantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a == b) + """ + q = np.asanyarray(q) + if not _quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _quantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _quantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index d2a398a0a..90e19769e 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -911,10 +911,6 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): nbin[i] = len(edges[i]) + 1 # includes an outlier on each end dedges[i] = np.diff(edges[i]) - # Handle empty input. - if N == 0: - return np.zeros(nbin-2), edges - # Compute the bin number each sample falls into. Ncount = tuple( np.digitize(sample[:, i], edges[i]) diff --git a/numpy/lib/nanfunctions.py b/numpy/lib/nanfunctions.py index dddc0e5b8..abd2da1a2 100644 --- a/numpy/lib/nanfunctions.py +++ b/numpy/lib/nanfunctions.py @@ -16,6 +16,7 @@ Functions - `nanvar` -- variance of non-NaN values - `nanstd` -- standard deviation of non-NaN values - `nanmedian` -- median of non-NaN values +- `nanquantile` -- qth quantile of non-NaN values - `nanpercentile` -- qth percentile of non-NaN values """ @@ -29,7 +30,7 @@ from numpy.lib import function_base __all__ = [ 'nansum', 'nanmax', 'nanmin', 'nanargmax', 'nanargmin', 'nanmean', 'nanmedian', 'nanpercentile', 'nanvar', 'nanstd', 'nanprod', - 'nancumsum', 'nancumprod' + 'nancumsum', 'nancumprod', 'nanquantile' ] @@ -1057,7 +1058,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, `a` after this function completes is undefined. interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} This optional parameter specifies the interpolation method to - use when the desired quantile lies between two data points + use when the desired percentile lies between two data points ``i < j``: * 'linear': ``i + (j - i) * fraction``, where ``fraction`` @@ -1095,6 +1096,7 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, nanmean nanmedian : equivalent to ``nanpercentile(..., 50)`` percentile, median, mean + nanquantile : equivalent to nanpercentile, but with q in the range [0, 1]. Notes ----- @@ -1144,6 +1146,110 @@ def nanpercentile(a, q, axis=None, out=None, overwrite_input=False, a, q, axis, out, overwrite_input, interpolation, keepdims) +def nanquantile(a, q, axis=None, out=None, overwrite_input=False, + interpolation='linear', keepdims=np._NoValue): + """ + Compute the qth quantile of the data along the specified axis, + while ignoring nan values. + Returns the qth quantile(s) of the array elements. + .. 
versionadded:: 1.15.0 + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, containing + nan values to be ignored + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The + default is to compute the quantile(s) along a flattened + version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by intermediate + calculations, to save memory. In this case, the contents of the input + `a` after this function completes is undefined. + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} + This optional parameter specifies the interpolation method to + use when the desired quantile lies between two data points + ``i < j``: + * linear: ``i + (j - i) * fraction``, where ``fraction`` + is the fractional part of the index surrounded by ``i`` + and ``j``. + * lower: ``i``. + * higher: ``j``. + * nearest: ``i`` or ``j``, whichever is nearest. + * midpoint: ``(i + j) / 2``. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + If this is anything but the default value it will be passed + through (in the special case of an empty array) to the + `mean` function of the underlying array. If the array is + a sub-class and `mean` does not have the kwarg `keepdims` this + will raise a RuntimeError. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + quantile + nanmean, nanmedian + nanmedian : equivalent to ``nanquantile(..., 0.5)`` + nanpercentile : same as nanquantile, but with q in the range [0, 100]. + + Examples + -------- + >>> a = np.array([[10., 7., 4.], [3., 2., 1.]]) + >>> a[0][1] = np.nan + >>> a + array([[ 10., nan, 4.], + [ 3., 2., 1.]]) + >>> np.quantile(a, 0.5) + nan + >>> np.nanquantile(a, 0.5) + 3.5 + >>> np.nanquantile(a, 0.5, axis=0) + array([ 6.5, 2., 2.5]) + >>> np.nanquantile(a, 0.5, axis=1, keepdims=True) + array([[ 7.], + [ 2.]]) + >>> m = np.nanquantile(a, 0.5, axis=0) + >>> out = np.zeros_like(m) + >>> np.nanquantile(a, 0.5, axis=0, out=out) + array([ 6.5, 2., 2.5]) + >>> m + array([ 6.5, 2. 
, 2.5]) + >>> b = a.copy() + >>> np.nanquantile(b, 0.5, axis=1, overwrite_input=True) + array([ 7., 2.]) + >>> assert not np.all(a==b) + """ + a = np.asanyarray(a) + q = np.asanyarray(q) + if not function_base._quantile_is_valid(q): + raise ValueError("Quantiles must be in the range [0, 1]") + return _nanquantile_unchecked( + a, q, axis, out, overwrite_input, interpolation, keepdims) + + def _nanquantile_unchecked(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=np._NoValue): """Assumes that q is in [0, 1], and is an ndarray""" diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 6653b5ba1..43d62a7ff 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -2749,6 +2749,28 @@ class TestPercentile(object): a, [0.3, 0.6], (0, 2), interpolation='nearest'), b) +class TestQuantile(object): + # most of this is already tested by TestPercentile + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.quantile(x, 0), 0.) + assert_equal(np.quantile(x, 1), 3.5) + assert_equal(np.quantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, beause quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.quantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + class TestMedian(object): def test_basic(self): diff --git a/numpy/lib/tests/test_nanfunctions.py b/numpy/lib/tests/test_nanfunctions.py index 1f403f7b8..e69d9dd7d 100644 --- a/numpy/lib/tests/test_nanfunctions.py +++ b/numpy/lib/tests/test_nanfunctions.py @@ -886,3 +886,39 @@ class TestNanFunctions_Percentile(object): megamat = np.ones((3, 4, 5, 6)) assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)) + + +class TestNanFunctions_Quantile(object): + # most of this is already tested by TestPercentile + + def test_regression(self): + ar = np.arange(24).reshape(2, 3, 4).astype(float) + ar[0][1] = np.nan + + assert_equal(np.nanquantile(ar, q=0.5), np.nanpercentile(ar, q=50)) + assert_equal(np.nanquantile(ar, q=0.5, axis=0), + np.nanpercentile(ar, q=50, axis=0)) + assert_equal(np.nanquantile(ar, q=0.5, axis=1), + np.nanpercentile(ar, q=50, axis=1)) + assert_equal(np.nanquantile(ar, q=[0.5], axis=1), + np.nanpercentile(ar, q=[50], axis=1)) + assert_equal(np.nanquantile(ar, q=[0.25, 0.5, 0.75], axis=1), + np.nanpercentile(ar, q=[25, 50, 75], axis=1)) + + def test_basic(self): + x = np.arange(8) * 0.5 + assert_equal(np.nanquantile(x, 0), 0.) + assert_equal(np.nanquantile(x, 1), 3.5) + assert_equal(np.nanquantile(x, 0.5), 1.75) + + def test_no_p_overwrite(self): + # this is worth retesting, beause quantile does not make a copy + p0 = np.array([0, 0.75, 0.25, 0.5, 1.0]) + p = p0.copy() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) + + p0 = p0.tolist() + p = p.tolist() + np.nanquantile(np.arange(100.), p, interpolation="midpoint") + assert_array_equal(p, p0) diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index 4dabaa093..8ef153c15 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -3817,7 +3817,7 @@ cdef class RandomState: Draw samples from a negative binomial distribution. 
Samples are drawn from a negative binomial distribution with specified - parameters, `n` trials and `p` probability of success where `n` is an + parameters, `n` successes and `p` probability of success where `n` is an integer > 0 and `p` is in the interval [0, 1]. Parameters @@ -3837,21 +3837,19 @@ cdef class RandomState: ------- out : ndarray or scalar Drawn samples from the parameterized negative binomial distribution, - where each sample is equal to N, the number of trials it took to - achieve n - 1 successes, N - (n - 1) failures, and a success on the, - (N + n)th trial. + where each sample is equal to N, the number of failures that + occurred before a total of n successes was reached. Notes ----- The probability density for the negative binomial distribution is - .. math:: P(N;n,p) = \\binom{N+n-1}{n-1}p^{n}(1-p)^{N}, + .. math:: P(N;n,p) = \\binom{N+n-1}{N}p^{n}(1-p)^{N}, - where :math:`n-1` is the number of successes, :math:`p` is the - probability of success, and :math:`N+n-1` is the number of trials. - The negative binomial distribution gives the probability of n-1 - successes and N failures in N+n-1 trials, and success on the (N+n)th - trial. + where :math:`n` is the number of successes, :math:`p` is the + probability of success, and :math:`N+n` is the number of trials. + The negative binomial distribution gives the probability of N + failures given n successes, with a success on the last trial. If one throws a die repeatedly until the third time a "1" appears, then the probability distribution of the number of non-"1"s that |
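
The final hunk rewords the negative_binomial documentation: a draw is the number of failures seen before the n-th success, not a trial count. A small sanity check of that reading, in the same doctest style; the mean formula n*(1-p)/p for this "failures before n successes" parameterization is a standard result rather than something stated in the diff, and with 100000 draws the comparison below is expected to hold almost surely:

    >>> import numpy as np
    >>> s = np.random.negative_binomial(n=10, p=0.25, size=100000)
    >>> # mean count of failures before 10 successes should be near 10*(1-0.25)/0.25 = 30
    >>> abs(s.mean() - 30.0) < 0.5
    True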