diff options
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- | numpy/lib/histograms.py | 77 |
1 files changed, 58 insertions, 19 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py index 2922b3a86..422b356f7 100644 --- a/numpy/lib/histograms.py +++ b/numpy/lib/histograms.py @@ -4,6 +4,7 @@ Histogram-related functions from __future__ import division, absolute_import, print_function import operator +import warnings import numpy as np from numpy.compat.py3k import basestring @@ -559,7 +560,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None): return bin_edges -def histogram(a, bins=10, range=None, normed=False, weights=None, +def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): r""" Compute the histogram of a set of data. @@ -571,8 +572,8 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, bins : int or sequence of scalars or str, optional If `bins` is an int, it defines the number of equal-width bins in the given range (10, by default). If `bins` is a - sequence, it defines the bin edges, including the rightmost - edge, allowing for non-uniform bin widths. + sequence, it defines a monotonically increasing array of bin edges, + including the rightmost edge, allowing for non-uniform bin widths. .. versionadded:: 1.11.0 @@ -591,14 +592,12 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, .. deprecated:: 1.6.0 - This keyword is deprecated in NumPy 1.6.0 due to confusing/buggy - behavior. It will be removed in NumPy 2.0.0. Use the ``density`` - keyword instead. If ``False``, the result will contain the - number of samples in each bin. If ``True``, the result is the - value of the probability *density* function at the bin, - normalized such that the *integral* over the range is 1. Note - that this latter behavior is known to be buggy with unequal bin - widths; use ``density`` instead. + This is equivalent to the `density` argument, but produces incorrect + results for unequal bin widths. It should not be used. + + .. versionchanged:: 1.15.0 + DeprecationWarnings are actually emitted. + weights : array_like, optional An array of weights, of the same shape as `a`. Each value in `a` only contributes its associated weight towards the bin count @@ -777,20 +776,44 @@ def histogram(a, bins=10, range=None, normed=False, weights=None, # density overrides the normed keyword if density is not None: - normed = False + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "The normed argument is ignored when density is provided. " + "In future passing both will result in an error.", + DeprecationWarning, stacklevel=2) + normed = None if density: db = np.array(np.diff(bin_edges), float) return n/db/n.sum(), bin_edges elif normed: - # deprecated, buggy behavior. Remove for NumPy 2.0.0 + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing `normed=True` on non-uniform bins has always been " + "broken, and computes neither the probability density " + "function nor the probability mass function. " + "The result is only correct if the bins are uniform, when " + "density=True will produce the same result anyway. " + "The argument will be removed in a future version of " + "numpy.", + np.VisibleDeprecationWarning, stacklevel=2) + + # this normalization is incorrect, but db = np.array(np.diff(bin_edges), float) return n/(n*db).sum(), bin_edges else: + if normed is not None: + # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6) + warnings.warn( + "Passing normed=False is deprecated, and has no effect. " + "Consider passing the density argument instead.", + DeprecationWarning, stacklevel=2) return n, bin_edges -def histogramdd(sample, bins=10, range=None, normed=False, weights=None): +def histogramdd(sample, bins=10, range=None, normed=None, weights=None, + density=None): """ Compute the multidimensional histogram of some data. @@ -811,7 +834,8 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): bins : sequence or int, optional The bin specification: - * A sequence of arrays describing the bin edges along each dimension. + * A sequence of arrays describing the monotonically increasing bin + edges along each dimension. * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). @@ -822,9 +846,14 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): An entry of None in the sequence results in the minimum and maximum values being used for the corresponding dimension. The default, None, is equivalent to passing a tuple of D None values. + density : bool, optional + If False, the default, returns the number of samples in each bin. + If True, returns the probability *density* function at the bin, + ``bin_count / sample_count / bin_volume``. normed : bool, optional - If False, returns the number of samples in each bin. If True, - returns the bin density ``bin_count / sample_count / bin_volume``. + An alias for the density argument that behaves identically. To avoid + confusion with the broken normed argument to `histogram`, `density` + should be preferred. weights : (N,) array_like, optional An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is True. If normed is False, @@ -938,8 +967,18 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): core = D*(slice(1, -1),) hist = hist[core] - # Normalize if normed is True - if normed: + # handle the aliasing normed argument + if normed is None: + if density is None: + density = False + elif density is None: + # an explicit normed argument was passed, alias it to the new name + density = normed + else: + raise TypeError("Cannot specify both 'normed' and 'density'") + + if density: + # calculate the probability density function s = hist.sum() for i in _range(D): shape = np.ones(D, int) |