summary | refs | log | tree | commit | diff
path: root/numpy/lib/histograms.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/histograms.py')
-rw-r--r-- numpy/lib/histograms.py 106
1 files changed, 24 insertions, 82 deletions
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 1a9b41ced..35745e6dd 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -506,8 +506,8 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
with non-normal datasets.
'scott'
- Less robust estimator that that takes into account data
- variability and data size.
+ Less robust estimator that takes into account data variability
+ and data size.
'stone'
Estimator based on leave-one-out cross-validation estimate of
@@ -562,7 +562,8 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
below, :math:`h` is the binwidth and :math:`n_h` is the number of
bins. All estimators that compute bin counts are recast to bin width
using the `ptp` of the data. The final bin count is obtained from
- ``np.round(np.ceil(range / h))``.
+ ``np.round(np.ceil(range / h))``. The final bin width is often less
+ than what is returned by the estimators below.
'auto' (maximum of the 'sturges' and 'fd' estimators)
A compromise to get a good value. For small datasets the Sturges
@@ -580,7 +581,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
datasets. The IQR is very robust to outliers.
'scott'
- .. math:: h = \sigma \sqrt[3]{\frac{24 * \sqrt{\pi}}{n}}
+ .. math:: h = \sigma \sqrt[3]{\frac{24 \sqrt{\pi}}{n}}
The binwidth is proportional to the standard deviation of the
data and inversely proportional to cube root of ``x.size``. Can
@@ -597,7 +598,7 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
does not take into account data variability.
'sturges'
- .. math:: n_h = \log _{2}n+1
+ .. math:: n_h = \log _{2}(n) + 1
The number of bins is the base 2 log of ``a.size``. This
estimator assumes normality of data and is too conservative for
@@ -606,9 +607,9 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
'doane'
.. math:: n_h = 1 + \log_{2}(n) +
- \log_{2}(1 + \frac{|g_1|}{\sigma_{g_1}})
+ \log_{2}\left(1 + \frac{|g_1|}{\sigma_{g_1}}\right)
- g_1 = mean[(\frac{x - \mu}{\sigma})^3]
+ g_1 = mean\left[\left(\frac{x - \mu}{\sigma}\right)^3\right]
\sigma_{g_1} = \sqrt{\frac{6(n - 2)}{(n + 1)(n + 3)}}
@@ -670,15 +671,14 @@ def histogram_bin_edges(a, bins=10, range=None, weights=None):
def _histogram_dispatcher(
- a, bins=None, range=None, normed=None, weights=None, density=None):
+ a, bins=None, range=None, density=None, weights=None):
return (a, bins, weights)
@array_function_dispatch(_histogram_dispatcher)
-def histogram(a, bins=10, range=None, normed=None, weights=None,
- density=None):
+def histogram(a, bins=10, range=None, density=None, weights=None):
r"""
- Compute the histogram of a set of data.
+ Compute the histogram of a dataset.
Parameters
----------
@@ -703,16 +703,6 @@ def histogram(a, bins=10, range=None, normed=None, weights=None,
computation as well. While bin width is computed to be optimal
based on the actual data within `range`, the bin count will fill
the entire range including portions containing no data.
- normed : bool, optional
-
- .. deprecated:: 1.6.0
-
- This is equivalent to the `density` argument, but produces incorrect
- results for unequal bin widths. It should not be used.
-
- .. versionchanged:: 1.15.0
- DeprecationWarnings are actually emitted.
-
weights : array_like, optional
An array of weights, of the same shape as `a`. Each value in
`a` only contributes its associated weight towards the bin count
@@ -727,8 +717,6 @@ def histogram(a, bins=10, range=None, normed=None, weights=None,
histogram values will not be equal to 1 unless bins of unity
width are chosen; it is not a probability *mass* function.
- Overrides the ``normed`` keyword if given.
-
Returns
-------
hist : array
@@ -890,46 +878,15 @@ def histogram(a, bins=10, range=None, normed=None, weights=None,
n = np.diff(cum_n)
- # density overrides the normed keyword
- if density is not None:
- if normed is not None:
- # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
- warnings.warn(
- "The normed argument is ignored when density is provided. "
- "In future passing both will result in an error.",
- DeprecationWarning, stacklevel=3)
- normed = None
-
if density:
db = np.array(np.diff(bin_edges), float)
return n/db/n.sum(), bin_edges
- elif normed:
- # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
- warnings.warn(
- "Passing `normed=True` on non-uniform bins has always been "
- "broken, and computes neither the probability density "
- "function nor the probability mass function. "
- "The result is only correct if the bins are uniform, when "
- "density=True will produce the same result anyway. "
- "The argument will be removed in a future version of "
- "numpy.",
- np.VisibleDeprecationWarning, stacklevel=3)
-
- # this normalization is incorrect, but
- db = np.array(np.diff(bin_edges), float)
- return n/(n*db).sum(), bin_edges
- else:
- if normed is not None:
- # 2018-06-13, numpy 1.15.0 (this was not noisily deprecated in 1.6)
- warnings.warn(
- "Passing normed=False is deprecated, and has no effect. "
- "Consider passing the density argument instead.",
- DeprecationWarning, stacklevel=3)
- return n, bin_edges
+
+ return n, bin_edges
-def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None,
- weights=None, density=None):
+def _histogramdd_dispatcher(sample, bins=None, range=None, density=None,
+ weights=None):
if hasattr(sample, 'shape'): # same condition as used in histogramdd
yield sample
else:
@@ -940,14 +897,13 @@ def _histogramdd_dispatcher(sample, bins=None, range=None, normed=None,
@array_function_dispatch(_histogramdd_dispatcher)
-def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
- density=None):
+def histogramdd(sample, bins=10, range=None, density=None, weights=None):
"""
Compute the multidimensional histogram of some data.
Parameters
----------
- sample : (N, D) array, or (D, N) array_like
+ sample : (N, D) array, or (N, D) array_like
The data to be histogrammed.
Note the unusual interpretation of sample when an array_like:
@@ -978,20 +934,16 @@ def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
If False, the default, returns the number of samples in each bin.
If True, returns the probability *density* function at the bin,
``bin_count / sample_count / bin_volume``.
- normed : bool, optional
- An alias for the density argument that behaves identically. To avoid
- confusion with the broken normed argument to `histogram`, `density`
- should be preferred.
weights : (N,) array_like, optional
An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`.
- Weights are normalized to 1 if normed is True. If normed is False,
+ Weights are normalized to 1 if density is True. If density is False,
the values of the returned histogram are equal to the sum of the
weights belonging to the samples falling into each bin.
Returns
-------
H : ndarray
- The multidimensional histogram of sample x. See normed and weights
+ The multidimensional histogram of sample x. See density and weights
for the different possible semantics.
edges : list
A list of D arrays describing the bin edges for each dimension.
@@ -1018,7 +970,7 @@ def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
sample = np.atleast_2d(sample).T
N, D = sample.shape
- nbin = np.empty(D, int)
+ nbin = np.empty(D, np.intp)
edges = D*[None]
dedges = D*[None]
if weights is not None:
@@ -1029,7 +981,7 @@ def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
if M != D:
raise ValueError(
'The dimension of bins must be equal to the dimension of the '
- ' sample x.')
+ 'sample x.')
except TypeError:
# bins is an integer
bins = D*[bins]
@@ -1049,13 +1001,13 @@ def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
smin, smax = _get_outer_edges(sample[:,i], range[i])
try:
n = operator.index(bins[i])
-
+
except TypeError as e:
raise TypeError(
"`bins[{}]` must be an integer, when a scalar".format(i)
) from e
-
- edges[i] = np.linspace(smin, smax, n + 1)
+
+ edges[i] = np.linspace(smin, smax, n + 1)
elif np.ndim(bins[i]) == 1:
edges[i] = np.asarray(bins[i])
if np.any(edges[i][:-1] > edges[i][1:]):
@@ -1103,16 +1055,6 @@ def histogramdd(sample, bins=10, range=None, normed=None, weights=None,
core = D*(slice(1, -1),)
hist = hist[core]
- # handle the aliasing normed argument
- if normed is None:
- if density is None:
- density = False
- elif density is None:
- # an explicit normed argument was passed, alias it to the new name
- density = normed
- else:
- raise TypeError("Cannot specify both 'normed' and 'density'")
-
if density:
# calculate the probability density function
s = hist.sum()