summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2017-12-23 11:47:26 +0000
committerEric Wieser <wieser.eric@gmail.com>2017-12-24 22:32:05 +0000
commitf0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe (patch)
treeb792cb73d4aa87560fbefb4bb8e442b0bba74e5c
parentd45cf0b261b4a563f44fa238d34bd55f051f315f (diff)
downloadnumpy-f0ffd58e7dbe8f4b111b0c090e8df0c3b779e1fe.tar.gz
BUG: Allow nan values in the data when the bins are explicit
Fixes gh-7503 Closes gh-8984
-rw-r--r--doc/release/1.15.0-notes.rst10
-rw-r--r--numpy/lib/histograms.py6
-rw-r--r--numpy/lib/tests/test_histograms.py25
3 files changed, 39 insertions, 2 deletions
diff --git a/doc/release/1.15.0-notes.rst b/doc/release/1.15.0-notes.rst
index 60720deda..a24156ea0 100644
--- a/doc/release/1.15.0-notes.rst
+++ b/doc/release/1.15.0-notes.rst
@@ -64,6 +64,16 @@ to maintain compatibility, aliased at ``np.lib.function_base.histogram(dd)``.
Code that does ``from np.lib.function_base import *`` will need to be updated
with the new location, and should consider not using ``import *`` in future.
+``histogram`` will accept NaN values when explicit bins are given
+-----------------------------------------------------------------
+Previously it would fail when trying to compute a finite range for the data.
+Since the range is ignored anyway when the bins are given explcitly, this error
+was needless.
+
+Note that calling `histogram` on NaN values continues to raise the
+`RuntimeWarning`s typical of working with nan values, which can be silenced
+as usual with `errstate`.
+
``np.r_`` works with 0d arrays, and ``np.ma.mr_` works with ``np.ma.masked``
----------------------------------------------------------------------------
0d arrays passed to the `r_` and `mr_` concatenation helpers are now treated as
diff --git a/numpy/lib/histograms.py b/numpy/lib/histograms.py
index 9c4ba8efc..ccae9de22 100644
--- a/numpy/lib/histograms.py
+++ b/numpy/lib/histograms.py
@@ -259,8 +259,6 @@ def _get_bin_edges(a, bins, range, weights):
The upper bound, lowerbound, and number of bins, used in the optimized
implementation of `histogram` that works on uniform bins.
"""
- first_edge, last_edge = _get_outer_edges(a, range)
-
# parse the overloaded bins argument
n_equal_bins = None
bin_edges = None
@@ -276,6 +274,8 @@ def _get_bin_edges(a, bins, range, weights):
raise TypeError("Automated estimation of the number of "
"bins is not supported for weighted data")
+ first_edge, last_edge = _get_outer_edges(a, range)
+
# truncate the range if needed
if range is not None:
keep = (a >= first_edge)
@@ -304,6 +304,8 @@ def _get_bin_edges(a, bins, range, weights):
if n_equal_bins < 1:
raise ValueError('`bins` must be positive, when an integer')
+ first_edge, last_edge = _get_outer_edges(a, range)
+
elif np.ndim(bins) == 1:
bin_edges = np.asarray(bins)
if np.any(bin_edges[:-1] > bin_edges[1:]):
diff --git a/numpy/lib/tests/test_histograms.py b/numpy/lib/tests/test_histograms.py
index 8319041b8..0986ad16b 100644
--- a/numpy/lib/tests/test_histograms.py
+++ b/numpy/lib/tests/test_histograms.py
@@ -249,6 +249,31 @@ class TestHistogram(object):
np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
np.histogram([np.array([0.5]) for i in range(10)] + [.5])
+ def test_some_nan_values(self):
+ # gh-7503
+ one_nan = np.array([0, 1, np.nan])
+ all_nan = np.array([np.nan, np.nan])
+
+ # the internal commparisons with NaN give warnings
+ sup = suppress_warnings()
+ sup.filter(RuntimeWarning)
+ with sup:
+ # can't infer range with nan
+ assert_raises(ValueError, histogram, one_nan, bins='auto')
+ assert_raises(ValueError, histogram, all_nan, bins='auto')
+
+ # explicit range solves the problem
+ h, b = histogram(one_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins='auto', range=(0, 1))
+ assert_equal(h.sum(), 0) # nan is not counted
+
+ # as does an explicit set of bins
+ h, b = histogram(one_nan, bins=[0, 1])
+ assert_equal(h.sum(), 2) # nan is not counted
+ h, b = histogram(all_nan, bins=[0, 1])
+ assert_equal(h.sum(), 0) # nan is not counted
+
class TestHistogramOptimBinNums(object):
"""