diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2021-01-24 12:55:34 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-24 12:55:34 -0700 |
commit | 04b58d3ffbd2c8d30c36ae6ed6366f1069136c43 (patch) | |
tree | ecaa80c74a93aa19686cff79e98036f188d7dff4 | |
parent | e5875457faf3202c3530f33387341529a2715a7d (diff) | |
parent | 5cbfefb4c69b130f8b01408615e9f7b42b707beb (diff) | |
download | numpy-04b58d3ffbd2c8d30c36ae6ed6366f1069136c43.tar.gz |
Merge pull request #18211 from rgommers/fix-shuffle-object
MAINT: random shuffle: warn on unrecognized objects, fix empty array bug
-rw-r--r-- | numpy/random/_generator.pyx | 22 |
-rw-r--r-- | numpy/random/mtrand.pyx | 20 |
-rw-r--r-- | numpy/random/tests/test_generator_mt19937.py | 15 |
-rw-r--r-- | numpy/random/tests/test_randomstate.py | 6 |
4 files changed, 54 insertions, 9 deletions
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index 7d652ce89..3033a1495 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -2,6 +2,7 @@ #cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3 import operator import warnings +from collections.abc import MutableSequence from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer from cpython cimport (Py_INCREF, PyFloat_AsDouble) @@ -4347,14 +4348,14 @@ cdef class Generator: """ shuffle(x, axis=0) - Modify a sequence in-place by shuffling its contents. + Modify an array or sequence in-place by shuffling its contents. The order of sub-arrays is changed but their contents remains the same. Parameters ---------- - x : array_like - The array or list to be shuffled. + x : ndarray or MutableSequence + The array, list or mutable sequence to be shuffled. axis : int, optional The axis which `x` is shuffled along. Default is 0. It is only supported on `ndarray` objects. @@ -4414,7 +4415,11 @@ cdef class Generator: with self.lock, nogil: _shuffle_raw_wrap(&self._bitgen, n, 1, itemsize, stride, x_ptr, buf_ptr) - elif isinstance(x, np.ndarray) and x.ndim and x.size: + elif isinstance(x, np.ndarray): + if x.size == 0: + # shuffling is a no-op + return + x = np.swapaxes(x, 0, axis) buf = np.empty_like(x[0, ...]) with self.lock: @@ -4428,6 +4433,15 @@ cdef class Generator: x[i] = buf else: # Untyped path. + if not isinstance(x, MutableSequence): + # See gh-18206. We may decide to deprecate here in the future. + warnings.warn( + "`x` isn't a recognized object; `shuffle` is not guaranteed " + "to behave correctly. 
E.g., non-numpy array/tensor objects " + "with view semantics may contain duplicates after shuffling.", + UserWarning, stacklevel=2 + ) + if axis != 0: raise NotImplementedError("Axis argument is only supported " "on ndarray objects") diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index d43e7f5aa..814630c03 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -2,6 +2,7 @@ #cython: wraparound=False, nonecheck=False, boundscheck=False, cdivision=True, language_level=3 import operator import warnings +from collections.abc import MutableSequence import numpy as np @@ -4402,8 +4403,8 @@ cdef class RandomState: Parameters ---------- - x : array_like - The array or list to be shuffled. + x : ndarray or MutableSequence + The array, list or mutable sequence to be shuffled. Returns ------- @@ -4456,7 +4457,11 @@ cdef class RandomState: self._shuffle_raw(n, sizeof(np.npy_intp), stride, x_ptr, buf_ptr) else: self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr) - elif isinstance(x, np.ndarray) and x.ndim and x.size: + elif isinstance(x, np.ndarray): + if x.size == 0: + # shuffling is a no-op + return + buf = np.empty_like(x[0, ...]) with self.lock: for i in reversed(range(1, n)): @@ -4468,6 +4473,15 @@ cdef class RandomState: x[i] = buf else: # Untyped path. + if not isinstance(x, MutableSequence): + # See gh-18206. We may decide to deprecate here in the future. + warnings.warn( + "`x` isn't a recognized object; `shuffle` is not guaranteed " + "to behave correctly. 
E.g., non-numpy array/tensor objects " + "with view semantics may contain duplicates after shuffling.", + UserWarning, stacklevel=2 + ) + with self.lock: for i in reversed(range(1, n)): j = random_interval(&self._bitgen, i) diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index c4fb5883c..47c81584c 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -960,6 +960,14 @@ class TestRandomDist: random.shuffle(actual, axis=-1) assert_array_equal(actual, desired) + def test_shuffle_custom_axis_empty(self): + random = Generator(MT19937(self.seed)) + desired = np.array([]).reshape((0, 6)) + for axis in (0, 1): + actual = np.array([]).reshape((0, 6)) + random.shuffle(actual, axis=axis) + assert_array_equal(actual, desired) + def test_shuffle_axis_nonsquare(self): y1 = np.arange(20).reshape(2, 10) y2 = y1.copy() @@ -993,6 +1001,11 @@ class TestRandomDist: arr = [[1, 2, 3], [4, 5, 6]] assert_raises(NotImplementedError, random.shuffle, arr, 1) + arr = np.array(3) + assert_raises(TypeError, random.shuffle, arr) + arr = np.ones((3, 2)) + assert_raises(np.AxisError, random.shuffle, arr, 2) + def test_permutation(self): random = Generator(MT19937(self.seed)) alist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] @@ -1004,7 +1017,7 @@ class TestRandomDist: arr_2d = np.atleast_2d([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]).T actual = random.permutation(arr_2d) assert_array_equal(actual, np.atleast_2d(desired).T) - + bad_x_str = "abcd" assert_raises(np.AxisError, random.permutation, bad_x_str) diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index b70a04347..7f5f08050 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -642,7 +642,7 @@ class TestRandomDist: a = np.array([42, 1, 2]) p = [None, None, None] assert_raises(ValueError, random.choice, a, p=p) - + def test_choice_p_non_contiguous(self): p = 
np.ones(10) / 5 p[1::2] = 3.0 @@ -699,6 +699,10 @@ class TestRandomDist: assert_equal( sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) + def test_shuffle_invalid_objects(self): + x = np.array(3) + assert_raises(TypeError, random.shuffle, x) + def test_permutation(self): random.seed(self.seed) alist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0] |