diff options
-rw-r--r-- | doc/release/1.11.0-notes.rst | 4 | ||||
-rw-r--r-- | numpy/random/mtrand/mtrand.pyx | 45 | ||||
-rw-r--r-- | numpy/random/tests/test_random.py | 51 |
3 files changed, 60 insertions, 40 deletions
diff --git a/doc/release/1.11.0-notes.rst b/doc/release/1.11.0-notes.rst index 770938b60..e541a4739 100644 --- a/doc/release/1.11.0-notes.rst +++ b/doc/release/1.11.0-notes.rst @@ -204,6 +204,10 @@ extended to ``@``, ``numpy.dot``, ``numpy.inner``, and ``numpy.matmul``. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This matches the behavior of ``assert_raises``. +Speed improvement for np.random.shuffle +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``np.random.shuffle`` is now much faster for 1d ndarrays. + Changes ======= Pyrex support was removed from ``numpy.distutils``. The method diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx index ff8171d45..f70f578cc 100644 --- a/numpy/random/mtrand/mtrand.pyx +++ b/numpy/random/mtrand/mtrand.pyx @@ -24,6 +24,8 @@ include "Python.pxi" include "numpy.pxd" +from libc cimport string + cdef extern from "math.h": double exp(double x) double log(double x) @@ -4979,33 +4981,42 @@ cdef class RandomState: [0, 1, 2]]) """ - cdef npy_intp i, j - - i = len(x) - 1 - - # Logic adapted from random.shuffle() - if isinstance(x, np.ndarray) and \ - (x.ndim > 1 or x.dtype.fields is not None): - # For a multi-dimensional ndarray, indexing returns a view onto - # each row. So we can't just use ordinary assignment to swap the - # rows; we need a bounce buffer. + cdef: + npy_intp i, j, n = len(x) + size_t stride, nbytes + char* x_ptr + char* buf_ptr + + if type(x) is np.ndarray and x.ndim == 1 and x.size: + # Fast, statically typed path: shuffle the underlying buffer. + # Only for non-empty, 1d objects of class ndarray (subclasses such + # as MaskedArrays may not support this approach). + x_ptr = <char*><size_t>x.ctypes.data + stride = x.strides[0] + nbytes = x[:1].nbytes + buf = np.empty_like(x[0]) # GC'd at function exit + buf_ptr = <char*><size_t>buf.ctypes.data + with self.lock: + for i in reversed(range(1, n)): + j = rk_interval(i, self.internal_state) + string.memcpy(buf_ptr, x_ptr + j * stride, nbytes) + string.memcpy(x_ptr + j * stride, x_ptr + i * stride, nbytes) + string.memcpy(x_ptr + i * stride, buf_ptr, nbytes) + elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size: + # Multidimensional ndarrays require a bounce buffer. buf = np.empty_like(x[0]) with self.lock: - while i > 0: + for i in reversed(range(1, n)): j = rk_interval(i, self.internal_state) buf[...] = x[j] x[j] = x[i] x[i] = buf - i = i - 1 else: - # For single-dimensional arrays, lists, and any other Python - # sequence types, indexing returns a real object that's - # independent of the array contents, so we can just swap directly. + # Untyped path. with self.lock: - while i > 0: + for i in reversed(range(1, n)): j = rk_interval(i, self.internal_state) x[i], x[j] = x[j], x[i] - i = i - 1 def permutation(self, object x): """ diff --git a/numpy/random/tests/test_random.py b/numpy/random/tests/test_random.py index 37c1876bf..e3391a9a2 100644 --- a/numpy/random/tests/test_random.py +++ b/numpy/random/tests/test_random.py @@ -10,6 +10,7 @@ import sys import warnings + class TestSeed(TestCase): def test_scalar(self): s = np.random.RandomState(0) @@ -40,6 +41,7 @@ class TestSeed(TestCase): assert_raises(ValueError, np.random.RandomState, [1, 2, 4294967296]) assert_raises(ValueError, np.random.RandomState, [1, -2, 4294967296]) + class TestBinomial(TestCase): def test_n_zero(self): # Tests the corner case of n == 0 for the binomial distribution. @@ -130,6 +132,7 @@ class TestSetState(TestCase): # arguments without truncation. self.prng.negative_binomial(0.5, 0.5) + class TestRandint(TestCase): rfunc = np.random.randint @@ -207,12 +210,13 @@ class TestRandint(TestCase): assert_(tgt[np.dtype(np.bool).name] == res) -class TestRandomDist(TestCase): +class TestRandomDist: # Make sure the random distribution returns the correct value for a # given seed - def setUp(self): - self.seed = 1234567890 + @classmethod + def setup_class(cls): + cls.seed = 1234567890 def test_rand(self): np.random.seed(self.seed) @@ -368,40 +372,41 @@ class TestRandomDist(TestCase): np.testing.assert_equal(actual, desired) def test_shuffle(self): - # Test lists, arrays, and multidimensional versions of both: - for conv in [lambda x: x, - np.asarray, + # Test lists, arrays (of various dtypes), and multidimensional versions + # of both, c-contiguous or not: + for conv in [lambda x: np.array([]), + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), lambda x: [(i, i) for i in x], - lambda x: np.asarray([(i, i) for i in x])]: + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-4270 + lambda x: np.asarray([(i, i) for i in x], + [("a", object, 1), + ("b", np.int32, 1)])]: np.random.seed(self.seed) alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) np.random.shuffle(alist) actual = alist desired = conv([0, 1, 9, 6, 2, 4, 5, 8, 7, 3]) - np.testing.assert_array_equal(actual, desired) - - def test_shuffle_flexible(self): - # gh-4270 - arr = [(0, 1), (2, 3)] - dt = np.dtype([('a', np.int32, 1), ('b', np.int32, 1)]) - nparr = np.array(arr, dtype=dt) - a, b = nparr[0].copy(), nparr[1].copy() - for i in range(50): - np.random.shuffle(nparr) - assert_(a in nparr) - assert_(b in nparr) + yield np.testing.assert_array_equal, actual, desired def test_shuffle_masked(self): # gh-3263 a = np.ma.masked_values(np.reshape(range(20), (5,4)) % 3 - 1, -1) b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) - ma = np.ma.count_masked(a) - mb = np.ma.count_masked(b) + a_orig = a.copy() + b_orig = b.copy() for i in range(50): np.random.shuffle(a) - self.assertEqual(ma, np.ma.count_masked(a)) + assert_equal( + sorted(a.data[~a.mask]), sorted(a_orig.data[~a_orig.mask])) np.random.shuffle(b) - self.assertEqual(mb, np.ma.count_masked(b)) + assert_equal( + sorted(b.data[~b.mask]), sorted(b_orig.data[~b_orig.mask])) def test_beta(self): np.random.seed(self.seed) |