diff options
author | Kevin Sheppard <kevin.k.sheppard@gmail.com> | 2018-11-05 23:32:22 +0000 |
---|---|---|
committer | mattip <matti.picus@gmail.com> | 2019-05-20 18:45:27 +0300 |
commit | 9dac6a5bb80cf7e9904154f24a486659837b9919 (patch) | |
tree | 1cbef967fce61f5d954ac4e201f522a3209bd110 | |
parent | c4ed60ee392431e8f09dd4fa7bf62ac79cd7ee52 (diff) | |
download | numpy-9dac6a5bb80cf7e9904154f24a486659837b9919.tar.gz |
MAINT: Sync with upstream changes
Sync upstream changes in numpy/numpy#11613, numpy/numpy#11771, and
numpy/numpy#12089
Update to NumPy 1.13 as the minimum version
Fix documentation
Add information about Lemire generator
Update change log
Fix docstring for randint
Refactor benchmark with more options
Clean code for PEP8 violations
Improve performance testing
-rw-r--r-- | _randomgen/.travis.yml | 10 | ||||
-rw-r--r-- | _randomgen/README.md | 11 | ||||
-rw-r--r-- | _randomgen/README.rst | 11 | ||||
-rw-r--r-- | _randomgen/benchmark.py | 173 | ||||
-rw-r--r-- | _randomgen/doc/source/change-log.rst | 12 | ||||
-rw-r--r-- | _randomgen/doc/source/conf.py | 2 | ||||
-rw-r--r-- | _randomgen/doc/source/index.rst | 5 | ||||
-rw-r--r-- | _randomgen/doc/source/new-or-different.rst | 9 | ||||
-rw-r--r-- | _randomgen/doc/source/performance.py | 23 | ||||
-rw-r--r-- | _randomgen/doc/source/references.rst | 5 | ||||
-rw-r--r-- | _randomgen/randomgen/common.pxd | 1 | ||||
-rw-r--r-- | _randomgen/randomgen/common.pyx | 5 | ||||
-rw-r--r-- | _randomgen/randomgen/generator.pyx | 18 | ||||
-rw-r--r-- | _randomgen/randomgen/tests/test_numpy_mt19937.py | 20 | ||||
-rw-r--r-- | _randomgen/randomgen/tests/test_numpy_mt19937_regressions.py | 22 |
15 files changed, 177 insertions, 150 deletions
diff --git a/_randomgen/.travis.yml b/_randomgen/.travis.yml index bc483fb97..10eba96d2 100644 --- a/_randomgen/.travis.yml +++ b/_randomgen/.travis.yml @@ -20,15 +20,15 @@ matrix: fast_finish: true include: - os: linux - env: [PYTHON=2.7, NUMPY=1.10, CYTHON=0.26] + env: [PYTHON=2.7, NUMPY=1.13, CYTHON=0.26] - os: linux - env: [PYTHON=3.5, NUMPY=1.11] + env: [PYTHON=3.5, NUMPY=1.13] - os: linux - env: [PYTHON=3.6, NUMPY=1.13, CYTHON=0.27] + env: [PYTHON=3.6, NUMPY=1.14, CYTHON=0.27] - os: linux - env: [PYTHON=3.6, NUMPY=1.13, CYTHON=0.26] + env: [PYTHON=3.6, NUMPY=1.15, CYTHON=0.28] - os: linux - env: [PYTHON=3.6, DOCBUILD=true] + env: [PYTHON=3.7, DOCBUILD=true] - os: osx language: generic env: [PYTHON=3.6] diff --git a/_randomgen/README.md b/_randomgen/README.md index de392c528..34986ebdb 100644 --- a/_randomgen/README.md +++ b/_randomgen/README.md @@ -68,6 +68,9 @@ which can fully reproduce the sequence produced by NumPy. * Normals (`standard_normal`) * Standard Gammas (via `standard_gamma`) +* Support for Lemire's method of generating uniform integers on an + arbitrary interval by setting `use_masked=True`. + ## Included Pseudo Random Number Generators This module includes a number of alternative random @@ -103,6 +106,10 @@ The RNGs include: * Core random number generators can fill existing arrays using the `out` keyword argument * Standardizes integer-values random values as int64 for all platforms. +* `randint` supports generating using rejection sampling on masked + values (the default) or Lemire's method. Lemire's method can be much + faster when the required interval length is much smaller than the + closes power of 2. ### New Functions @@ -150,8 +157,8 @@ need to be smoothed. 
## Requirements Building requires: -* Python (2.7, 3.4, 3.5, 3.6) -* NumPy (1.11, 1.12, 1.13, 1.14, 1.15) +* Python (2.7, 3.5, 3.6, 3.7) +* NumPy (1.13, 1.14, 1.15) * Cython (0.26+) * tempita (0.5+), if not provided by Cython diff --git a/_randomgen/README.rst b/_randomgen/README.rst index 814b1272b..69de75ab3 100644 --- a/_randomgen/README.rst +++ b/_randomgen/README.rst @@ -71,6 +71,9 @@ Features - Normals (``standard_normal``) - Standard Gammas (via ``standard_gamma``) +- Support for Lemire’s method of generating uniform integers on an + arbitrary interval by setting ``use_masked=True``. + Included Pseudo Random Number Generators ---------------------------------------- @@ -111,6 +114,10 @@ New Features - Core random number generators can fill existing arrays using the ``out`` keyword argument - Standardizes integer-values random values as int64 for all platforms. +- ``randint`` supports generating using rejection sampling on masked + values (the default) or Lemire’s method. Lemire’s method can be much + faster when the required interval length is much smaller than the + closes power of 2. New Functions ~~~~~~~~~~~~~ @@ -166,8 +173,8 @@ Requirements Building requires: -- Python (2.7, 3.4, 3.5, 3.6) -- NumPy (1.11, 1.12, 1.13, 1.14, 1.15) +- Python (2.7, 3.5, 3.6, 3.7) +- NumPy (1.13, 1.14, 1.15) - Cython (0.26+) - tempita (0.5+), if not provided by Cython diff --git a/_randomgen/benchmark.py b/_randomgen/benchmark.py index 860134de1..c4c4ab93d 100644 --- a/_randomgen/benchmark.py +++ b/_randomgen/benchmark.py @@ -84,122 +84,67 @@ def timer_uniform(): run_timer(dist, command, None, SETUP, 'Uniforms') -def timer_8bit_bounded(max=95, use_masked=True): - min = 0 +def timer_bounded(bits=8, max=95, use_masked=True): + """ + Timer for 8-bit bounded values. + + Parameters + ---------- + bits : {8, 16, 32, 64} + Bit width of unsigned output type + max : int + Upper bound for range. Lower is always 0. Must be <= 2**bits. 
+ use_masked: bool + If True, masking and rejection sampling is used to generate a random + number in an interval. If False, Lemire's algorithm is used if + available to generate a random number in an interval. + + Notes + ----- + Lemire's algorithm has improved performance when {max}+1 is not a + power of two. + """ + if bits not in (8, 16, 32, 64): + raise ValueError('bits must be one of 8, 16, 32, 64.') + minimum = 0 dist = 'random_uintegers' - # Note on performance of generating random numbers in an interval: - # use_masked=True : masking and rejection sampling is used to generate a random number in an interval. - # use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval. - # Lemire's algorithm has improved performance when {max}+1 is not a power of two. + if use_masked: # Use masking & rejection. + command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=True)' + else: # Use Lemire's algo. + command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=False)' - if use_masked: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=True)' # Use masking & rejection. - else: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8, use_masked=False)' # Use Lemire's algo. 
+ command = command.format(min=minimum, max=max, bits=bits) - command = command.format(min=min, max=max) - - command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint8)' - command_numpy = command_numpy.format(min=min, max=max) + command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits})' + command_numpy = command_numpy.format(min=minimum, max=max, bits=bits) run_timer(dist, command, command_numpy, SETUP, - '8-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked)) - - -def timer_16bit_bounded(max=1535, use_masked=True): - min = 0 - - dist = 'random_uintegers' - - # Note on performance of generating random numbers in an interval: - # use_masked=True : masking and rejection sampling is used to generate a random number in an interval. - # use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval. - # Lemire's algorithm has improved performance when {max}+1 is not a power of two. - - if use_masked: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=True)' # Use masking & rejection. - else: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16, use_masked=False)' # Use Lemire's algo. 
- - command = command.format(min=min, max=max) - - command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint16)' - command_numpy = command_numpy.format(min=min, max=max) - - run_timer(dist, command, command_numpy, SETUP, - '16-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked)) + '{bits}-bit bounded unsigned integers (max={max}, ' + 'use_masked={use_masked})'.format(max=max, use_masked=use_masked, bits=bits)) def timer_32bit(): info = np.iinfo(np.uint32) - min, max = info.min, info.max + minimum, maximum = info.min, info.max dist = 'random_uintegers' command = 'rg.random_uintegers(1000000, 32)' command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32)' - command_numpy = command_numpy.format(min=min, max=max) + command_numpy = command_numpy.format(min=minimum, max=maximum) run_timer(dist, command, command_numpy, SETUP, '32-bit unsigned integers') -def timer_32bit_bounded(max=1535, use_masked=True): - min = 0 - - dist = 'random_uintegers' - - # Note on performance of generating random numbers in an interval: - # use_masked=True : masking and rejection sampling is used to generate a random number in an interval. - # use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval. - # Lemire's algorithm has improved performance when {max}+1 is not a power of two. - - if use_masked: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=True)' # Use masking & rejection. - else: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32, use_masked=False)' # Use Lemire's algo. 
- - command = command.format(min=min, max=max) - - command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint32)' - command_numpy = command_numpy.format(min=min, max=max) - - run_timer(dist, command, command_numpy, SETUP, - '32-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked)) - - def timer_64bit(): info = np.iinfo(np.uint64) - min, max = info.min, info.max + minimum, maximum = info.min, info.max dist = 'random_uintegers' command = 'rg.random_uintegers(1000000)' command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64)' - command_numpy = command_numpy.format(min=min, max=max) + command_numpy = command_numpy.format(min=minimum, max=maximum) run_timer(dist, command, command_numpy, SETUP, '64-bit unsigned integers') -def timer_64bit_bounded(max=1535, use_masked=True): - min = 0 - - dist = 'random_uintegers' - - # Note on performance of generating random numbers in an interval: - # use_masked=True : masking and rejection sampling is used to generate a random number in an interval. - # use_masked=False : Lemire's algorithm is used if available to generate a random number in an interval. - # Lemire's algorithm has improved performance when {max}+1 is not a power of two. - - if use_masked: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=True)' # Use masking & rejection. - else: - command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64, use_masked=False)' # Use Lemire's algo. 
- - command = command.format(min=min, max=max) - - command_numpy = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint64)' - command_numpy = command_numpy.format(min=min, max=max) - - run_timer(dist, command, command_numpy, SETUP, - '64-bit bounded unsigned integers (max={max}, use_masked={use_masked})'.format(max=max, use_masked=use_masked)) - - def timer_normal_zig(): dist = 'standard_normal' command = 'rg.standard_normal(1000000)' @@ -210,35 +155,47 @@ def timer_normal_zig(): if __name__ == '__main__': import argparse + parser = argparse.ArgumentParser() - parser.add_argument('--full', dest='full', action='store_true') + parser.add_argument('-f', '--full', + help='Run benchmarks for a wide range of distributions.' + ' If not provided, only tests the production of ' + 'uniform values.', + dest='full', action='store_true') + parser.add_argument('-bi', '--bounded-ints', + help='Included benchmark coverage of the bounded ' + 'integer generators in a full run.', + dest='bounded_ints', action='store_true') args = parser.parse_args() timer_uniform() if args.full: timer_raw() - timer_8bit_bounded(use_masked=True) - timer_8bit_bounded(max=64, use_masked=False) # Worst case for Numpy. - timer_8bit_bounded(max=95, use_masked=False) # Typ. avrg. case for Numpy. - timer_8bit_bounded(max=127, use_masked=False) # Best case for Numpy. + if args.bounded_ints: + timer_bounded(use_masked=True) + timer_bounded(max=64, use_masked=False) # Worst case for Numpy. + timer_bounded(max=95, use_masked=False) # Typ. avrg. case for Numpy. + timer_bounded(max=127, use_masked=False) # Best case for Numpy. - timer_16bit_bounded(use_masked=True) - timer_16bit_bounded(max=1024, use_masked=False) # Worst case for Numpy. - timer_16bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy. - timer_16bit_bounded(max=2047, use_masked=False) # Best case for Numpy. + timer_bounded(16, use_masked=True) + timer_bounded(16, max=1024, use_masked=False) # Worst case for Numpy. 
+ timer_bounded(16, max=1535, use_masked=False) # Typ. avrg. case for Numpy. + timer_bounded(16, max=2047, use_masked=False) # Best case for Numpy. timer_32bit() - timer_32bit_bounded(use_masked=True) - timer_32bit_bounded(max=1024, use_masked=False) # Worst case for Numpy. - timer_32bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy. - timer_32bit_bounded(max=2047, use_masked=False) # Best case for Numpy. + if args.bounded_ints: + timer_bounded(32, use_masked=True) + timer_bounded(32, max=1024, use_masked=False) # Worst case for Numpy. + timer_bounded(32, max=1535, use_masked=False) # Typ. avrg. case for Numpy. + timer_bounded(32, max=2047, use_masked=False) # Best case for Numpy. timer_64bit() - timer_64bit_bounded(use_masked=True) - timer_64bit_bounded(max=1024, use_masked=False) # Worst case for Numpy. - timer_64bit_bounded(max=1535, use_masked=False) # Typ. avrg. case for Numpy. - timer_64bit_bounded(max=2047, use_masked=False) # Best case for Numpy. + if args.bounded_ints: + timer_bounded(64, use_masked=True) + timer_bounded(64, max=1024, use_masked=False) # Worst case for Numpy. + timer_bounded(64, max=1535, use_masked=False) # Typ. avrg. case for Numpy. + timer_bounded(64, max=2047, use_masked=False) # Best case for Numpy. timer_normal_zig() diff --git a/_randomgen/doc/source/change-log.rst b/_randomgen/doc/source/change-log.rst index 29876a4b9..629dd0002 100644 --- a/_randomgen/doc/source/change-log.rst +++ b/_randomgen/doc/source/change-log.rst @@ -1,12 +1,12 @@ Change Log ---------- -After v1.15 -=========== -- Added Xoshiro256** and Xoshiro512**, the preferred generators of this class -- Fixed bug in `jump` method of Random123 generators which did nto specify a default value - - +v1.15.1 +======= +- Added Xoshiro256** and Xoshiro512**, the preferred generators of this class. +- Fixed bug in `jump` method of Random123 generators which did nto specify a default value. 
+- Added support for generating bounded uniform integers using Lemire's method. +- Synchronized with upstream changes, which requires moving the minimum supported NumPy to 1.13. v1.15 ===== diff --git a/_randomgen/doc/source/conf.py b/_randomgen/doc/source/conf.py index c89900b55..4b38c42d5 100644 --- a/_randomgen/doc/source/conf.py +++ b/_randomgen/doc/source/conf.py @@ -15,9 +15,7 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) -from distutils.version import LooseVersion import guzzle_sphinx_theme -# import sphinx_rtd_theme import randomgen # -- Project information ----------------------------------------------------- diff --git a/_randomgen/doc/source/index.rst b/_randomgen/doc/source/index.rst index 7db6cbd5b..eac8c1ef1 100644 --- a/_randomgen/doc/source/index.rst +++ b/_randomgen/doc/source/index.rst @@ -119,6 +119,10 @@ What's New or Different these basic RNGs to be used in numba. * The basic random number generators can be used in downstream projects via Cython. +* Support for Lemire’s method [Lemire]_ of generating uniform integers on an + arbitrary interval by setting ``use_masked=True`` in + (:meth:`~randomgen.generator.RandomGenerator.randint`). + See :ref:`new-or-different` for a complete list of improvements and differences. @@ -205,6 +209,7 @@ New Features Comparing Performance <performance> extending Reading System Entropy <entropy> + references Changes ~~~~~~~ diff --git a/_randomgen/doc/source/new-or-different.rst b/_randomgen/doc/source/new-or-different.rst index c94d95c7c..6598c13fe 100644 --- a/_randomgen/doc/source/new-or-different.rst +++ b/_randomgen/doc/source/new-or-different.rst @@ -87,3 +87,12 @@ What's New or Different print(existing) .. * For changes since the previous release, see the :ref:`change-log` + +* Support for Lemire’s method of generating uniform integers on an + arbitrary interval by setting ``use_masked=True`` in + (:meth:`~randomgen.generator.RandomGenerator.randint`). + +.. 
ipython:: python + + %timeit rg.randint(0, 1535, use_masked=False) + %timeit numpy.random.randint(0, 1535) diff --git a/_randomgen/doc/source/performance.py b/_randomgen/doc/source/performance.py index d84c3147e..12cbbc5d3 100644 --- a/_randomgen/doc/source/performance.py +++ b/_randomgen/doc/source/performance.py @@ -1,11 +1,14 @@ +from collections import OrderedDict +from timeit import repeat + import numpy as np -from timeit import timeit, repeat import pandas as pd from randomgen import MT19937, DSFMT, ThreeFry, PCG64, Xoroshiro128, \ - Xorshift1024, Philox + Xorshift1024, Philox, Xoshiro256StarStar, Xoshiro512StarStar -PRNGS = [DSFMT, MT19937, Philox, PCG64, ThreeFry, Xoroshiro128, Xorshift1024] +PRNGS = [DSFMT, MT19937, Philox, PCG64, ThreeFry, Xoroshiro128, Xorshift1024, + Xoshiro256StarStar, Xoshiro512StarStar] funcs = {'32-bit Unsigned Ints': 'random_uintegers(size=1000000,bits=32)', '64-bit Unsigned Ints': 'random_uintegers(size=1000000,bits=32)', @@ -24,20 +27,19 @@ rg = {prng}().generator """ test = "rg.{func}" -table = {} +table = OrderedDict() for prng in PRNGS: print(prng) - col = {} + col = OrderedDict() for key in funcs: t = repeat(test.format(func=funcs[key]), setup.format(prng=prng().__class__.__name__), number=1, repeat=3) - col[key]= 1000 * min(t) + col[key] = 1000 * min(t) col = pd.Series(col) table[prng().__class__.__name__] = col - -npfuncs = {} +npfuncs = OrderedDict() npfuncs.update(funcs) npfuncs['32-bit Unsigned Ints'] = 'randint(2**32,dtype="uint32",size=1000000)' npfuncs['64-bit Unsigned Ints'] = 'tomaxint(size=1000000)' @@ -54,7 +56,6 @@ for key in npfuncs: col[key] = 1000 * min(t) table['NumPy'] = pd.Series(col) - table = pd.DataFrame(table) table = table.reindex(table.mean(1).sort_values().index) order = np.log(table).mean().sort_values().index @@ -63,11 +64,11 @@ table = table.reindex(order) table = table.T print(table.to_csv(float_format='%0.1f')) -rel = table / (table.iloc[:,[0]].values @ np.ones((1,8))) +rel = table.loc[:, 
['NumPy']].values @ np.ones((1, table.shape[1])) / table rel.pop(rel.columns[0]) rel = rel.T rel['Overall'] = np.exp(np.log(rel).mean(1)) rel *= 100 rel = np.round(rel) rel = rel.T -print(rel.to_csv(float_format='%0d'))
\ No newline at end of file +print(rel.to_csv(float_format='%0d')) diff --git a/_randomgen/doc/source/references.rst b/_randomgen/doc/source/references.rst new file mode 100644 index 000000000..0dc99868f --- /dev/null +++ b/_randomgen/doc/source/references.rst @@ -0,0 +1,5 @@ +References +---------- + +.. [Lemire] Daniel Lemire., "Fast Random Integer Generation in an Interval", + CoRR, Aug. 13, 2018, http://arxiv.org/abs/1805.10941. diff --git a/_randomgen/randomgen/common.pxd b/_randomgen/randomgen/common.pxd index f6017c0d7..62163ad62 100644 --- a/_randomgen/randomgen/common.pxd +++ b/_randomgen/randomgen/common.pxd @@ -18,6 +18,7 @@ cdef enum ConstraintType: CONS_POSITIVE CONS_BOUNDED_0_1 CONS_BOUNDED_0_1_NOTNAN + CONS_BOUNDED_GT_0_1 CONS_GT_1 CONS_GTE_1 CONS_POISSON diff --git a/_randomgen/randomgen/common.pyx b/_randomgen/randomgen/common.pyx index b152099eb..eb872d02f 100644 --- a/_randomgen/randomgen/common.pyx +++ b/_randomgen/randomgen/common.pyx @@ -155,10 +155,13 @@ cdef int check_array_constraint(np.ndarray val, object name, constraint_type con raise ValueError(name + " <= 0") elif cons == CONS_BOUNDED_0_1 or cons == CONS_BOUNDED_0_1_NOTNAN: if np.any(np.less(val, 0)) or np.any(np.greater(val, 1)): - raise ValueError(name + " <= 0 or " + name + " >= 1") + raise ValueError(name + " < 0 or " + name + " > 1") if cons == CONS_BOUNDED_0_1_NOTNAN: if np.any(np.isnan(val)): raise ValueError(name + ' contains NaNs') + elif cons == CONS_BOUNDED_GT_0_1: + if np.any(np.less_equal(val, 0)) or np.any(np.greater(val, 1)): + raise ValueError(name + " <= 0 or " + name + " > 1") elif cons == CONS_GT_1: if np.any(np.less_equal(val, 1)): raise ValueError(name + " <= 1") diff --git a/_randomgen/randomgen/generator.pyx b/_randomgen/randomgen/generator.pyx index ca6c6af24..5d239c725 100644 --- a/_randomgen/randomgen/generator.pyx +++ b/_randomgen/randomgen/generator.pyx @@ -574,7 +574,7 @@ cdef class RandomGenerator: def randint(self, low, high=None, size=None, dtype=int, 
use_masked=True): """ - randint(low, high=None, size=None, dtype='l') + randint(low, high=None, size=None, dtype='l', use_masked=True) Return random integers from `low` (inclusive) to `high` (exclusive). @@ -652,6 +652,11 @@ cdef class RandomGenerator: >>> randomgen.randint([1, 3, 5, 7], [[10], [20]], dtype=np.uint8) array([[ 8, 6, 9, 7], [ 1, 16, 9, 12]], dtype=uint8) + + References + ---------- + .. [1] Daniel Lemire., "Fast Random Integer Generation in an Interval", + CoRR, Aug. 13, 2018, http://arxiv.org/abs/1805.10941. """ if high is None: high = low @@ -3568,7 +3573,7 @@ cdef class RandomGenerator: """ return disc(&random_geometric, self._brng, size, self.lock, 1, 0, - p, 'p', CONS_BOUNDED_0_1, + p, 'p', CONS_BOUNDED_GT_0_1, 0.0, '', CONS_NONE, 0.0, '', CONS_NONE) @@ -4241,9 +4246,8 @@ cdef class RandomGenerator: self._shuffle_raw(n, sizeof(np.npy_intp), stride, x_ptr, buf_ptr) else: self._shuffle_raw(n, itemsize, stride, x_ptr, buf_ptr) - elif isinstance(x, np.ndarray) and x.ndim > 1 and x.size: - # Multidimensional ndarrays require a bounce buffer. - buf = np.empty_like(x[0]) + elif isinstance(x, np.ndarray) and x.ndim and x.size: + buf = np.empty_like(x[0,...]) with self.lock: for i in reversed(range(1, n)): j = random_interval(self._brng, i) @@ -4312,8 +4316,8 @@ cdef class RandomGenerator: # shuffle has fast-path for 1-d if arr.ndim == 1: - # must return a copy - if arr is x: + # Return a copy if same memory + if np.may_share_memory(arr, x): arr = np.array(arr) self.shuffle(arr) return arr diff --git a/_randomgen/randomgen/tests/test_numpy_mt19937.py b/_randomgen/randomgen/tests/test_numpy_mt19937.py index 747e10b39..888baf92f 100644 --- a/_randomgen/randomgen/tests/test_numpy_mt19937.py +++ b/_randomgen/randomgen/tests/test_numpy_mt19937.py @@ -437,27 +437,31 @@ class TestRandomDist(object): assert_array_equal(actual, desired) def test_randint_masked(self): - """ Test masked rejection sampling algorithm to generate array of uint32 in an interval. 
""" + # Test masked rejection sampling algorithm to generate array of + # uint32 in an interval. mt19937.seed(self.seed) - actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, use_masked=True) + actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, + use_masked=True) desired = np.array([[2, 47], [12, 51], [33, 43]], dtype=np.uint32) assert_array_equal(actual, desired) def test_randint_lemire_32(self): - """ Test lemire algorithm to generate array of uint32 in an interval. """ + # Test lemire algorithm to generate array of uint32 in an interval. mt19937.seed(self.seed) - actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, use_masked=False) + actual = mt19937.randint(0, 99, size=(3, 2), dtype=np.uint32, + use_masked=False) desired = np.array([[61, 33], [58, 14], [87, 23]], dtype=np.uint32) assert_array_equal(actual, desired) def test_randint_lemire_64(self): - """ Test lemire algorithm to generate array of uint64 in an interval. """ + # Test lemire algorithm to generate array of uint64 in an interval. 
mt19937.seed(self.seed) - actual = mt19937.randint(0, 99 + 0xFFFFFFFFF, size=(3, 2), dtype=np.uint64, use_masked=False) + actual = mt19937.randint(0, 99 + 0xFFFFFFFFF, size=(3, 2), + dtype=np.uint64, use_masked=False) desired = np.array([[42523252834, 40656066204], [61069871386, 61274051182], [31443797706, 53476677934]], dtype=np.uint64) @@ -621,6 +625,10 @@ class TestRandomDist(object): lambda x: [(i, i) for i in x], lambda x: np.asarray([[i, i] for i in x]), lambda x: np.vstack([x, x]).T, + # gh-11442 + lambda x: (np.asarray([(i, i) for i in x], + [("a", int), ("b", int)]) + .view(np.recarray)), # gh-4270 lambda x: np.asarray([(i, i) for i in x], [("a", object, 1), diff --git a/_randomgen/randomgen/tests/test_numpy_mt19937_regressions.py b/_randomgen/randomgen/tests/test_numpy_mt19937_regressions.py index 1f082925e..4e51327aa 100644 --- a/_randomgen/randomgen/tests/test_numpy_mt19937_regressions.py +++ b/_randomgen/randomgen/tests/test_numpy_mt19937_regressions.py @@ -136,3 +136,25 @@ class TestRegression(object): # Force Garbage Collection - should not segfault. import gc gc.collect() + + def test_permutation_subclass(self): + class N(np.ndarray): + pass + + mt19937.seed(1) + orig = np.arange(3).view(N) + perm = mt19937.permutation(orig) + assert_array_equal(perm, np.array([0, 2, 1])) + assert_array_equal(orig, np.arange(3).view(N)) + + class M(object): + a = np.arange(5) + + def __array__(self): + return self.a + + mt19937.seed(1) + m = M() + perm = mt19937.permutation(m) + assert_array_equal(perm, np.array([2, 1, 4, 0, 3])) + assert_array_equal(m.__array__(), np.arange(5)) |