diff options
author | Kevin Sheppard <kevin.k.sheppard@gmail.com> | 2019-02-04 15:41:04 +0000 |
---|---|---|
committer | mattip <matti.picus@gmail.com> | 2019-05-20 18:45:27 +0300 |
commit | 578889bf65adedbbee9af8125dab35a965d4b6fa (patch) | |
tree | 1c35536e6bac4afe2ff8e3880cb5ec41205ee1de | |
parent | 9dac6a5bb80cf7e9904154f24a486659837b9919 (diff) | |
download | numpy-578889bf65adedbbee9af8125dab35a965d4b6fa.tar.gz |
BUG: Ensure buffer_loc is reset in DSFMT
Reset the buffer location after a reseed or jump so that
values are produced from the new state rather than reused from
the old one.
Ensure that the fallback entropy path uses the correct number of bytes in Xorshift1024 and Xoshiro512StarStar.
Small doc fixes.
Update dirichlet documentation
Update beta docstring
Fix weibull for a=0
-rw-r--r-- | _randomgen/doc/source/change-log.rst | 11 | ||||
-rw-r--r-- | _randomgen/randomgen/common.pxd | 2 | ||||
-rw-r--r-- | _randomgen/randomgen/distributions.pxd | 2 | ||||
-rw-r--r-- | _randomgen/randomgen/dsfmt.pyx | 18 | ||||
-rw-r--r-- | _randomgen/randomgen/generator.pyx | 38 | ||||
-rw-r--r-- | _randomgen/randomgen/legacy/legacy_distributions.pxd | 2 | ||||
-rw-r--r-- | _randomgen/randomgen/mt19937.pyx | 4 | ||||
-rw-r--r-- | _randomgen/randomgen/src/distributions/distributions.c | 3 | ||||
-rw-r--r-- | _randomgen/randomgen/tests/test_direct.py | 6 | ||||
-rw-r--r-- | _randomgen/randomgen/tests/test_numpy_mt19937.py | 3 | ||||
-rw-r--r-- | _randomgen/randomgen/xorshift1024.pyx | 2 | ||||
-rw-r--r-- | _randomgen/randomgen/xoshiro512starstar.pyx | 4 |
12 files changed, 73 insertions, 22 deletions
diff --git a/_randomgen/doc/source/change-log.rst b/_randomgen/doc/source/change-log.rst index 629dd0002..eb884c7f3 100644 --- a/_randomgen/doc/source/change-log.rst +++ b/_randomgen/doc/source/change-log.rst @@ -1,5 +1,16 @@ Change Log ---------- +v1.15.2 +======= +- Fixed a bug that affected :class:`~randomgen.dsfmt.DSFMT` when calling + :func:`~randomgen.dsfmt.DSFMT.jump` or :func:`~randomgen.dsfmt.DSFMT.seed` + that failed to reset the buffer. This resulted in upto 381 values from the + previous state being used before the buffer was refilled at the new state. +- Fixed bugs in :class:`~randomgen.xoshiro512starstar.Xoshiro512StarStar` + and :class:`~randomgen.xorshift1024.Xorshift1024` where the fallback + entropy initialization used too few bytes. This bug is unlikely to be + encountered since this path is only encountered if the system random + number generator fails. v1.15.1 ======= diff --git a/_randomgen/randomgen/common.pxd b/_randomgen/randomgen/common.pxd index 62163ad62..63a1b3f7d 100644 --- a/_randomgen/randomgen/common.pxd +++ b/_randomgen/randomgen/common.pxd @@ -1,3 +1,5 @@ +#cython: language_level=3 + from __future__ import absolute_import from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, diff --git a/_randomgen/randomgen/distributions.pxd b/_randomgen/randomgen/distributions.pxd index a50d5d821..35d92db51 100644 --- a/_randomgen/randomgen/distributions.pxd +++ b/_randomgen/randomgen/distributions.pxd @@ -1,3 +1,5 @@ +#cython: language_level=3 + from libc.stdint cimport (uint8_t, uint16_t, uint32_t, uint64_t, int8_t, int16_t, int32_t, int64_t, intptr_t) import numpy as np diff --git a/_randomgen/randomgen/dsfmt.pyx b/_randomgen/randomgen/dsfmt.pyx index c83ade5cd..ee8ef270d 100644 --- a/_randomgen/randomgen/dsfmt.pyx +++ b/_randomgen/randomgen/dsfmt.pyx @@ -79,8 +79,8 @@ cdef class DSFMT: Can be an integer in [0, 2**32-1], array of integers in [0, 2**32-1] or ``None`` (the default). 
If `seed` is ``None``, then ``DSFMT`` will try to read entropy from ``/dev/urandom`` - (or the Windows analog) if available to produce a 64-bit - seed. If unavailable, a 64-bit hash of the time and process + (or the Windows analog) if available to produce a 32-bit + seed. If unavailable, a 32-bit hash of the time and process ID is used. Notes @@ -114,7 +114,7 @@ cdef class DSFMT: The ``DSFMT`` state vector consists of a 384 element array of 64-bit unsigned integers plus a single integer value between 0 and 382 indicating the current position within the main array. The implementation - used here augments this with a 384 element array of doubles which are used + used here augments this with a 382 element array of doubles which are used to efficiently access the random numbers produced by the dSFMT generator. ``DSFMT`` is seeded using either a single 32-bit unsigned integer @@ -182,6 +182,10 @@ cdef class DSFMT: free(self.rng_state) free(self._brng) + cdef _reset_state_variables(self): + self.rng_state.buffer_loc = DSFMT_N64 + + def _benchmark(self, Py_ssize_t cnt, method=u'uint64'): cdef Py_ssize_t i if method==u'uint64': @@ -206,8 +210,8 @@ cdef class DSFMT: Can be an integer in [0, 2**32-1], array of integers in [0, 2**32-1] or ``None`` (the default). If `seed` is ``None``, then ``DSFMT`` will try to read entropy from ``/dev/urandom`` - (or the Windows analog) if available to produce a 64-bit - seed. If unavailable, a 64-bit hash of the time and process + (or the Windows analog) if available to produce a 32-bit + seed. If unavailable, a 32-bit hash of the time and process ID is used. 
Raises @@ -238,6 +242,8 @@ cdef class DSFMT: dsfmt_init_by_array(self.rng_state.state, <uint32_t *>obj.data, np.PyArray_DIM(obj, 0)) + # Clear the buffer + self._reset_state_variables() def jump(self, np.npy_intp iter=1): """ @@ -258,6 +264,8 @@ cdef class DSFMT: cdef np.npy_intp i for i in range(iter): dsfmt_jump(self.rng_state) + # Clear the buffer + self._reset_state_variables() return self @property diff --git a/_randomgen/randomgen/generator.pyx b/_randomgen/randomgen/generator.pyx index 5d239c725..01fa7f1be 100644 --- a/_randomgen/randomgen/generator.pyx +++ b/_randomgen/randomgen/generator.pyx @@ -385,9 +385,9 @@ cdef class RandomGenerator: Parameters ---------- a : float or array_like of floats - Alpha, non-negative. + Alpha, positive (>0). b : float or array_like of floats - Beta, non-negative. + Beta, positive (>0). size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -2355,7 +2355,7 @@ cdef class RandomGenerator: Parameters ---------- a : float or array_like of floats - Shape of the distribution. Should be greater than zero. + Shape parameter of the distribution. Must be nonnegative. size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. If size is ``None`` (default), @@ -4068,8 +4068,9 @@ cdef class RandomGenerator: Draw `size` samples of dimension k from a Dirichlet distribution. A Dirichlet-distributed random variable can be seen as a multivariate - generalization of a Beta distribution. Dirichlet pdf is the conjugate - prior of a multinomial in Bayesian inference. + generalization of a Beta distribution. The Dirichlet distribution + is a conjugate prior of a multinomial distribution in Bayesian + inference. Parameters ---------- @@ -4086,15 +4087,30 @@ cdef class RandomGenerator: samples : ndarray, The drawn samples, of shape (size, alpha.ndim). 
+ Raises + ------- + ValueError + If any value in alpha is less than or equal to zero + Notes ----- - .. math:: X \\approx \\prod_{i=1}^{k}{x^{\\alpha_i-1}_i} + The Dirichlet distribution is a distribution over vectors + :math:`x` that fulfil the conditions :math:`x_i>0` and + :math:`\\sum_{i=1}^k x_i = 1`. + + The probability density function :math:`p` of a + Dirichlet-distributed random vector :math:`X` is + proportional to + + .. math:: p(x) \\propto \\prod_{i=1}^{k}{x^{\\alpha_i-1}_i}, + + where :math:`\\alpha` is a vector containing the positive + concentration parameters. - Uses the following property for computation: for each dimension, - draw a random sample y_i from a standard gamma generator of shape - `alpha_i`, then - :math:`X = \\frac{1}{\\sum_{i=1}^k{y_i}} (y_1, \\ldots, y_n)` is - Dirichlet distributed. + The method uses the following property for computation: let :math:`Y` + be a random vector which has components that follow a standard gamma + distribution, then :math:`X = \\frac{1}{\\sum_{i=1}^k{Y_i}} Y` + is Dirichlet-distributed References ---------- diff --git a/_randomgen/randomgen/legacy/legacy_distributions.pxd b/_randomgen/randomgen/legacy/legacy_distributions.pxd index d22d6a73a..e2157f706 100644 --- a/_randomgen/randomgen/legacy/legacy_distributions.pxd +++ b/_randomgen/randomgen/legacy/legacy_distributions.pxd @@ -1,3 +1,5 @@ +#cython: language_level=3 + from libc.stdint cimport uint64_t import numpy as np diff --git a/_randomgen/randomgen/mt19937.pyx b/_randomgen/randomgen/mt19937.pyx index 14ab24f39..207ebc4bd 100644 --- a/_randomgen/randomgen/mt19937.pyx +++ b/_randomgen/randomgen/mt19937.pyx @@ -206,8 +206,8 @@ cdef class MT19937: Can be an integer in [0, 2**32-1], array of integers in [0, 2**32-1] or ``None`` (the default). If `seed` is ``None``, then ``MT19937`` will try to read entropy from ``/dev/urandom`` - (or the Windows analog) if available to produce a 64-bit - seed. 
If unavailable, a 64-bit hash of the time and process + (or the Windows analog) if available to produce a 32-bit + seed. If unavailable, a 32-bit hash of the time and process ID is used. Raises diff --git a/_randomgen/randomgen/src/distributions/distributions.c b/_randomgen/randomgen/src/distributions/distributions.c index e04259136..4e7493afa 100644 --- a/_randomgen/randomgen/src/distributions/distributions.c +++ b/_randomgen/randomgen/src/distributions/distributions.c @@ -573,6 +573,9 @@ double random_pareto(brng_t *brng_state, double a) { } double random_weibull(brng_t *brng_state, double a) { + if (a == 0.0) { + return 0.0; + } return pow(standard_exponential_zig(brng_state), 1. / a); } diff --git a/_randomgen/randomgen/tests/test_direct.py b/_randomgen/randomgen/tests/test_direct.py index ee69d5416..5d251a3b7 100644 --- a/_randomgen/randomgen/tests/test_direct.py +++ b/_randomgen/randomgen/tests/test_direct.py @@ -428,6 +428,12 @@ class TestDSFMT(Base): assert_allclose(uniforms, vals) assert_equal(uniforms.dtype, np.float32) + def test_buffer_reset(self): + rs = RandomGenerator(self.brng(*self.data1['seed'])) + u = rs.random_sample(1) + rs.seed(*self.data1['seed']) + assert rs.state['buffer_loc'] == 382 + class TestThreeFry32(Base): @classmethod diff --git a/_randomgen/randomgen/tests/test_numpy_mt19937.py b/_randomgen/randomgen/tests/test_numpy_mt19937.py index 888baf92f..df458f2ee 100644 --- a/_randomgen/randomgen/tests/test_numpy_mt19937.py +++ b/_randomgen/randomgen/tests/test_numpy_mt19937.py @@ -1104,7 +1104,8 @@ class TestRandomDist(object): assert_array_almost_equal(actual, desired, decimal=15) def test_weibull_0(self): - assert_equal(mt19937.weibull(a=0), 0) + mt19937.seed(self.seed) + assert_equal(mt19937.weibull(a=0, size=12), np.zeros(12)) assert_raises(ValueError, mt19937.weibull, a=-0.) 
def test_zipf(self): diff --git a/_randomgen/randomgen/xorshift1024.pyx b/_randomgen/randomgen/xorshift1024.pyx index bf8f32e90..8341985b4 100644 --- a/_randomgen/randomgen/xorshift1024.pyx +++ b/_randomgen/randomgen/xorshift1024.pyx @@ -234,7 +234,7 @@ cdef class Xorshift1024: try: state = random_entropy(32) except RuntimeError: - state = random_entropy(4, 'fallback') + state = random_entropy(32, 'fallback') state = state.view(np.uint64) else: state = seed_by_array(seed, 16) diff --git a/_randomgen/randomgen/xoshiro512starstar.pyx b/_randomgen/randomgen/xoshiro512starstar.pyx index b4ab76158..17fe3c420 100644 --- a/_randomgen/randomgen/xoshiro512starstar.pyx +++ b/_randomgen/randomgen/xoshiro512starstar.pyx @@ -201,9 +201,9 @@ cdef class Xoshiro512StarStar: ub = 2 ** 64 if seed is None: try: - state = random_entropy(2 * 8) + state = random_entropy(16) except RuntimeError: - state = random_entropy(8, 'fallback') + state = random_entropy(16, 'fallback') state = state.view(np.uint64) else: state = seed_by_array(seed, 8) |