diff options
Diffstat (limited to 'numpy/random/generator.pyx')
-rw-r--r-- | numpy/random/generator.pyx | 34 |
1 files changed, 28 insertions, 6 deletions
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx index 368a03e8f..05323c422 100644 --- a/numpy/random/generator.pyx +++ b/numpy/random/generator.pyx @@ -6,7 +6,7 @@ import warnings import numpy as np from .bounded_integers import _integers_types -from .xoshiro256 import Xoshiro256 +from .pcg64 import PCG64 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer from cpython cimport (Py_INCREF, PyFloat_AsDouble) @@ -89,7 +89,7 @@ cdef class Generator: def __init__(self, bit_generator=None): if bit_generator is None: - bit_generator = Xoshiro256() + bit_generator = PCG64() self._bit_generator = bit_generator capsule = bit_generator.capsule @@ -419,7 +419,8 @@ cdef class Generator: References ---------- .. [1] Daniel Lemire., "Fast Random Integer Generation in an Interval", - CoRR, Aug. 13, 2018, http://arxiv.org/abs/1805.10941. + ACM Transactions on Modeling and Computer Simulation 29 (1), 2019, + http://arxiv.org/abs/1805.10941. """ if high is None: @@ -439,7 +440,7 @@ cdef class Generator: # Implementation detail: the old API used a masked method to generate # bounded uniform integers. Lemire's method is preferable since it is # faster. randomgen allows a choice, we will always use the faster one. - cdef bint _masked = True + cdef bint _masked = False if key == 'int32': ret = _rand_int32(low, high, size, _masked, endpoint, &self._bitgen, self.lock) @@ -3094,9 +3095,11 @@ cdef class Generator: Parameters ---------- ngood : int or array_like of ints - Number of ways to make a good selection. Must be nonnegative. + Number of ways to make a good selection. Must be nonnegative and + less than 10**9. nbad : int or array_like of ints - Number of ways to make a bad selection. Must be nonnegative. + Number of ways to make a bad selection. Must be nonnegative and + less than 10**9. nsample : int or array_like of ints Number of items sampled. Must be nonnegative and less than ``ngood + nbad``. @@ -3141,6 +3144,13 @@ cdef class Generator: replacement (or the sample space is infinite). As the sample space becomes large, this distribution approaches the binomial. + The arguments `ngood` and `nbad` each must be less than `10**9`. For + extremely large arguments, the algorithm that is used to compute the + samples [4]_ breaks down because of loss of precision in floating point + calculations. For such large values, if `nsample` is not also large, + the distribution can be approximated with the binomial distribution, + `binomial(n=nsample, p=ngood/(ngood + nbad))`. + References ---------- .. [1] Lentner, Marvin, "Elementary Applied Statistics", Bogden @@ -3150,6 +3160,9 @@ cdef class Generator: http://mathworld.wolfram.com/HypergeometricDistribution.html .. [3] Wikipedia, "Hypergeometric distribution", https://en.wikipedia.org/wiki/Hypergeometric_distribution + .. [4] Stadlober, Ernst, "The ratio of uniforms approach for generating + discrete random variates", Journal of Computational and Applied + Mathematics, 31, pp. 181-189 (1990). Examples -------- @@ -3171,6 +3184,7 @@ cdef class Generator: # answer = 0.003 ... pretty unlikely! """ + DEF HYPERGEOM_MAX = 10**9 cdef bint is_scalar = True cdef np.ndarray ongood, onbad, onsample cdef int64_t lngood, lnbad, lnsample @@ -3185,6 +3199,9 @@ cdef class Generator: lnbad = <int64_t>nbad lnsample = <int64_t>nsample + if lngood >= HYPERGEOM_MAX or lnbad >= HYPERGEOM_MAX: + raise ValueError("both ngood and nbad must be less than %d" % + HYPERGEOM_MAX) if lngood + lnbad < lnsample: raise ValueError("ngood + nbad < nsample") return disc(&random_hypergeometric, &self._bitgen, size, self.lock, 0, 3, @@ -3192,8 +3209,13 @@ cdef class Generator: lnbad, 'nbad', CONS_NON_NEGATIVE, lnsample, 'nsample', CONS_NON_NEGATIVE) + if np.any(ongood >= HYPERGEOM_MAX) or np.any(onbad >= HYPERGEOM_MAX): + raise ValueError("both ngood and nbad must be less than %d" % + HYPERGEOM_MAX) + if np.any(np.less(np.add(ongood, onbad), onsample)): raise ValueError("ngood + nbad < nsample") + return discrete_broadcast_iii(&random_hypergeometric, &self._bitgen, size, self.lock, ongood, 'ngood', CONS_NON_NEGATIVE, onbad, 'nbad', CONS_NON_NEGATIVE, |