summary | refs | log | tree | commit | diff
path: root/numpy/random/generator.pyx
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/random/generator.pyx')
-rw-r--r-- numpy/random/generator.pyx 34
1 files changed, 28 insertions, 6 deletions
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index 368a03e8f..05323c422 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -6,7 +6,7 @@ import warnings
import numpy as np
from .bounded_integers import _integers_types
-from .xoshiro256 import Xoshiro256
+from .pcg64 import PCG64
from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
from cpython cimport (Py_INCREF, PyFloat_AsDouble)
@@ -89,7 +89,7 @@ cdef class Generator:
def __init__(self, bit_generator=None):
if bit_generator is None:
- bit_generator = Xoshiro256()
+ bit_generator = PCG64()
self._bit_generator = bit_generator
capsule = bit_generator.capsule
@@ -419,7 +419,8 @@ cdef class Generator:
References
----------
.. [1] Daniel Lemire, "Fast Random Integer Generation in an Interval",
- CoRR, Aug. 13, 2018, http://arxiv.org/abs/1805.10941.
+ ACM Transactions on Modeling and Computer Simulation 29 (1), 2019,
+ http://arxiv.org/abs/1805.10941.
"""
if high is None:
@@ -439,7 +440,7 @@ cdef class Generator:
# Implementation detail: the old API used a masked method to generate
# bounded uniform integers. Lemire's method is preferable since it is
# faster. randomgen allows a choice; we will always use the faster one.
- cdef bint _masked = True
+ cdef bint _masked = False
if key == 'int32':
ret = _rand_int32(low, high, size, _masked, endpoint, &self._bitgen, self.lock)
@@ -3094,9 +3095,11 @@ cdef class Generator:
Parameters
----------
ngood : int or array_like of ints
- Number of ways to make a good selection. Must be nonnegative.
+ Number of ways to make a good selection. Must be nonnegative and
+ less than 10**9.
nbad : int or array_like of ints
- Number of ways to make a bad selection. Must be nonnegative.
+ Number of ways to make a bad selection. Must be nonnegative and
+ less than 10**9.
nsample : int or array_like of ints
Number of items sampled. Must be nonnegative and less than or
equal to ``ngood + nbad``.
@@ -3141,6 +3144,13 @@ cdef class Generator:
replacement (or the sample space is infinite). As the sample space
becomes large, this distribution approaches the binomial.
+ The arguments `ngood` and `nbad` each must be less than `10**9`. For
+ extremely large arguments, the algorithm that is used to compute the
+ samples [4]_ breaks down because of loss of precision in floating point
+ calculations. For such large values, if `nsample` is not also large,
+ the distribution can be approximated with the binomial distribution,
+ `binomial(n=nsample, p=ngood/(ngood + nbad))`.
+
References
----------
.. [1] Lentner, Marvin, "Elementary Applied Statistics", Bogden
@@ -3150,6 +3160,9 @@ cdef class Generator:
http://mathworld.wolfram.com/HypergeometricDistribution.html
.. [3] Wikipedia, "Hypergeometric distribution",
https://en.wikipedia.org/wiki/Hypergeometric_distribution
+ .. [4] Stadlober, Ernst, "The ratio of uniforms approach for generating
+ discrete random variates", Journal of Computational and Applied
+ Mathematics, 31, pp. 181-189 (1990).
Examples
--------
@@ -3171,6 +3184,7 @@ cdef class Generator:
# answer = 0.003 ... pretty unlikely!
"""
+ DEF HYPERGEOM_MAX = 10**9
cdef bint is_scalar = True
cdef np.ndarray ongood, onbad, onsample
cdef int64_t lngood, lnbad, lnsample
@@ -3185,6 +3199,9 @@ cdef class Generator:
lnbad = <int64_t>nbad
lnsample = <int64_t>nsample
+ if lngood >= HYPERGEOM_MAX or lnbad >= HYPERGEOM_MAX:
+ raise ValueError("both ngood and nbad must be less than %d" %
+ HYPERGEOM_MAX)
if lngood + lnbad < lnsample:
raise ValueError("ngood + nbad < nsample")
return disc(&random_hypergeometric, &self._bitgen, size, self.lock, 0, 3,
@@ -3192,8 +3209,13 @@ cdef class Generator:
lnbad, 'nbad', CONS_NON_NEGATIVE,
lnsample, 'nsample', CONS_NON_NEGATIVE)
+ if np.any(ongood >= HYPERGEOM_MAX) or np.any(onbad >= HYPERGEOM_MAX):
+ raise ValueError("both ngood and nbad must be less than %d" %
+ HYPERGEOM_MAX)
+
if np.any(np.less(np.add(ongood, onbad), onsample)):
raise ValueError("ngood + nbad < nsample")
+
return discrete_broadcast_iii(&random_hypergeometric, &self._bitgen, size, self.lock,
ongood, 'ngood', CONS_NON_NEGATIVE,
onbad, 'nbad', CONS_NON_NEGATIVE,