diff options
author | Kevin Sheppard <kevin.k.sheppard@gmail.com> | 2019-05-18 23:47:51 +0100 |
---|---|---|
committer | mattip <matti.picus@gmail.com> | 2019-05-20 19:00:38 +0300 |
commit | b42a5ca0a076b40c612014dc540ca5f9bcf10f41 (patch) | |
tree | 339ed4a67923d7f27357b2aa6d997b14990efe26 | |
parent | 17e0070df93f4262908f884dca4b08cb7d0bba7f (diff) | |
download | numpy-b42a5ca0a076b40c612014dc540ca5f9bcf10f41.tar.gz |
BUG: Ensure integer-type stream on 32bit
Ensure integer type is stream compatible on 32 bit
Fix incorrect clause end
Add integer-generator tests that check long streams
-rw-r--r-- | numpy/random/common.pxd | 6 | ||||
-rw-r--r-- | numpy/random/common.pyx | 6 | ||||
-rw-r--r-- | numpy/random/legacy_distributions.pxd | 8 | ||||
-rw-r--r-- | numpy/random/mtrand.pyx | 126 | ||||
-rw-r--r-- | numpy/random/setup.py | 5 | ||||
-rw-r--r-- | numpy/random/src/distributions/distributions.c | 112 | ||||
-rw-r--r-- | numpy/random/src/distributions/distributions.h | 47 | ||||
-rw-r--r-- | numpy/random/src/legacy/distributions-boxmuller.c | 39 | ||||
-rw-r--r-- | numpy/random/src/legacy/distributions-boxmuller.h | 12 | ||||
-rw-r--r-- | numpy/random/tests/test_generator_mt19937.py | 10 | ||||
-rw-r--r-- | numpy/random/tests/test_randomstate.py | 61 |
11 files changed, 293 insertions, 139 deletions
diff --git a/numpy/random/common.pxd b/numpy/random/common.pxd index 4f7c8c903..1223b2706 100644 --- a/numpy/random/common.pxd +++ b/numpy/random/common.pxd @@ -96,6 +96,6 @@ cdef object cont_broadcast_3(void *func, void *state, object size, object lock, np.ndarray c_arr, object c_name, constraint_type c_constraint) cdef object discrete_broadcast_iii(void *func, void *state, object size, object lock, - np.ndarray a_arr, object a_name, constraint_type a_constraint, - np.ndarray b_arr, object b_name, constraint_type b_constraint, - np.ndarray c_arr, object c_name, constraint_type c_constraint) + np.ndarray a_arr, object a_name, constraint_type a_constraint, + np.ndarray b_arr, object b_name, constraint_type b_constraint, + np.ndarray c_arr, object c_name, constraint_type c_constraint) diff --git a/numpy/random/common.pyx b/numpy/random/common.pyx index 2fa854807..e195195a4 100644 --- a/numpy/random/common.pyx +++ b/numpy/random/common.pyx @@ -666,9 +666,9 @@ cdef object discrete_broadcast_di(void *func, void *state, object size, object l return randoms cdef object discrete_broadcast_iii(void *func, void *state, object size, object lock, - np.ndarray a_arr, object a_name, constraint_type a_constraint, - np.ndarray b_arr, object b_name, constraint_type b_constraint, - np.ndarray c_arr, object c_name, constraint_type c_constraint): + np.ndarray a_arr, object a_name, constraint_type a_constraint, + np.ndarray b_arr, object b_name, constraint_type b_constraint, + np.ndarray c_arr, object c_name, constraint_type c_constraint): cdef np.ndarray randoms cdef int64_t *randoms_data cdef np.broadcast it diff --git a/numpy/random/legacy_distributions.pxd b/numpy/random/legacy_distributions.pxd index 7969cfb92..e5d7a22a4 100644 --- a/numpy/random/legacy_distributions.pxd +++ b/numpy/random/legacy_distributions.pxd @@ -5,7 +5,7 @@ from libc.stdint cimport int64_t import numpy as np cimport numpy as np -from .distributions cimport bitgen_t +from .distributions cimport bitgen_t, binomial_t cdef extern from "distributions-boxmuller.h": @@ -35,6 +35,12 @@ cdef extern from "distributions-boxmuller.h": double legacy_wald(aug_bitgen_t *aug_state, double mean, double scale) nogil double legacy_lognormal(aug_bitgen_t *aug_state, double mean, double sigma) nogil int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) nogil + int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, int64_t sample) nogil + int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) nogil + int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) nogil + int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) nogil + int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) nogil + void legacy_random_multinomial(bitgen_t *bitgen_state, long n, long *mnix, double *pix, np.npy_intp d, binomial_t *binomial) nogil double legacy_standard_cauchy(aug_bitgen_t *state) nogil double legacy_beta(aug_bitgen_t *aug_state, double a, double b) nogil double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) nogil diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index 299398c34..74d1f519d 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -21,6 +21,19 @@ from .legacy_distributions cimport * np.import_array() +cdef object int64_to_long(object x): + """ + Convert int64 to long for legacy compatibility, which used long for integer + distributions + """ + cdef int64_t x64 + + if np.isscalar(x): + x64 = x + return <long>x64 + return x.astype('l', casting='unsafe') + + cdef class RandomState: """ RandomState(bit_generator=None) @@ -3031,43 +3044,43 @@ cdef class RandomState: # Uses a custom implementation since self._binomial is required cdef double _dp = 0 - cdef int64_t _in = 0 + cdef long _in = 0 cdef bint is_scalar = True cdef np.npy_intp i, cnt cdef np.ndarray randoms - cdef np.int64_t *randoms_data + cdef long *randoms_data cdef np.broadcast it p_arr = <np.ndarray>np.PyArray_FROM_OTF(p, np.NPY_DOUBLE, np.NPY_ALIGNED) is_scalar = is_scalar and np.PyArray_NDIM(p_arr) == 0 - n_arr = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_INT64, np.NPY_ALIGNED) + n_arr = <np.ndarray>np.PyArray_FROM_OTF(n, np.NPY_LONG, np.NPY_ALIGNED) is_scalar = is_scalar and np.PyArray_NDIM(n_arr) == 0 if not is_scalar: check_array_constraint(p_arr, 'p', CONS_BOUNDED_0_1) check_array_constraint(n_arr, 'n', CONS_NON_NEGATIVE) if size is not None: - randoms = <np.ndarray>np.empty(size, np.int64) + randoms = <np.ndarray>np.empty(size, int) else: it = np.PyArray_MultiIterNew2(p_arr, n_arr) - randoms = <np.ndarray>np.empty(it.shape, np.int64) + randoms = <np.ndarray>np.empty(it.shape, int) - randoms_data = <np.int64_t *>np.PyArray_DATA(randoms) + randoms_data = <long *>np.PyArray_DATA(randoms) cnt = np.PyArray_SIZE(randoms) it = np.PyArray_MultiIterNew3(randoms, p_arr, n_arr) with self.lock, nogil: for i in range(cnt): _dp = (<double*>np.PyArray_MultiIter_DATA(it, 1))[0] - _in = (<int64_t*>np.PyArray_MultiIter_DATA(it, 2))[0] - (<int64_t*>np.PyArray_MultiIter_DATA(it, 0))[0] = random_binomial(&self._bitgen, _dp, _in, &self._binomial) + _in = (<long*>np.PyArray_MultiIter_DATA(it, 2))[0] + (<long*>np.PyArray_MultiIter_DATA(it, 0))[0] = random_binomial(&self._bitgen, _dp, _in, &self._binomial) np.PyArray_MultiIter_NEXT(it) return randoms _dp = PyFloat_AsDouble(p) - _in = <int64_t>n + _in = <long>n check_constraint(_dp, 'p', CONS_BOUNDED_0_1) check_constraint(<double>_in, 'n', CONS_NON_NEGATIVE) @@ -3075,9 +3088,9 @@ cdef class RandomState: with self.lock: return random_binomial(&self._bitgen, _dp, _in, &self._binomial) - randoms = <np.ndarray>np.empty(size, np.int64) + randoms = <np.ndarray>np.empty(size, int) cnt = np.PyArray_SIZE(randoms) - randoms_data = <np.int64_t *>np.PyArray_DATA(randoms) + randoms_data = <long *>np.PyArray_DATA(randoms) with self.lock, nogil: for i in range(cnt): @@ -3157,10 +3170,12 @@ cdef class RandomState: ... print(i, "wells drilled, probability of one success =", probability) """ - return disc(&legacy_negative_binomial, &self._aug_state, size, self.lock, 2, 0, - n, 'n', CONS_POSITIVE, - p, 'p', CONS_BOUNDED_0_1, - 0.0, '', CONS_NONE) + out = disc(&legacy_negative_binomial, &self._aug_state, size, self.lock, 2, 0, + n, 'n', CONS_POSITIVE, + p, 'p', CONS_BOUNDED_0_1, + 0.0, '', CONS_NONE) + # Match historical output type + return int64_to_long(out) def poisson(self, lam=1.0, size=None): """ @@ -3228,10 +3243,12 @@ cdef class RandomState: >>> s = np.random.poisson(lam=(100., 500.), size=(100, 2)) """ - return disc(&random_poisson, &self._bitgen, size, self.lock, 1, 0, - lam, 'lam', CONS_POISSON, - 0.0, '', CONS_NONE, - 0.0, '', CONS_NONE) + out = disc(&legacy_random_poisson, &self._bitgen, size, self.lock, 1, 0, + lam, 'lam', CONS_POISSON, + 0.0, '', CONS_NONE, + 0.0, '', CONS_NONE) + # Match historical output type + return int64_to_long(out) def zipf(self, a, size=None): """ @@ -3307,10 +3324,12 @@ cdef class RandomState: >>> plt.show() """ - return disc(&random_zipf, &self._bitgen, size, self.lock, 1, 0, - a, 'a', CONS_GT_1, - 0.0, '', CONS_NONE, - 0.0, '', CONS_NONE) + out = disc(&legacy_random_zipf, &self._bitgen, size, self.lock, 1, 0, + a, 'a', CONS_GT_1, + 0.0, '', CONS_NONE, + 0.0, '', CONS_NONE) + # Match historical output type + return int64_to_long(out) def geometric(self, p, size=None): """ @@ -3358,10 +3377,12 @@ cdef class RandomState: 0.34889999999999999 #random """ - return disc(&random_geometric, &self._bitgen, size, self.lock, 1, 0, - p, 'p', CONS_BOUNDED_GT_0_1, - 0.0, '', CONS_NONE, - 0.0, '', CONS_NONE) + out = disc(&legacy_random_geometric, &self._bitgen, size, self.lock, 1, 0, + p, 'p', CONS_BOUNDED_GT_0_1, + 0.0, '', CONS_NONE, + 0.0, '', CONS_NONE) + # Match historical output type + return int64_to_long(out) def hypergeometric(self, ngood, nbad, nsample, size=None): """ @@ -3458,9 +3479,10 @@ cdef class RandomState: cdef np.ndarray ongood, onbad, onsample cdef int64_t lngood, lnbad, lnsample - ongood = <np.ndarray>np.PyArray_FROM_OTF(ngood, np.NPY_INT64, np.NPY_ALIGNED) - onbad = <np.ndarray>np.PyArray_FROM_OTF(nbad, np.NPY_INT64, np.NPY_ALIGNED) - onsample = <np.ndarray>np.PyArray_FROM_OTF(nsample, np.NPY_INT64, np.NPY_ALIGNED) + # This cast to long is required to ensure that the values are inbounds + ongood = <np.ndarray>np.PyArray_FROM_OTF(ngood, np.NPY_LONG, np.NPY_ALIGNED) + onbad = <np.ndarray>np.PyArray_FROM_OTF(nbad, np.NPY_LONG, np.NPY_ALIGNED) + onsample = <np.ndarray>np.PyArray_FROM_OTF(nsample, np.NPY_LONG, np.NPY_ALIGNED) if np.PyArray_NDIM(ongood) == np.PyArray_NDIM(onbad) == np.PyArray_NDIM(onsample) == 0: @@ -3470,17 +3492,25 @@ cdef class RandomState: if lngood + lnbad < lnsample: raise ValueError("ngood + nbad < nsample") - return disc(&random_hypergeometric, &self._bitgen, size, self.lock, 0, 3, - lngood, 'ngood', CONS_NON_NEGATIVE, - lnbad, 'nbad', CONS_NON_NEGATIVE, - lnsample, 'nsample', CONS_GTE_1) + out = disc(&legacy_random_hypergeometric, &self._bitgen, size, self.lock, 0, 3, + lngood, 'ngood', CONS_NON_NEGATIVE, + lnbad, 'nbad', CONS_NON_NEGATIVE, + lnsample, 'nsample', CONS_GTE_1) + # Match historical output type + return int64_to_long(out) if np.any(np.less(np.add(ongood, onbad), onsample)): raise ValueError("ngood + nbad < nsample") - return discrete_broadcast_iii(&random_hypergeometric, &self._bitgen, size, self.lock, - ongood, 'ngood', CONS_NON_NEGATIVE, - onbad, 'nbad', CONS_NON_NEGATIVE, - onsample, 'nsample', CONS_GTE_1) + # Convert to int64, if necessary, to use int64 infrastructure + ongood = ongood.astype(np.int64) + onbad = onbad.astype(np.int64) + onbad = onbad.astype(np.int64) + out = discrete_broadcast_iii(&legacy_random_hypergeometric,&self._bitgen, size, self.lock, + ongood, 'ngood', CONS_NON_NEGATIVE, + onbad, 'nbad', CONS_NON_NEGATIVE, + onsample, 'nsample', CONS_GTE_1) + # Match historical output type + return int64_to_long(out) def logseries(self, p, size=None): """ @@ -3557,10 +3587,12 @@ cdef class RandomState: >>> plt.show() """ - return disc(&random_logseries, &self._bitgen, size, self.lock, 1, 0, - p, 'p', CONS_BOUNDED_0_1, - 0.0, '', CONS_NONE, - 0.0, '', CONS_NONE) + out = disc(&legacy_random_logseries, &self._bitgen, size, self.lock, 1, 0, + p, 'p', CONS_BOUNDED_0_1, + 0.0, '', CONS_NONE, + 0.0, '', CONS_NONE) + # Match historical output type + return int64_to_long(out) # Multivariate distributions: def multivariate_normal(self, mean, cov, size=None, check_valid='warn', @@ -3808,8 +3840,8 @@ cdef class RandomState: cdef np.npy_intp d, i, sz, offset cdef np.ndarray parr, mnarr cdef double *pix - cdef int64_t *mnix - cdef int64_t ni + cdef long *mnix + cdef long ni d = len(pvals) parr = <np.ndarray>np.PyArray_FROM_OTF(pvals, np.NPY_DOUBLE, np.NPY_ALIGNED) @@ -3826,16 +3858,16 @@ cdef class RandomState: except: shape = tuple(size) + (d,) - multin = np.zeros(shape, dtype=np.int64) + multin = np.zeros(shape, dtype=int) mnarr = <np.ndarray>multin - mnix = <int64_t*>np.PyArray_DATA(mnarr) + mnix = <long*>np.PyArray_DATA(mnarr) sz = np.PyArray_SIZE(mnarr) ni = n check_constraint(ni, 'n', CONS_NON_NEGATIVE) offset = 0 with self.lock, nogil: for i in range(sz // d): - random_multinomial(&self._bitgen, ni, &mnix[offset], pix, d, &self._binomial) + legacy_random_multinomial(&self._bitgen, ni, &mnix[offset], pix, d, &self._binomial) offset += d return multin diff --git a/numpy/random/setup.py b/numpy/random/setup.py index 5f3a48783..3c9b3289f 100644 --- a/numpy/random/setup.py +++ b/numpy/random/setup.py @@ -84,9 +84,8 @@ def configuration(parent_package='',top_path=None): if DEBUG: EXTRA_LINK_ARGS += ['-debug'] EXTRA_COMPILE_ARGS += ["-Zi", "/Od"] - if sys.version_info < (3, 0): - EXTRA_INCLUDE_DIRS += [join(MOD_DIR, 'src', 'common')] + LEGACY_DEFS = [('NP_RANDOM_LEGACY', '1')] DSFMT_DEFS = [('DSFMT_MEXP', '19937')] if USE_SSE2: if os.name == 'nt': @@ -180,7 +179,7 @@ def configuration(parent_package='',top_path=None): extra_compile_args=EXTRA_COMPILE_ARGS, extra_link_args=EXTRA_LINK_ARGS, depends=['mtrand.pyx'], - define_macros=defs + DSFMT_DEFS, + define_macros=defs + DSFMT_DEFS + LEGACY_DEFS, ) return config diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c index 789440f8b..d8362a39a 100644 --- a/numpy/random/src/distributions/distributions.c +++ b/numpy/random/src/distributions/distributions.c @@ -646,8 +646,8 @@ double random_standard_t(bitgen_t *bitgen_state, double df) { return sqrt(df / 2) * num / sqrt(denom); } -static int64_t random_poisson_mult(bitgen_t *bitgen_state, double lam) { - int64_t X; +static RAND_INT_TYPE random_poisson_mult(bitgen_t *bitgen_state, double lam) { + RAND_INT_TYPE X; double prod, U, enlam; enlam = exp(-lam); @@ -671,8 +671,8 @@ static int64_t random_poisson_mult(bitgen_t *bitgen_state, double lam) { */ #define LS2PI 0.91893853320467267 #define TWELFTH 0.083333333333333333333333 -static int64_t random_poisson_ptrs(bitgen_t *bitgen_state, double lam) { - int64_t k; +static RAND_INT_TYPE random_poisson_ptrs(bitgen_t *bitgen_state, double lam) { + RAND_INT_TYPE k; double U, V, slam, loglam, a, b, invalpha, vr, us; slam = sqrt(lam); @@ -686,7 +686,7 @@ static int64_t random_poisson_ptrs(bitgen_t *bitgen_state, double lam) { U = next_double(bitgen_state) - 0.5; V = next_double(bitgen_state); us = 0.5 - fabs(U); - k = (int64_t)floor((2 * a / us + b) * U + lam + 0.43); + k = (RAND_INT_TYPE)floor((2 * a / us + b) * U + lam + 0.43); if ((us >= 0.07) && (V <= vr)) { return k; } @@ -702,7 +702,7 @@ static int64_t random_poisson_ptrs(bitgen_t *bitgen_state, double lam) { } } -int64_t random_poisson(bitgen_t *bitgen_state, double lam) { +RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam) { if (lam >= 10) { return random_poisson_ptrs(bitgen_state, lam); } else if (lam == 0) { @@ -712,16 +712,17 @@ int64_t random_poisson(bitgen_t *bitgen_state, double lam) { } } -int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, double p) { +RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n, + double p) { double Y = random_gamma(bitgen_state, n, (1 - p) / p); return random_poisson(bitgen_state, Y); } -int64_t random_binomial_btpe(bitgen_t *bitgen_state, int64_t n, double p, - binomial_t *binomial) { +RAND_INT_TYPE random_binomial_btpe(bitgen_t *bitgen_state, RAND_INT_TYPE n, + double p, binomial_t *binomial) { double r, q, fm, p1, xm, xl, xr, c, laml, lamr, p2, p3, p4; double a, u, v, s, F, rho, t, A, nrq, x1, x2, f1, f2, z, z2, w, w2, x; - int64_t m, y, k, i; + RAND_INT_TYPE m, y, k, i; if (!(binomial->has_binomial) || (binomial->nsave != n) || (binomial->psave != p)) { @@ -732,7 +733,7 @@ int64_t random_binomial_btpe(bitgen_t *bitgen_state, int64_t n, double p, binomial->r = r = MIN(p, 1.0 - p); binomial->q = q = 1.0 - r; binomial->fm = fm = n * r + r; - binomial->m = m = (int64_t)floor(binomial->fm); + binomial->m = m = (RAND_INT_TYPE)floor(binomial->fm); binomial->p1 = p1 = floor(2.195 * sqrt(n * r * q) - 4.6 * q) + 0.5; binomial->xm = xm = m + 0.5; binomial->xl = xl = xm - p1; @@ -769,7 +770,7 @@ Step10: v = next_double(bitgen_state); if (u > p1) goto Step20; - y = (int64_t)floor(xm - p1 * v + u); + y = (RAND_INT_TYPE)floor(xm - p1 * v + u); goto Step60; Step20: @@ -779,13 +780,13 @@ Step20: v = v * c + 1.0 - fabs(m - x + 0.5) / p1; if (v > 1.0) goto Step10; - y = (int64_t)floor(x); + y = (RAND_INT_TYPE)floor(x); goto Step50; Step30: if (u > p3) goto Step40; - y = (int64_t)floor(xl + log(v) / laml); + y = (RAND_INT_TYPE)floor(xl + log(v) / laml); /* Reject if v==0.0 since previous cast is undefined */ if ((y < 0) || (v == 0.0)) goto Step10; @@ -793,7 +794,7 @@ Step30: goto Step50; Step40: - y = (int64_t)floor(xr - log(v) / lamr); + y = (RAND_INT_TYPE)floor(xr - log(v) / lamr); /* Reject if v==0.0 since previous cast is undefined */ if ((y > n) || (v == 0.0)) goto Step10; @@ -860,10 +861,10 @@ Step60: return y; } -int64_t random_binomial_inversion(bitgen_t *bitgen_state, int64_t n, double p, - binomial_t *binomial) { +RAND_INT_TYPE random_binomial_inversion(bitgen_t *bitgen_state, RAND_INT_TYPE n, + double p, binomial_t *binomial) { double q, qn, np, px, U; - int64_t X, bound; + RAND_INT_TYPE X, bound; if (!(binomial->has_binomial) || (binomial->nsave != n) || (binomial->psave != p)) { @@ -873,7 +874,7 @@ int64_t random_binomial_inversion(bitgen_t *bitgen_state, int64_t n, double p, binomial->q = q = 1.0 - p; binomial->r = qn = exp(n * log(q)); binomial->c = np = n * p; - binomial->m = bound = (int64_t)MIN(n, np + 10.0 * sqrt(np * q + 1)); + binomial->m = bound = (RAND_INT_TYPE)MIN(n, np + 10.0 * sqrt(np * q + 1)); } else { q = binomial->q; qn = binomial->r; @@ -897,8 +898,8 @@ int64_t random_binomial_inversion(bitgen_t *bitgen_state, int64_t n, double p, return X; } -int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, - binomial_t *binomial) { +RAND_INT_TYPE random_binomial(bitgen_t *bitgen_state, double p, RAND_INT_TYPE n, + binomial_t *binomial) { double q; if ((n == 0LL) || (p == 0.0f)) @@ -933,7 +934,7 @@ double random_noncentral_chisquare(bitgen_t *bitgen_state, double df, const double n = random_gauss_zig(bitgen_state) + sqrt(nonc); return Chi2 + n * n; } else { - const int64_t i = random_poisson(bitgen_state, nonc / 2.0); + const RAND_INT_TYPE i = random_poisson(bitgen_state, nonc / 2.0); return random_chisquare(bitgen_state, df + 2 * i); } } @@ -1017,9 +1018,15 @@ double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa) { } } -int64_t random_logseries(bitgen_t *bitgen_state, double p) { +/* + * RAND_INT_TYPE is used to share integer generators with RandomState which + * used long in place of int64_t. If changing a distribution that uses + * RAND_INT_TYPE, then the original unmodified copy must be retained for + * use in RandomState by copying to the legacy distributions source file. + */ +RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p) { double q, r, U, V; - int64_t result; + RAND_INT_TYPE result; r = log(1.0 - p); @@ -1031,7 +1038,7 @@ int64_t random_logseries(bitgen_t *bitgen_state, double p) { U = next_double(bitgen_state); q = 1.0 - exp(r * U); if (V <= q * q) { - result = (int64_t)floor(1 + log(V) / log(q)); + result = (RAND_INT_TYPE)floor(1 + log(V) / log(q)); if ((result < 1) || (V == 0.0)) { continue; } else { @@ -1045,9 +1052,9 @@ int64_t random_logseries(bitgen_t *bitgen_state, double p) { } } -int64_t random_geometric_search(bitgen_t *bitgen_state, double p) { +RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p) { double U; - int64_t X; + RAND_INT_TYPE X; double sum, prod, q; X = 1; @@ -1062,11 +1069,11 @@ int64_t random_geometric_search(bitgen_t *bitgen_state, double p) { return X; } -int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p) { - return (int64_t)ceil(log(1.0 - next_double(bitgen_state)) / log(1.0 - p)); +RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p) { + return (RAND_INT_TYPE)ceil(log(1.0 - next_double(bitgen_state)) / log(1.0 - p)); } -int64_t random_geometric(bitgen_t *bitgen_state, double p) { +RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p) { if (p >= 0.333333333333333333333333) { return random_geometric_search(bitgen_state, p); } else { @@ -1074,7 +1081,7 @@ int64_t random_geometric(bitgen_t *bitgen_state, double p) { } } -int64_t random_zipf(bitgen_t *bitgen_state, double a) { +RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a) { double am1, b; am1 = a - 1.0; @@ -1091,13 +1098,13 @@ int64_t random_zipf(bitgen_t *bitgen_state, double a) { * just reject this value. This function then models a Zipf * distribution truncated to sys.maxint. */ - if (X > LONG_MAX || X < 1.0) { + if (X > RAND_INT_MAX || X < 1.0) { continue; } T = pow(1.0 + 1.0 / X, am1); if (V * X * (T - 1.0) / (b - 1.0) <= T / b) { - return (int64_t)X; + return (RAND_INT_TYPE)X; } } } @@ -1121,9 +1128,10 @@ double random_triangular(bitgen_t *bitgen_state, double left, double mode, } } -int64_t random_hypergeometric_hyp(bitgen_t *bitgen_state, int64_t good, - int64_t bad, int64_t sample) { - int64_t d1, k, z; +RAND_INT_TYPE random_hypergeometric_hyp(bitgen_t *bitgen_state, + RAND_INT_TYPE good, RAND_INT_TYPE bad, + RAND_INT_TYPE sample) { + RAND_INT_TYPE d1, k, z; double d2, u, y; d1 = bad + good - sample; @@ -1133,12 +1141,12 @@ int64_t random_hypergeometric_hyp(bitgen_t *bitgen_state, int64_t good, k = sample; while (y > 0.0) { u = next_double(bitgen_state); - y -= (int64_t)floor(u + y / (d1 + k)); + y -= (RAND_INT_TYPE)floor(u + y / (d1 + k)); k--; if (k == 0) break; } - z = (int64_t)(d2 - y); + z = (RAND_INT_TYPE)(d2 - y); if (good > bad) z = sample - z; return z; @@ -1148,11 +1156,12 @@ int64_t random_hypergeometric_hyp(bitgen_t *bitgen_state, int64_t good, /* D2 = 3 - 2*sqrt(3/e) */ #define D1 1.7155277699214135 #define D2 0.8989161620588988 -int64_t random_hypergeometric_hrua(bitgen_t *bitgen_state, int64_t good, - int64_t bad, int64_t sample) { - int64_t mingoodbad, maxgoodbad, popsize, m, d9; +RAND_INT_TYPE random_hypergeometric_hrua(bitgen_t *bitgen_state, + RAND_INT_TYPE good, RAND_INT_TYPE bad, + RAND_INT_TYPE sample) { + RAND_INT_TYPE mingoodbad, maxgoodbad, popsize, m, d9; double d4, d5, d6, d7, d8, d10, d11; - int64_t Z; + RAND_INT_TYPE Z; double T, W, X, Y; mingoodbad = MIN(good, bad); @@ -1164,7 +1173,7 @@ int64_t random_hypergeometric_hrua(bitgen_t *bitgen_state, int64_t good, d6 = m * d4 + 0.5; d7 = sqrt((double)(popsize - m) * sample * d4 * d5 / (popsize - 1) + 0.5); d8 = D1 * d7 + D2; - d9 = (int64_t)floor((double)(m + 1) * (mingoodbad + 1) / (popsize + 2)); + d9 = (RAND_INT_TYPE)floor((double)(m + 1) * (mingoodbad + 1) / (popsize + 2)); d10 = (loggam(d9 + 1) + loggam(mingoodbad - d9 + 1) + loggam(m - d9 + 1) + loggam(maxgoodbad - m + d9 + 1)); d11 = MIN(MIN(m, mingoodbad) + 1.0, floor(d6 + 16 * d7)); @@ -1179,7 +1188,7 @@ int64_t random_hypergeometric_hrua(bitgen_t *bitgen_state, int64_t good, if ((W < 0.0) || (W >= d11)) continue; - Z = (int64_t)floor(W); + Z = (RAND_INT_TYPE)floor(W); T = d10 - (loggam(Z + 1) + loggam(mingoodbad - Z + 1) + loggam(m - Z + 1) + loggam(maxgoodbad - m + Z + 1)); @@ -1208,8 +1217,8 @@ int64_t random_hypergeometric_hrua(bitgen_t *bitgen_state, int64_t good, #undef D1 #undef D2 -int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, int64_t bad, - int64_t sample) { +RAND_INT_TYPE random_hypergeometric(bitgen_t *bitgen_state, RAND_INT_TYPE good, + RAND_INT_TYPE bad, RAND_INT_TYPE sample) { if (sample > 10) { return random_hypergeometric_hrua(bitgen_state, good, bad, sample); } else if (sample > 0) { @@ -1849,11 +1858,12 @@ void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off, } } -void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix, - double *pix, npy_intp d, binomial_t *binomial) { +void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, + RAND_INT_TYPE *mnix, double *pix, npy_intp d, + binomial_t *binomial) { double remaining_p = 1.0; npy_intp j; - int64_t dn = n; + RAND_INT_TYPE dn = n; for (j = 0; j < (d - 1); j++) { mnix[j] = random_binomial(bitgen_state, pix[j] / remaining_p, dn, binomial); dn = dn - mnix[j]; @@ -1861,8 +1871,8 @@ void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix, break; } remaining_p -= pix[j]; - if (dn > 0) { + } + if (dn > 0) { mnix[d - 1] = dn; - } } } diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h index d1d439d78..e43a5279c 100644 --- a/numpy/random/src/distributions/distributions.h +++ b/numpy/random/src/distributions/distributions.h @@ -10,19 +10,18 @@ #include "numpy/npy_common.h" #include "numpy/npy_math.h" -#ifdef _WIN32 -#if _MSC_VER == 1500 - -static NPY_INLINE int64_t llabs(int64_t x) { - int64_t o; - if (x < 0) { - o = -x; - } else { - o = x; - } - return o; -} -#endif +/* + * RAND_INT_TYPE is used to share integer generators with RandomState which + * used long in place of int64_t. If changing a distribution that uses + * RAND_INT_TYPE, then the original unmodified copy must be retained for + * use in RandomState by copying to the legacy distributions source file. + */ +#ifdef NP_RANDOM_LEGACY +#define RAND_INT_TYPE long +#define RAND_INT_MAX LONG_MAX +#else +#define RAND_INT_TYPE int64_t +#define RAND_INT_MAX INT64_MAX #endif #ifdef DLL_EXPORT @@ -151,18 +150,18 @@ DECLDIR double random_vonmises(bitgen_t *bitgen_state, double mu, double kappa); DECLDIR double random_triangular(bitgen_t *bitgen_state, double left, double mode, double right); -DECLDIR int64_t random_poisson(bitgen_t *bitgen_state, double lam); -DECLDIR int64_t random_negative_binomial(bitgen_t *bitgen_state, double n, +DECLDIR RAND_INT_TYPE random_poisson(bitgen_t *bitgen_state, double lam); +DECLDIR RAND_INT_TYPE random_negative_binomial(bitgen_t *bitgen_state, double n, double p); -DECLDIR int64_t random_binomial(bitgen_t *bitgen_state, double p, int64_t n, +DECLDIR RAND_INT_TYPE random_binomial(bitgen_t *bitgen_state, double p, RAND_INT_TYPE n, binomial_t *binomial); -DECLDIR int64_t random_logseries(bitgen_t *bitgen_state, double p); -DECLDIR int64_t random_geometric_search(bitgen_t *bitgen_state, double p); -DECLDIR int64_t random_geometric_inversion(bitgen_t *bitgen_state, double p); -DECLDIR int64_t random_geometric(bitgen_t *bitgen_state, double p); -DECLDIR int64_t random_zipf(bitgen_t *bitgen_state, double a); -DECLDIR int64_t random_hypergeometric(bitgen_t *bitgen_state, int64_t good, - int64_t bad, int64_t sample); +DECLDIR RAND_INT_TYPE random_logseries(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_geometric_search(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_geometric_inversion(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_geometric(bitgen_t *bitgen_state, double p); +DECLDIR RAND_INT_TYPE random_zipf(bitgen_t *bitgen_state, double a); +DECLDIR RAND_INT_TYPE random_hypergeometric(bitgen_t *bitgen_state, RAND_INT_TYPE good, + RAND_INT_TYPE bad, RAND_INT_TYPE sample); DECLDIR uint64_t random_interval(bitgen_t *bitgen_state, uint64_t max); @@ -205,7 +204,7 @@ DECLDIR void random_bounded_bool_fill(bitgen_t *bitgen_state, npy_bool off, npy_bool rng, npy_intp cnt, bool use_masked, npy_bool *out); -DECLDIR void random_multinomial(bitgen_t *bitgen_state, int64_t n, int64_t *mnix, +DECLDIR void random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, RAND_INT_TYPE *mnix, double *pix, npy_intp d, binomial_t *binomial); #endif diff --git a/numpy/random/src/legacy/distributions-boxmuller.c b/numpy/random/src/legacy/distributions-boxmuller.c index 2c715799f..e7b3bdde7 100644 --- a/numpy/random/src/legacy/distributions-boxmuller.c +++ b/numpy/random/src/legacy/distributions-boxmuller.c @@ -163,7 +163,7 @@ double legacy_standard_t(aug_bitgen_t *aug_state, double df) { int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, double p) { double Y = legacy_gamma(aug_state, n, (1 - p) / p); - return random_poisson(aug_state->bit_generator, Y); + return (int64_t)random_poisson(aug_state->bit_generator, Y); } double legacy_standard_cauchy(aug_bitgen_t *aug_state) { @@ -212,3 +212,40 @@ double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden) { double legacy_exponential(aug_bitgen_t *aug_state, double scale) { return scale * legacy_standard_exponential(aug_state); } + +/* + * This is a wrapper function that matches the expected template. In the legacy + * generator, all int types are long, so this accepts int64 and then converts + * them to longs. These values must be in bounds for long and this is checked + * outside this function + * + * The remaining are included for the return type only + */ +int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, int64_t good, + int64_t bad, int64_t sample) { + return (int64_t)random_hypergeometric(bitgen_state, (RAND_INT_TYPE)good, + (RAND_INT_TYPE)bad, + (RAND_INT_TYPE)sample); +} + + int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p) { + return (int64_t)random_logseries(bitgen_state, p); +} + + int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam) { + return (int64_t)random_poisson(bitgen_state, lam); +} + + int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a) { + return (int64_t)random_zipf(bitgen_state, a); +} + + int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p) { + return (int64_t)random_geometric(bitgen_state, p); +} + + void legacy_random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, + RAND_INT_TYPE *mnix, double *pix, npy_intp d, + binomial_t *binomial) { + return random_multinomial(bitgen_state, n, mnix, pix, d, binomial); +} diff --git a/numpy/random/src/legacy/distributions-boxmuller.h b/numpy/random/src/legacy/distributions-boxmuller.h index 07e093b26..005c4e5d2 100644 --- a/numpy/random/src/legacy/distributions-boxmuller.h +++ b/numpy/random/src/legacy/distributions-boxmuller.h @@ -36,5 +36,17 @@ extern double legacy_f(aug_bitgen_t *aug_state, double dfnum, double dfden); extern double legacy_normal(aug_bitgen_t *aug_state, double loc, double scale); extern double legacy_standard_gamma(aug_bitgen_t *aug_state, double shape); extern double legacy_exponential(aug_bitgen_t *aug_state, double scale); +extern int64_t legacy_negative_binomial(aug_bitgen_t *aug_state, double n, + double p); +extern int64_t legacy_random_hypergeometric(bitgen_t *bitgen_state, + int64_t good, int64_t bad, + int64_t sample); +extern int64_t legacy_random_logseries(bitgen_t *bitgen_state, double p); +extern int64_t legacy_random_poisson(bitgen_t *bitgen_state, double lam); +extern int64_t legacy_random_zipf(bitgen_t *bitgen_state, double a); +extern int64_t legacy_random_geometric(bitgen_t *bitgen_state, double p); +void legacy_random_multinomial(bitgen_t *bitgen_state, RAND_INT_TYPE n, + RAND_INT_TYPE *mnix, double *pix, npy_intp d, + binomial_t *binomial); #endif diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py index 3bb3bd791..770c32a39 100644 --- a/numpy/random/tests/test_generator_mt19937.py +++ b/numpy/random/tests/test_generator_mt19937.py @@ -182,10 +182,10 @@ class TestIntegers(object): lbnd = 0 if dt is bool else np.iinfo(dt).min ubnd = 2 if dt is bool else np.iinfo(dt).max + (not endpoint) - assert_raises(ValueError, self.rfunc, [ - lbnd - 1] * 2, [ubnd] * 2, endpoint=endpoint, dtype=dt) - assert_raises(ValueError, self.rfunc, [ - lbnd] * 2, [ubnd + 1] * 2, endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [lbnd - 1] * 2, [ubnd] * 2, + endpoint=endpoint, dtype=dt) + assert_raises(ValueError, self.rfunc, [lbnd] * 2, + [ubnd + 1] * 2, endpoint=endpoint, dtype=dt) assert_raises(ValueError, self.rfunc, ubnd, [lbnd] * 2, endpoint=endpoint, dtype=dt) assert_raises(ValueError, self.rfunc, [1] * 2, 0, @@ -1895,7 +1895,7 @@ class TestBroadcast(object): [4, 5, 1, 4, 3, 3]], [[1, 1, 1, 0, 0, 2], [2, 0, 4, 3, 7, 4]], - [[1, 2, 0, 0, 2, 2], + [[1, 2, 0, 0, 2, 0], [3, 2, 3, 4, 2, 6]]], dtype=np.int64) assert_array_equal(actual, desired) diff --git a/numpy/random/tests/test_randomstate.py b/numpy/random/tests/test_randomstate.py index 0c57b9aaf..75c35ef62 100644 --- a/numpy/random/tests/test_randomstate.py +++ b/numpy/random/tests/test_randomstate.py @@ -1,8 +1,10 @@ +import hashlib import pickle import sys import warnings import numpy as np +import pytest from numpy.testing import ( assert_, assert_raises, assert_equal, assert_warns, assert_no_warnings, assert_array_equal, assert_array_almost_equal, @@ -11,6 +13,44 @@ from numpy.testing import ( from numpy.random import MT19937, Xoshiro256, mtrand as random +INT_FUNCS = {'binomial': (100.0, 0.6), + 'geometric': (.5,), + 'hypergeometric': (20, 20, 10), + 'logseries': (.5,), + 'multinomial': (20, np.ones(6) / 6.0), + 'negative_binomial': (100, .5), + 'poisson': (10.0,), + 'zipf': (2,), + } + +if np.iinfo(int).max < 2**32: + # Windows and some 32-bit platforms, e.g., ARM + INT_FUNC_HASHES = {'binomial': '670e1c04223ffdbab27e08fbbad7bdba', + 'logseries': '6bd0183d2f8030c61b0d6e11aaa60caf', + 'geometric': '6e9df886f3e1e15a643168568d5280c0', + 'hypergeometric': '7964aa611b046aecd33063b90f4dec06', + 'multinomial': '68a0b049c16411ed0aa4aff3572431e4', + 'negative_binomial': 'dc265219eec62b4338d39f849cd36d09', + 'poisson': '7b4dce8e43552fc82701c2fa8e94dc6e', + 'zipf': 'fcd2a2095f34578723ac45e43aca48c5', + } +else: + INT_FUNC_HASHES = {'binomial': 'b5f8dcd74f172836536deb3547257b14', + 'geometric': '8814571f45c87c59699d62ccd3d6c350', + 'hypergeometric': 'bc64ae5976eac452115a16dad2dcf642', + 'logseries': '84be924b37485a27c4a98797bc88a7a4', + 'multinomial': 'ec3c7f9cf9664044bb0c6fb106934200', + 'negative_binomial': '210533b2234943591364d0117a552969', + 'poisson': '0536a8850c79da0c78defd742dccc3e0', + 'zipf': 'f2841f504dd2525cd67cdcad7561e532', + } + + +@pytest.fixture(scope='module', params=INT_FUNCS) +def int_func(request): + return (request.param, INT_FUNCS[request.param], + INT_FUNC_HASHES[request.param]) + def assert_mt19937_state_equal(a, b): assert_equal(a['bit_generator'], b['bit_generator']) @@ -269,7 +309,6 @@ class TestRandint(object): assert_(vals.min() >= 0) def test_repeatability(self): - import hashlib # We use a md5 hash of generated sequences of 1000 samples # in the range [0, 6) for all but bool, where the range # is [0, 2). Hashes are for little endian numbers. @@ -1862,3 +1901,23 @@ class TestSingleEltArrayInput(object): out = func(self.argOne, self.argTwo[0], self.argThree) assert_equal(out.shape, self.tgtShape) + + +# Ensure returned array dtype is corect for platform +def test_integer_dtype(int_func): + random.seed(123456789) + fname, args, md5 = int_func + f = getattr(random, fname) + actual = f(*args, size=2) + assert_(actual.dtype == np.dtype('l')) + + +def test_integer_repeat(int_func): + random.seed(123456789) + fname, args, md5 = int_func + f = getattr(random, fname) + val = f(*args, size=1000000) + if sys.byteorder != 'little': + val = val.byteswap() + res = hashlib.md5(val.view(np.int8)).hexdigest() + assert_(res == md5) |