BUG: random: biased samples from integers() with 8 or 16 bit dtype.

When an 8 or 16 bit dtype was given to the integers() method of the Generator class, the resulting sample was biased. The problem was the lines of the form const uint8_t threshold = -rng_excl % rng_excl; in the implementations of Lemire's method, in the C file distributions.c. The intent was to compute (UINT8_MAX+1 - rng_excl) % rng_excl However, when the type of rng_excl has integer conversion rank lower than a C int (which is almost certainly the case for the 8 and 16 bit types), the terms in the expression -rng_excl % rng_excl are promoted to int, and the result of the calculation is always 0. The fix is to make the expression explicit, and write it as const uint8_t threshold = (UINT8_MAX - rng) % rng_excl; rng is used, because rng_excl is simply rng + 1; by using rng, we we only need the constant UINT#_MAX, without the extra +1. For consistency, I made the same change for all the data types (8, 16, 32 and 64 bit). Closes gh-14774.
author: Warren Weckesser <warren.weckesser@gmail.com> 2019-10-24 22:03:05 -0400
committer: Warren Weckesser <warren.weckesser@gmail.com> 2019-10-24 22:06:31 -0400
commit: 4780c1bc78065154e2817e899a37acdde7225ec5 (patch)
tree: c39d5ff2ad634fe3244dfff358460ba2cbdb1569 /numpy/random/src
parent: 34e83881a9e377d8cc8fe2644bfb06600d9e77d1 (diff)
download: numpy-4780c1bc78065154e2817e899a37acdde7225ec5.tar.gz
1 files changed, 5 insertions, 14 deletions
diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c
index b382ead0b..ab8de8bcb 100644
--- a/numpy/random/src/distributions/distributions.c
+++ b/numpy/random/src/distributions/distributions.c
@@ -1170,10 +1170,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((__uint128_t)next_uint64(bitgen_state)) * rng_excl;
@@ -1196,10 +1193,7 @@ static NPY_INLINE uint64_t bounded_lemire_uint64(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-
-    const uint64_t threshold = -rng_excl % rng_excl;
-    /* Same as:threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) %
-     * rng_excl; */
+    const uint64_t threshold = (UINT64_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       x = next_uint64(bitgen_state);
@@ -1260,8 +1254,7 @@ static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint32_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint64_t)(0x100000000ULL - rng_excl)) % rng_excl; */
+    const uint32_t threshold = (UINT32_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint64_t)next_uint32(bitgen_state)) * rng_excl;
@@ -1295,8 +1288,7 @@ static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint16_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint32_t)(0x10000ULL - rng_excl)) % rng_excl; */
+    const uint16_t threshold = (UINT16_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint32_t)buffered_uint16(bitgen_state, bcnt, buf)) * rng_excl;
@@ -1331,8 +1323,7 @@ static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(bitgen_t *bitgen_state,
 
   if (leftover < rng_excl) {
     /* `rng_excl` is a simple upper bound for `threshold`. */
-    const uint8_t threshold = -rng_excl % rng_excl;
-    /* Same as: threshold=((uint16_t)(0x100ULL - rng_excl)) % rng_excl; */
+    const uint8_t threshold = (UINT8_MAX - rng) % rng_excl;
 
     while (leftover < threshold) {
       m = ((uint16_t)buffered_uint8(bitgen_state, bcnt, buf)) * rng_excl;
author	Warren Weckesser <warren.weckesser@gmail.com>	2019-10-24 22:03:05 -0400
committer	Warren Weckesser <warren.weckesser@gmail.com>	2019-10-24 22:06:31 -0400
commit	4780c1bc78065154e2817e899a37acdde7225ec5 (patch)
tree	c39d5ff2ad634fe3244dfff358460ba2cbdb1569 /numpy/random/src
parent	34e83881a9e377d8cc8fe2644bfb06600d9e77d1 (diff)
download	numpy-4780c1bc78065154e2817e899a37acdde7225ec5.tar.gz