Rewrite Floyd algorithm

- use a simple hash table with linear probing - adjust heuristic
author: Guillaume Horel <guillaume.horel@gmail.com> 2019-06-20 16:32:42 -0400
committer: Guillaume Horel <guillaume.horel@gmail.com> 2019-07-11 10:00:03 -0400
commit: e73122297f708cd59bdcefa1bce950f45a7c75fd (patch)
tree: 1829c1f647ccb26adc4787cc008187d061ac1010 /numpy/random
parent: bf487177ccf654c7c0ddf8dcacf7f2af20b14a11 (diff)
download: numpy-e73122297f708cd59bdcefa1bce950f45a7c75fd.tar.gz
1 files changed, 27 insertions, 17 deletions
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index 6adf0f00b..19ae3225e 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -11,6 +11,9 @@ from .pcg64 import PCG64
 from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer
 from cpython cimport (Py_INCREF, PyFloat_AsDouble)
 from libc cimport string
+from libc.stdlib cimport malloc, free
+from libc.stdio cimport printf
+from libc.string cimport memset
 
 cimport cython
 cimport numpy as np
@@ -601,6 +604,8 @@ cdef class Generator:
         cdef int64_t val, t, loc, size_i, pop_size_i
         cdef int64_t *idx_data
         cdef np.npy_intp j
+        cdef uint64_t set_size, mask
+        cdef uint64_t *hash_set
         # Format and Verify input
         a = np.array(a, copy=False)
         if a.ndim == 0:
@@ -687,7 +692,7 @@ cdef class Generator:
                 size_i = size
                 pop_size_i = pop_size
                 # This is a heuristic tuning. should be improvable
-                if pop_size_i > 200 and (size > 200 or size > (10 * pop_size // size)):
+                if pop_size_i > 10000 and (size_i > (pop_size_i // 10)):
                     # Tail shuffle size elements
                     idx = np.arange(pop_size, dtype=np.int64)
                     idx_ptr = np.PyArray_BYTES(<np.ndarray>idx)
@@ -697,26 +702,31 @@ cdef class Generator:
                     # Copy to allow potentially large array backing idx to be gc
                     idx = idx[(pop_size - size):].copy()
                 else:
-                    # Floyds's algorithm with precomputed indices
-                    # Worst case, O(n**2) when size is close to pop_size
+                    # Floyd's algorithm
                     idx = np.empty(size, dtype=np.int64)
                     idx_data = <int64_t*>np.PyArray_DATA(<np.ndarray>idx)
-                    idx_set = set()
-                    loc = 0
-                    # Sample indices with one pass to avoid reacquiring the lock
+                    # smallest power of 2 larger than 1.2 * size
+                    set_size = <uint64_t>(1.2 * size_i)
+                    mask = _gen_mask(set_size)
+                    set_size = 1 + mask
+                    hash_set = <uint64_t*>malloc(sizeof(uint64_t) * set_size)
+                    memset(hash_set, 0xffff, sizeof(uint64_t) * set_size)
                     with self.lock:
                         for j in range(pop_size_i - size_i, pop_size_i):
-                            idx_data[loc] = random_interval(&self._bitgen, j)
-                            loc += 1
-                    loc = 0
-                    while len(idx_set) < size_i:
-                        for j in range(pop_size_i - size_i, pop_size_i):
-                            if idx_data[loc] not in idx_set:
-                                val = idx_data[loc]
-                            else:
-                                idx_data[loc] = val = j
-                            idx_set.add(val)
-                            loc += 1
+                            val = random_interval(&self._bitgen, j)
+                            loc = val & mask
+                            while hash_set[loc] != -1 and hash_set[loc] != val:
+                                loc = (loc + 1) & mask
+                            if hash_set[loc] == -1: # then val not in hash_set
+                                hash_set[loc] = val
+                                idx_data[j - pop_size_i + size_i] = val
+                            else: # we need to insert j instead
+                                loc = j & mask
+                                while hash_set[loc] != -1:
+                                    loc = (loc + 1) & mask
+                                hash_set[loc] = j
+                                idx_data[j - pop_size_i + size_i] = j
+                    free(hash_set)
                 if shape is not None:
                     idx.shape = shape
author	Guillaume Horel <guillaume.horel@gmail.com>	2019-06-20 16:32:42 -0400
committer	Guillaume Horel <guillaume.horel@gmail.com>	2019-07-11 10:00:03 -0400
commit	e73122297f708cd59bdcefa1bce950f45a7c75fd (patch)
tree	1829c1f647ccb26adc4787cc008187d061ac1010 /numpy/random
parent	bf487177ccf654c7c0ddf8dcacf7f2af20b14a11 (diff)
download	numpy-e73122297f708cd59bdcefa1bce950f45a7c75fd.tar.gz