diff options
Diffstat (limited to 'numpy/random/_generator.pyx')
-rw-r--r-- | numpy/random/_generator.pyx | 315 |
1 files changed, 250 insertions, 65 deletions
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index cc2852da7..7ffa36775 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -5,6 +5,7 @@ import warnings from cpython.pycapsule cimport PyCapsule_IsValid, PyCapsule_GetPointer from cpython cimport (Py_INCREF, PyFloat_AsDouble) +from cpython.mem cimport PyMem_Malloc, PyMem_Free cimport cython import numpy as np @@ -25,8 +26,16 @@ from ._common cimport (POISSON_LAM_MAX, CONS_POSITIVE, CONS_NONE, CONS_GT_1, CONS_POSITIVE_NOT_NAN, CONS_POISSON, double_fill, cont, kahan_sum, cont_broadcast_3, float_fill, cont_f, check_array_constraint, check_constraint, disc, discrete_broadcast_iii, + validate_output_shape ) +cdef extern from "numpy/arrayobject.h": + int PyArray_ResolveWritebackIfCopy(np.ndarray) + object PyArray_FromArray(np.PyArrayObject *, np.PyArray_Descr *, int) + + enum: + NPY_ARRAY_WRITEBACKIFCOPY + np.import_array() cdef int64_t _safe_sum_nonneg_int64(size_t num_colors, int64_t *colors): @@ -47,6 +56,77 @@ cdef int64_t _safe_sum_nonneg_int64(size_t num_colors, int64_t *colors): return sum +cdef inline void _shuffle_raw_wrap(bitgen_t *bitgen, np.npy_intp n, + np.npy_intp first, np.npy_intp itemsize, + np.npy_intp stride, + char* data, char* buf) nogil: + # We trick gcc into providing a specialized implementation for + # the most common case, yielding a ~33% performance improvement. + # Note that apparently, only one branch can ever be specialized. + if itemsize == sizeof(np.npy_intp): + _shuffle_raw(bitgen, n, first, sizeof(np.npy_intp), stride, data, buf) + else: + _shuffle_raw(bitgen, n, first, itemsize, stride, data, buf) + + +cdef inline void _shuffle_raw(bitgen_t *bitgen, np.npy_intp n, + np.npy_intp first, np.npy_intp itemsize, + np.npy_intp stride, + char* data, char* buf) nogil: + """ + Parameters + ---------- + bitgen + Pointer to a bitgen_t instance. + n + Number of elements in data + first + First observation to shuffle. Shuffles n-1, + n-2, ..., first, so that when first=1 the entire + array is shuffled + itemsize + Size in bytes of item + stride + Array stride + data + Location of data + buf + Location of buffer (itemsize) + """ + cdef np.npy_intp i, j + + for i in reversed(range(first, n)): + j = random_interval(bitgen, i) + string.memcpy(buf, data + j * stride, itemsize) + string.memcpy(data + j * stride, data + i * stride, itemsize) + string.memcpy(data + i * stride, buf, itemsize) + + +cdef inline void _shuffle_int(bitgen_t *bitgen, np.npy_intp n, + np.npy_intp first, int64_t* data) nogil: + """ + Parameters + ---------- + bitgen + Pointer to a bitgen_t instance. + n + Number of elements in data + first + First observation to shuffle. Shuffles n-1, + n-2, ..., first, so that when first=1 the entire + array is shuffled + data + Location of data + """ + cdef np.npy_intp i, j + cdef int64_t temp + for i in reversed(range(first, n)): + j = random_bounded_uint64(bitgen, 0, i, 0, 0) + temp = data[j] + data[j] = data[i] + data[i] = temp + + cdef bint _check_bit_generator(object bitgen): """Check if an object satisfies the BitGenerator interface. """ @@ -707,8 +787,8 @@ cdef class Generator: idx = np.PyArray_Arange(0, pop_size_i, 1, np.NPY_INT64) idx_data = <int64_t*>(<np.ndarray>idx).data with self.lock, nogil: - self._shuffle_int(pop_size_i, max(pop_size_i - size_i, 1), - idx_data) + _shuffle_int(&self._bitgen, pop_size_i, + max(pop_size_i - size_i, 1), idx_data) # Copy to allow potentially large array backing idx to be gc idx = idx[(pop_size - size):].copy() else: @@ -736,7 +816,7 @@ cdef class Generator: hash_set[loc] = j idx_data[j - pop_size_i + size_i] = j if shuffle: - self._shuffle_int(size_i, 1, idx_data) + _shuffle_int(&self._bitgen, size_i, 1, idx_data) idx.shape = shape if is_scalar and isinstance(idx, np.ndarray): @@ -2809,6 +2889,7 @@ cdef class Generator: cnt = np.PyArray_SIZE(randoms) it = np.PyArray_MultiIterNew3(randoms, p_arr, n_arr) + validate_output_shape(it.shape, randoms) with self.lock, nogil: for i in range(cnt): _dp = (<double*>np.PyArray_MultiIter_DATA(it, 1))[0] @@ -3606,7 +3687,7 @@ cdef class Generator: Now, do one experiment throwing the dice 10 time, and 10 times again, and another throwing the dice 20 times, and 20 times again: - >>> rng.multinomial([[10], [20]], [1/6.]*6, size=2) + >>> rng.multinomial([[10], [20]], [1/6.]*6, size=(2, 2)) array([[[2, 4, 0, 1, 2, 1], [1, 3, 0, 3, 1, 2]], [[1, 4, 4, 4, 4, 3], @@ -3661,6 +3742,7 @@ cdef class Generator: temp = np.empty(size, dtype=np.int8) temp_arr = <np.ndarray>temp it = np.PyArray_MultiIterNew2(on, temp_arr) + validate_output_shape(it.shape, temp_arr) shape = it.shape + (d,) multin = np.zeros(shape, dtype=np.int64) mnarr = <np.ndarray>multin @@ -3941,7 +4023,7 @@ cdef class Generator: The drawn samples, of shape ``(size, k)``. Raises - ------- + ------ ValueError If any value in ``alpha`` is less than or equal to zero @@ -4111,7 +4193,159 @@ cdef class Generator: return diric - # Shuffling and permutations: + def permuted(self, object x, *, axis=None, out=None): + """ + permuted(x, axis=None, out=None) + + Randomly permute `x` along axis `axis`. + + Unlike `shuffle`, each slice along the given axis is shuffled + independently of the others. + + Parameters + ---------- + x : array_like, at least one-dimensional + Array to be shuffled. + axis : int, optional + Slices of `x` in this axis are shuffled. Each slice + is shuffled independently of the others. If `axis` is + None, the flattened array is shuffled. + out : ndarray, optional + If given, this is the destinaton of the shuffled array. + If `out` is None, a shuffled copy of the array is returned. + + Returns + ------- + ndarray + If `out` is None, a shuffled copy of `x` is returned. + Otherwise, the shuffled array is stored in `out`, + and `out` is returned + + See Also + -------- + shuffle + permutation + + Examples + -------- + Create a `numpy.random.Generator` instance: + + >>> rng = np.random.default_rng() + + Create a test array: + + >>> x = np.arange(24).reshape(3, 8) + >>> x + array([[ 0, 1, 2, 3, 4, 5, 6, 7], + [ 8, 9, 10, 11, 12, 13, 14, 15], + [16, 17, 18, 19, 20, 21, 22, 23]]) + + Shuffle the rows of `x`: + + >>> y = rng.permuted(x, axis=1) + >>> y + array([[ 4, 3, 6, 7, 1, 2, 5, 0], # random + [15, 10, 14, 9, 12, 11, 8, 13], + [17, 16, 20, 21, 18, 22, 23, 19]]) + + `x` has not been modified: + + >>> x + array([[ 0, 1, 2, 3, 4, 5, 6, 7], + [ 8, 9, 10, 11, 12, 13, 14, 15], + [16, 17, 18, 19, 20, 21, 22, 23]]) + + To shuffle the rows of `x` in-place, pass `x` as the `out` + parameter: + + >>> y = rng.permuted(x, axis=1, out=x) + >>> x + array([[ 3, 0, 4, 7, 1, 6, 2, 5], # random + [ 8, 14, 13, 9, 12, 11, 15, 10], + [17, 18, 16, 22, 19, 23, 20, 21]]) + + Note that when the ``out`` parameter is given, the return + value is ``out``: + + >>> y is x + True + """ + + cdef int ax + cdef np.npy_intp axlen, axstride, itemsize + cdef void *buf + cdef np.flatiter it + cdef np.ndarray to_shuffle + cdef int status + cdef int flags + + x = np.asarray(x) + + if out is None: + out = x.copy(order='K') + else: + if type(out) is not np.ndarray: + raise TypeError('out must be a numpy array') + if out.shape != x.shape: + raise ValueError('out must have the same shape as x') + np.copyto(out, x, casting='safe') + + if axis is None: + if x.ndim > 1: + if not (np.PyArray_FLAGS(out) & (np.NPY_ARRAY_C_CONTIGUOUS | + np.NPY_ARRAY_F_CONTIGUOUS)): + flags = (np.NPY_ARRAY_C_CONTIGUOUS | + NPY_ARRAY_WRITEBACKIFCOPY) + to_shuffle = PyArray_FromArray(<np.PyArrayObject *>out, + <np.PyArray_Descr *>NULL, flags) + self.shuffle(to_shuffle.ravel(order='K')) + # Because we only execute this block if out is not + # contiguous, we know this call will always result in a + # copy of to_shuffle back to out. I.e. status will be 1. + status = PyArray_ResolveWritebackIfCopy(to_shuffle) + assert status == 1 + else: + # out is n-d with n > 1, but is either C- or F-contiguous, + # so we know out.ravel(order='A') is a view. + self.shuffle(out.ravel(order='A')) + else: + # out is 1-d + self.shuffle(out) + return out + + ax = normalize_axis_index(axis, np.ndim(out)) + itemsize = out.itemsize + axlen = out.shape[ax] + axstride = out.strides[ax] + + it = np.PyArray_IterAllButAxis(out, &ax) + + buf = PyMem_Malloc(itemsize) + if buf == NULL: + raise MemoryError('memory allocation failed in permuted') + + if out.dtype.hasobject: + # Keep the GIL when shuffling an object array. + with self.lock: + while np.PyArray_ITER_NOTDONE(it): + _shuffle_raw_wrap(&self._bitgen, axlen, 0, itemsize, + axstride, + <char *>np.PyArray_ITER_DATA(it), + <char *>buf) + np.PyArray_ITER_NEXT(it) + else: + # out is not an object array, so we can release the GIL. + with self.lock, nogil: + while np.PyArray_ITER_NOTDONE(it): + _shuffle_raw_wrap(&self._bitgen, axlen, 0, itemsize, + axstride, + <char *>np.PyArray_ITER_DATA(it), + <char *>buf) + np.PyArray_ITER_NEXT(it) + + PyMem_Free(buf) + return out + def shuffle(self, object x, axis=0): """ shuffle(x, axis=0) @@ -4174,14 +4408,15 @@ cdef class Generator: # when the function exits. buf = np.empty(itemsize, dtype=np.int8) # GC'd at function exit buf_ptr = <char*><size_t>np.PyArray_DATA(buf) - with self.lock: - # We trick gcc into providing a specialized implementation for - # the most common case, yielding a ~33% performance improvement. - # Note that apparently, only one branch can ever be specialized. - if itemsize == sizeof(np.npy_intp): - self._shuffle_raw(n, 1, sizeof(np.npy_intp), stride, x_ptr, buf_ptr) - else: - self._shuffle_raw(n, 1, itemsize, stride, x_ptr, buf_ptr) + if x.dtype.hasobject: + with self.lock: + _shuffle_raw_wrap(&self._bitgen, n, 1, itemsize, stride, + x_ptr, buf_ptr) + else: + # Same as above, but the GIL is released. + with self.lock, nogil: + _shuffle_raw_wrap(&self._bitgen, n, 1, itemsize, stride, + x_ptr, buf_ptr) elif isinstance(x, np.ndarray) and x.ndim and x.size: x = np.swapaxes(x, 0, axis) buf = np.empty_like(x[0, ...]) @@ -4204,56 +4439,6 @@ cdef class Generator: j = random_interval(&self._bitgen, i) x[i], x[j] = x[j], x[i] - cdef inline _shuffle_raw(self, np.npy_intp n, np.npy_intp first, - np.npy_intp itemsize, np.npy_intp stride, - char* data, char* buf): - """ - Parameters - ---------- - n - Number of elements in data - first - First observation to shuffle. Shuffles n-1, - n-2, ..., first, so that when first=1 the entire - array is shuffled - itemsize - Size in bytes of item - stride - Array stride - data - Location of data - buf - Location of buffer (itemsize) - """ - cdef np.npy_intp i, j - for i in reversed(range(first, n)): - j = random_interval(&self._bitgen, i) - string.memcpy(buf, data + j * stride, itemsize) - string.memcpy(data + j * stride, data + i * stride, itemsize) - string.memcpy(data + i * stride, buf, itemsize) - - cdef inline void _shuffle_int(self, np.npy_intp n, np.npy_intp first, - int64_t* data) nogil: - """ - Parameters - ---------- - n - Number of elements in data - first - First observation to shuffle. Shuffles n-1, - n-2, ..., first, so that when first=1 the entire - array is shuffled - data - Location of data - """ - cdef np.npy_intp i, j - cdef int64_t temp - for i in reversed(range(first, n)): - j = random_bounded_uint64(&self._bitgen, 0, i, 0, 0) - temp = data[j] - data[j] = data[i] - data[i] = temp - def permutation(self, object x, axis=0): """ permutation(x, axis=0) @@ -4336,7 +4521,7 @@ def default_rng(seed=None): unpredictable entropy will be pulled from the OS. If an ``int`` or ``array_like[ints]`` is passed, then it will be passed to `SeedSequence` to derive the initial `BitGenerator` state. One may also - pass in a`SeedSequence` instance + pass in a `SeedSequence` instance. Additionally, when passed a `BitGenerator`, it will be wrapped by `Generator`. If passed a `Generator`, it will be returned unaltered. |