summaryrefslogtreecommitdiff
path: root/numpy/random/_bit_generator.pyx
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/random/_bit_generator.pyx')
-rw-r--r--numpy/random/_bit_generator.pyx626
1 files changed, 626 insertions, 0 deletions
diff --git a/numpy/random/_bit_generator.pyx b/numpy/random/_bit_generator.pyx
new file mode 100644
index 000000000..21d21e6bb
--- /dev/null
+++ b/numpy/random/_bit_generator.pyx
@@ -0,0 +1,626 @@
+"""
+BitGenerator base class and SeedSequence used to seed the BitGenerators.
+
+SeedSequence is derived from Melissa E. O'Neill's C++11 `std::seed_seq`
+implementation, as it has a lot of nice properties that we want.
+
+https://gist.github.com/imneme/540829265469e673d045
+http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html
+
+The MIT License (MIT)
+
+Copyright (c) 2015 Melissa E. O'Neill
+Copyright (c) 2019 NumPy Developers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import abc
+import sys
+from itertools import cycle
+import re
+
+try:
+ from secrets import randbits
+except ImportError:
+ # secrets unavailable on python 3.5 and before
+ from random import SystemRandom
+ randbits = SystemRandom().getrandbits
+
+try:
+ from threading import Lock
+except ImportError:
+ from dummy_threading import Lock
+
+from cpython.pycapsule cimport PyCapsule_New
+
+import numpy as np
+cimport numpy as np
+
+from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi)
+
+__all__ = ['SeedSequence', 'BitGenerator']
+
+np.import_array()
+
+DECIMAL_RE = re.compile(r'[0-9]+')
+
+cdef uint32_t DEFAULT_POOL_SIZE = 4 # Appears also in docstring for pool_size
+cdef uint32_t INIT_A = 0x43b0d7e5
+cdef uint32_t MULT_A = 0x931e8875
+cdef uint32_t INIT_B = 0x8b51f9dd
+cdef uint32_t MULT_B = 0x58f38ded
+cdef uint32_t MIX_MULT_L = 0xca01f9dd
+cdef uint32_t MIX_MULT_R = 0x4973f715
+cdef uint32_t XSHIFT = np.dtype(np.uint32).itemsize * 8 // 2
+cdef uint32_t MASK32 = 0xFFFFFFFF
+
+def _int_to_uint32_array(n):
+ arr = []
+ if n < 0:
+ raise ValueError("expected non-negative integer")
+ if n == 0:
+ arr.append(np.uint32(n))
+ if isinstance(n, np.unsignedinteger):
+ # Cannot do n & MASK32, convert to python int
+ n = int(n)
+ while n > 0:
+ arr.append(np.uint32(n & MASK32))
+ n //= (2**32)
+ return np.array(arr, dtype=np.uint32)
+
+def _coerce_to_uint32_array(x):
+ """ Coerce an input to a uint32 array.
+
+ If a `uint32` array, pass it through directly.
+ If a non-negative integer, then break it up into `uint32` words, lowest
+ bits first.
+ If a string starting with "0x", then interpret as a hex integer, as above.
+ If a string of decimal digits, interpret as a decimal integer, as above.
+ If a sequence of ints or strings, interpret each element as above and
+ concatenate.
+
+ Note that the handling of `int64` or `uint64` arrays are not just
+ straightforward views as `uint32` arrays. If an element is small enough to
+ fit into a `uint32`, then it will only take up one `uint32` element in the
+ output. This is to make sure that the interpretation of a sequence of
+ integers is the same regardless of numpy's default integer type, which
+ differs on different platforms.
+
+ Parameters
+ ----------
+ x : int, str, sequence of int or str
+
+ Returns
+ -------
+ seed_array : uint32 array
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> from numpy.random._bit_generator import _coerce_to_uint32_array
+ >>> _coerce_to_uint32_array(12345)
+ array([12345], dtype=uint32)
+ >>> _coerce_to_uint32_array('12345')
+ array([12345], dtype=uint32)
+ >>> _coerce_to_uint32_array('0x12345')
+ array([74565], dtype=uint32)
+ >>> _coerce_to_uint32_array([12345, '67890'])
+ array([12345, 67890], dtype=uint32)
+ >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.uint32))
+ array([12345, 67890], dtype=uint32)
+ >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.int64))
+ array([12345, 67890], dtype=uint32)
+ >>> _coerce_to_uint32_array([12345, 0x10deadbeef, 67890, 0xdeadbeef])
+ array([ 12345, 3735928559, 16, 67890, 3735928559],
+ dtype=uint32)
+ >>> _coerce_to_uint32_array(1234567890123456789012345678901234567890)
+ array([3460238034, 2898026390, 3235640248, 2697535605, 3],
+ dtype=uint32)
+ """
+ if isinstance(x, np.ndarray) and x.dtype == np.dtype(np.uint32):
+ return x.copy()
+ elif isinstance(x, str):
+ if x.startswith('0x'):
+ x = int(x, base=16)
+ elif DECIMAL_RE.match(x):
+ x = int(x)
+ else:
+ raise ValueError("unrecognized seed string")
+ if isinstance(x, (int, np.integer)):
+ return _int_to_uint32_array(x)
+ elif isinstance(x, (float, np.inexact)):
+ raise TypeError('seed must be integer')
+ else:
+ if len(x) == 0:
+ return np.array([], dtype=np.uint32)
+ # Should be a sequence of interpretable-as-ints. Convert each one to
+ # a uint32 array and concatenate.
+ subseqs = [_coerce_to_uint32_array(v) for v in x]
+ return np.concatenate(subseqs)
+
+
+cdef uint32_t hashmix(uint32_t value, uint32_t * hash_const):
+ # We are modifying the multiplier as we go along, so it is input-output
+ value ^= hash_const[0]
+ hash_const[0] *= MULT_A
+ value *= hash_const[0]
+ value ^= value >> XSHIFT
+ return value
+
+cdef uint32_t mix(uint32_t x, uint32_t y):
+ cdef uint32_t result = (MIX_MULT_L * x - MIX_MULT_R * y)
+ result ^= result >> XSHIFT
+ return result
+
+
+class ISeedSequence(abc.ABC):
+ """
+ Abstract base class for seed sequences.
+
+ ``BitGenerator`` implementations should treat any object that adheres to
+ this interface as a seed sequence.
+
+ See Also
+ --------
+ SeedSequence, SeedlessSeedSequence
+ """
+
+ @abc.abstractmethod
+ def generate_state(self, n_words, dtype=np.uint32):
+ """
+ generate_state(n_words, dtype=np.uint32)
+
+ Return the requested number of words for PRNG seeding.
+
+ A BitGenerator should call this method in its constructor with
+ an appropriate `n_words` parameter to properly seed itself.
+
+ Parameters
+ ----------
+ n_words : int
+ dtype : np.uint32 or np.uint64, optional
+ The size of each word. This should only be either `uint32` or
+ `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that
+ requesting `uint64` will draw twice as many bits as `uint32` for
+ the same `n_words`. This is a convenience for `BitGenerator`s that
+ express their states as `uint64` arrays.
+
+ Returns
+ -------
+ state : uint32 or uint64 array, shape=(n_words,)
+ """
+
+
+class ISpawnableSeedSequence(ISeedSequence):
+ """
+ Abstract base class for seed sequences that can spawn.
+ """
+
+ @abc.abstractmethod
+ def spawn(self, n_children):
+ """
+ spawn(n_children)
+
+ Spawn a number of child `SeedSequence` s by extending the
+ `spawn_key`.
+
+ Parameters
+ ----------
+ n_children : int
+
+ Returns
+ -------
+ seqs : list of `SeedSequence` s
+ """
+
+
+cdef class SeedlessSeedSequence():
+ """
+ A seed sequence for BitGenerators with no need for seed state.
+
+ See Also
+ --------
+ SeedSequence, ISeedSequence
+ """
+
+ def generate_state(self, n_words, dtype=np.uint32):
+ raise NotImplementedError('seedless SeedSequences cannot generate state')
+
+ def spawn(self, n_children):
+ return [self] * n_children
+
+
+# We cannot directly subclass a `cdef class` type from an `ABC` in Cython, so
+# we must register it after the fact.
+ISpawnableSeedSequence.register(SeedlessSeedSequence)
+
+
+cdef class SeedSequence():
+ """
+ SeedSequence(entropy=None, *, spawn_key=(), pool_size=4)
+
+ SeedSequence mixes sources of entropy in a reproducible way to set the
+ initial state for independent and very probably non-overlapping
+ BitGenerators.
+
+ Once the SeedSequence is instantiated, you can call the `generate_state`
+ method to get an appropriately sized seed. Calling `spawn(n) <spawn>` will
+ create ``n`` SeedSequences that can be used to seed independent
+ BitGenerators, i.e. for different threads.
+
+ Parameters
+ ----------
+ entropy : {None, int, sequence[int]}, optional
+ The entropy for creating a `SeedSequence`.
+ spawn_key : {(), sequence[int]}, optional
+ A third source of entropy, used internally when calling
+ `SeedSequence.spawn`
+ pool_size : {int}, optional
+ Size of the pooled entropy to store. Default is 4 to give a 128-bit
+ entropy pool. 8 (for 256 bits) is another reasonable choice if working
+ with larger PRNGs, but there is very little to be gained by selecting
+ another value.
+ n_children_spawned : {int}, optional
+ The number of children already spawned. Only pass this if
+ reconstructing a `SeedSequence` from a serialized form.
+
+ Notes
+ -----
+
+ Best practice for achieving reproducible bit streams is to use
+ the default ``None`` for the initial entropy, and then use
+ `SeedSequence.entropy` to log/pickle the `entropy` for reproducibility:
+
+ >>> sq1 = np.random.SeedSequence()
+ >>> sq1.entropy
+ 243799254704924441050048792905230269161 # random
+ >>> sq2 = np.random.SeedSequence(sq1.entropy)
+ >>> np.all(sq1.generate_state(10) == sq2.generate_state(10))
+ True
+ """
+
+ def __init__(self, entropy=None, *, spawn_key=(),
+ pool_size=DEFAULT_POOL_SIZE, n_children_spawned=0):
+ if pool_size < DEFAULT_POOL_SIZE:
+ raise ValueError("The size of the entropy pool should be at least "
+ f"{DEFAULT_POOL_SIZE}")
+ if entropy is None:
+ entropy = randbits(pool_size * 32)
+ elif not isinstance(entropy, (int, np.integer, list, tuple, range,
+ np.ndarray)):
+ raise TypeError('SeedSequence expects int or sequence of ints for '
+ 'entropy not {}'.format(entropy))
+ self.entropy = entropy
+ self.spawn_key = tuple(spawn_key)
+ self.pool_size = pool_size
+ self.n_children_spawned = n_children_spawned
+
+ self.pool = np.zeros(pool_size, dtype=np.uint32)
+ self.mix_entropy(self.pool, self.get_assembled_entropy())
+
+ def __repr__(self):
+ lines = [
+ f'{type(self).__name__}(',
+ f' entropy={self.entropy!r},',
+ ]
+ # Omit some entries if they are left as the defaults in order to
+ # simplify things.
+ if self.spawn_key:
+ lines.append(f' spawn_key={self.spawn_key!r},')
+ if self.pool_size != DEFAULT_POOL_SIZE:
+ lines.append(f' pool_size={self.pool_size!r},')
+ if self.n_children_spawned != 0:
+ lines.append(f' n_children_spawned={self.n_children_spawned!r},')
+ lines.append(')')
+ text = '\n'.join(lines)
+ return text
+
+ @property
+ def state(self):
+ return {k:getattr(self, k) for k in
+ ['entropy', 'spawn_key', 'pool_size',
+ 'n_children_spawned']
+ if getattr(self, k) is not None}
+
+ cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer,
+ np.ndarray[np.npy_uint32, ndim=1] entropy_array):
+ """ Mix in the given entropy to mixer.
+
+ Parameters
+ ----------
+ mixer : 1D uint32 array, modified in-place
+ entropy_array : 1D uint32 array
+ """
+ cdef uint32_t hash_const[1]
+ hash_const[0] = INIT_A
+
+ # Add in the entropy up to the pool size.
+ for i in range(len(mixer)):
+ if i < len(entropy_array):
+ mixer[i] = hashmix(entropy_array[i], hash_const)
+ else:
+ # Our pool size is bigger than our entropy, so just keep
+ # running the hash out.
+ mixer[i] = hashmix(0, hash_const)
+
+ # Mix all bits together so late bits can affect earlier bits.
+ for i_src in range(len(mixer)):
+ for i_dst in range(len(mixer)):
+ if i_src != i_dst:
+ mixer[i_dst] = mix(mixer[i_dst],
+ hashmix(mixer[i_src], hash_const))
+
+ # Add any remaining entropy, mixing each new entropy word with each
+ # pool word.
+ for i_src in range(len(mixer), len(entropy_array)):
+ for i_dst in range(len(mixer)):
+ mixer[i_dst] = mix(mixer[i_dst],
+ hashmix(entropy_array[i_src], hash_const))
+
+ cdef get_assembled_entropy(self):
+ """ Convert and assemble all entropy sources into a uniform uint32
+ array.
+
+ Returns
+ -------
+ entropy_array : 1D uint32 array
+ """
+ # Convert run-entropy, program-entropy, and the spawn key into uint32
+ # arrays and concatenate them.
+
+ # We MUST have at least some run-entropy. The others are optional.
+ assert self.entropy is not None
+ run_entropy = _coerce_to_uint32_array(self.entropy)
+ spawn_entropy = _coerce_to_uint32_array(self.spawn_key)
+ entropy_array = np.concatenate([run_entropy, spawn_entropy])
+ return entropy_array
+
+ @np.errstate(over='ignore')
+ def generate_state(self, n_words, dtype=np.uint32):
+ """
+ generate_state(n_words, dtype=np.uint32)
+
+ Return the requested number of words for PRNG seeding.
+
+ A BitGenerator should call this method in its constructor with
+ an appropriate `n_words` parameter to properly seed itself.
+
+ Parameters
+ ----------
+ n_words : int
+ dtype : np.uint32 or np.uint64, optional
+ The size of each word. This should only be either `uint32` or
+ `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that
+ requesting `uint64` will draw twice as many bits as `uint32` for
+ the same `n_words`. This is a convenience for `BitGenerator`s that
+ express their states as `uint64` arrays.
+
+ Returns
+ -------
+ state : uint32 or uint64 array, shape=(n_words,)
+ """
+ cdef uint32_t hash_const = INIT_B
+ cdef uint32_t data_val
+
+ out_dtype = np.dtype(dtype)
+ if out_dtype == np.dtype(np.uint32):
+ pass
+ elif out_dtype == np.dtype(np.uint64):
+ n_words *= 2
+ else:
+ raise ValueError("only support uint32 or uint64")
+ state = np.zeros(n_words, dtype=np.uint32)
+ src_cycle = cycle(self.pool)
+ for i_dst in range(n_words):
+ data_val = next(src_cycle)
+ data_val ^= hash_const
+ hash_const *= MULT_B
+ data_val *= hash_const
+ data_val ^= data_val >> XSHIFT
+ state[i_dst] = data_val
+ if out_dtype == np.dtype(np.uint64):
+ # For consistency across different endiannesses, view first as
+ # little-endian then convert the values to the native endianness.
+ state = state.astype('<u4').view('<u8').astype(np.uint64)
+ return state
+
+ def spawn(self, n_children):
+ """
+ spawn(n_children)
+
+ Spawn a number of child `SeedSequence` s by extending the
+ `spawn_key`.
+
+ Parameters
+ ----------
+ n_children : int
+
+ Returns
+ -------
+ seqs : list of `SeedSequence` s
+ """
+ cdef uint32_t i
+
+ seqs = []
+ for i in range(self.n_children_spawned,
+ self.n_children_spawned + n_children):
+ seqs.append(type(self)(
+ self.entropy,
+ spawn_key=self.spawn_key + (i,),
+ pool_size=self.pool_size,
+ ))
+ self.n_children_spawned += n_children
+ return seqs
+
+
+ISpawnableSeedSequence.register(SeedSequence)
+
+
+cdef class BitGenerator():
+ """
+ BitGenerator(seed=None)
+
+ Base Class for generic BitGenerators, which provide a stream
+ of random bits based on different algorithms. Must be overridden.
+
+ Parameters
+ ----------
+ seed : {None, int, array_like[ints], SeedSequence}, optional
+ A seed to initialize the `BitGenerator`. If None, then fresh,
+ unpredictable entropy will be pulled from the OS. If an ``int`` or
+ ``array_like[ints]`` is passed, then it will be passed to
+ ~`numpy.random.SeedSequence` to derive the initial `BitGenerator` state.
+ One may also pass in a `SeedSequence` instance.
+
+ Attributes
+ ----------
+ lock : threading.Lock
+ Lock instance that is shared so that the same BitGenerator can
+ be used in multiple Generators without corrupting the state. Code that
+ generates values from a bit generator should hold the bit generator's
+ lock.
+
+ See Also
+ -------
+ SeedSequence
+ """
+
+ def __init__(self, seed=None):
+ self.lock = Lock()
+ self._bitgen.state = <void *>0
+ if type(self) is BitGenerator:
+ raise NotImplementedError('BitGenerator is a base class and cannot be instantized')
+
+ self._ctypes = None
+ self._cffi = None
+
+ cdef const char *name = "BitGenerator"
+ self.capsule = PyCapsule_New(<void *>&self._bitgen, name, NULL)
+ if not isinstance(seed, ISeedSequence):
+ seed = SeedSequence(seed)
+ self._seed_seq = seed
+
+ # Pickling support:
+ def __getstate__(self):
+ return self.state
+
+ def __setstate__(self, state):
+ self.state = state
+
+ def __reduce__(self):
+ from ._pickle import __bit_generator_ctor
+ return __bit_generator_ctor, (self.state['bit_generator'],), self.state
+
+ @property
+ def state(self):
+ """
+ Get or set the PRNG state
+
+ The base BitGenerator.state must be overridden by a subclass
+
+ Returns
+ -------
+ state : dict
+ Dictionary containing the information required to describe the
+ state of the PRNG
+ """
+ raise NotImplementedError('Not implemented in base BitGenerator')
+
+ @state.setter
+ def state(self, value):
+ raise NotImplementedError('Not implemented in base BitGenerator')
+
+ def random_raw(self, size=None, output=True):
+ """
+ random_raw(self, size=None)
+
+ Return randoms as generated by the underlying BitGenerator
+
+ Parameters
+ ----------
+ size : int or tuple of ints, optional
+ Output shape. If the given shape is, e.g., ``(m, n, k)``, then
+ ``m * n * k`` samples are drawn. Default is None, in which case a
+ single value is returned.
+ output : bool, optional
+ Output values. Used for performance testing since the generated
+ values are not returned.
+
+ Returns
+ -------
+ out : uint or ndarray
+ Drawn samples.
+
+ Notes
+ -----
+ This method directly exposes the the raw underlying pseudo-random
+ number generator. All values are returned as unsigned 64-bit
+ values irrespective of the number of bits produced by the PRNG.
+
+ See the class docstring for the number of bits returned.
+ """
+ return random_raw(&self._bitgen, self.lock, size, output)
+
+ def _benchmark(self, Py_ssize_t cnt, method=u'uint64'):
+ '''Used in tests'''
+ return benchmark(&self._bitgen, self.lock, cnt, method)
+
+ @property
+ def ctypes(self):
+ """
+ ctypes interface
+
+ Returns
+ -------
+ interface : namedtuple
+ Named tuple containing ctypes wrapper
+
+ * state_address - Memory address of the state struct
+ * state - pointer to the state struct
+ * next_uint64 - function pointer to produce 64 bit integers
+ * next_uint32 - function pointer to produce 32 bit integers
+ * next_double - function pointer to produce doubles
+ * bitgen - pointer to the bit generator struct
+ """
+ if self._ctypes is None:
+ self._ctypes = prepare_ctypes(&self._bitgen)
+
+ return self._ctypes
+
+ @property
+ def cffi(self):
+ """
+ CFFI interface
+
+ Returns
+ -------
+ interface : namedtuple
+ Named tuple containing CFFI wrapper
+
+ * state_address - Memory address of the state struct
+ * state - pointer to the state struct
+ * next_uint64 - function pointer to produce 64 bit integers
+ * next_uint32 - function pointer to produce 32 bit integers
+ * next_double - function pointer to produce doubles
+ * bitgen - pointer to the bit generator struct
+ """
+ if self._cffi is None:
+ self._cffi = prepare_cffi(&self._bitgen)
+ return self._cffi