""" BitGenerator base class and SeedSequence used to seed the BitGenerators. SeedSequence is derived from Melissa E. O'Neill's C++11 `std::seed_seq` implementation, as it has a lot of nice properties that we want. https://gist.github.com/imneme/540829265469e673d045 http://www.pcg-random.org/posts/developing-a-seed_seq-alternative.html The MIT License (MIT) Copyright (c) 2015 Melissa E. O'Neill Copyright (c) 2019 NumPy Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ import abc import sys from itertools import cycle import re from secrets import randbits from threading import Lock from cpython.pycapsule cimport PyCapsule_New import numpy as np cimport numpy as np from ._common cimport (random_raw, benchmark, prepare_ctypes, prepare_cffi) __all__ = ['SeedSequence', 'BitGenerator'] np.import_array() DECIMAL_RE = re.compile(r'[0-9]+') cdef uint32_t DEFAULT_POOL_SIZE = 4 # Appears also in docstring for pool_size cdef uint32_t INIT_A = 0x43b0d7e5 cdef uint32_t MULT_A = 0x931e8875 cdef uint32_t INIT_B = 0x8b51f9dd cdef uint32_t MULT_B = 0x58f38ded cdef uint32_t MIX_MULT_L = 0xca01f9dd cdef uint32_t MIX_MULT_R = 0x4973f715 cdef uint32_t XSHIFT = np.dtype(np.uint32).itemsize * 8 // 2 cdef uint32_t MASK32 = 0xFFFFFFFF def _int_to_uint32_array(n): arr = [] if n < 0: raise ValueError("expected non-negative integer") if n == 0: arr.append(np.uint32(n)) # NumPy ints may not like `n & MASK32` or ``//= 2**32` so use Python int n = int(n) while n > 0: arr.append(np.uint32(n & MASK32)) n //= 2**32 return np.array(arr, dtype=np.uint32) def _coerce_to_uint32_array(x): """ Coerce an input to a uint32 array. If a `uint32` array, pass it through directly. If a non-negative integer, then break it up into `uint32` words, lowest bits first. If a string starting with "0x", then interpret as a hex integer, as above. If a string of decimal digits, interpret as a decimal integer, as above. If a sequence of ints or strings, interpret each element as above and concatenate. Note that the handling of `int64` or `uint64` arrays are not just straightforward views as `uint32` arrays. If an element is small enough to fit into a `uint32`, then it will only take up one `uint32` element in the output. This is to make sure that the interpretation of a sequence of integers is the same regardless of numpy's default integer type, which differs on different platforms. Parameters ---------- x : int, str, sequence of int or str Returns ------- seed_array : uint32 array Examples -------- >>> import numpy as np >>> from numpy.random.bit_generator import _coerce_to_uint32_array >>> _coerce_to_uint32_array(12345) array([12345], dtype=uint32) >>> _coerce_to_uint32_array('12345') array([12345], dtype=uint32) >>> _coerce_to_uint32_array('0x12345') array([74565], dtype=uint32) >>> _coerce_to_uint32_array([12345, '67890']) array([12345, 67890], dtype=uint32) >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.uint32)) array([12345, 67890], dtype=uint32) >>> _coerce_to_uint32_array(np.array([12345, 67890], dtype=np.int64)) array([12345, 67890], dtype=uint32) >>> _coerce_to_uint32_array([12345, 0x10deadbeef, 67890, 0xdeadbeef]) array([ 12345, 3735928559, 16, 67890, 3735928559], dtype=uint32) >>> _coerce_to_uint32_array(1234567890123456789012345678901234567890) array([3460238034, 2898026390, 3235640248, 2697535605, 3], dtype=uint32) """ if isinstance(x, np.ndarray) and x.dtype == np.dtype(np.uint32): return x.copy() elif isinstance(x, str): if x.startswith('0x'): x = int(x, base=16) elif DECIMAL_RE.match(x): x = int(x) else: raise ValueError("unrecognized seed string") if isinstance(x, (int, np.integer)): return _int_to_uint32_array(x) elif isinstance(x, (float, np.inexact)): raise TypeError('seed must be integer') else: if len(x) == 0: return np.array([], dtype=np.uint32) # Should be a sequence of interpretable-as-ints. Convert each one to # a uint32 array and concatenate. subseqs = [_coerce_to_uint32_array(v) for v in x] return np.concatenate(subseqs) cdef uint32_t hashmix(uint32_t value, uint32_t * hash_const): # We are modifying the multiplier as we go along, so it is input-output value ^= hash_const[0] hash_const[0] *= MULT_A value *= hash_const[0] value ^= value >> XSHIFT return value cdef uint32_t mix(uint32_t x, uint32_t y): cdef uint32_t result = (MIX_MULT_L * x - MIX_MULT_R * y) result ^= result >> XSHIFT return result class ISeedSequence(abc.ABC): """ Abstract base class for seed sequences. ``BitGenerator`` implementations should treat any object that adheres to this interface as a seed sequence. See Also -------- SeedSequence, SeedlessSeedSequence """ @abc.abstractmethod def generate_state(self, n_words, dtype=np.uint32): """ generate_state(n_words, dtype=np.uint32) Return the requested number of words for PRNG seeding. A BitGenerator should call this method in its constructor with an appropriate `n_words` parameter to properly seed itself. Parameters ---------- n_words : int dtype : np.uint32 or np.uint64, optional The size of each word. This should only be either `uint32` or `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that requesting `uint64` will draw twice as many bits as `uint32` for the same `n_words`. This is a convenience for `BitGenerator`s that express their states as `uint64` arrays. Returns ------- state : uint32 or uint64 array, shape=(n_words,) """ class ISpawnableSeedSequence(ISeedSequence): """ Abstract base class for seed sequences that can spawn. """ @abc.abstractmethod def spawn(self, n_children): """ spawn(n_children) Spawn a number of child `SeedSequence` s by extending the `spawn_key`. See :ref:`seedsequence-spawn` for additional notes on spawning children. Parameters ---------- n_children : int Returns ------- seqs : list of `SeedSequence` s """ cdef class SeedlessSeedSequence(): """ A seed sequence for BitGenerators with no need for seed state. See Also -------- SeedSequence, ISeedSequence """ def generate_state(self, n_words, dtype=np.uint32): raise NotImplementedError('seedless SeedSequences cannot generate state') def spawn(self, n_children): return [self] * n_children # We cannot directly subclass a `cdef class` type from an `ABC` in Cython, so # we must register it after the fact. ISpawnableSeedSequence.register(SeedlessSeedSequence) cdef class SeedSequence(): """ SeedSequence(entropy=None, *, spawn_key=(), pool_size=4) SeedSequence mixes sources of entropy in a reproducible way to set the initial state for independent and very probably non-overlapping BitGenerators. Once the SeedSequence is instantiated, you can call the `generate_state` method to get an appropriately sized seed. Calling `spawn(n) ` will create ``n`` SeedSequences that can be used to seed independent BitGenerators, i.e. for different threads. Parameters ---------- entropy : {None, int, sequence[int]}, optional The entropy for creating a `SeedSequence`. All integer values must be non-negative. spawn_key : {(), sequence[int]}, optional An additional source of entropy based on the position of this `SeedSequence` in the tree of such objects created with the `SeedSequence.spawn` method. Typically, only `SeedSequence.spawn` will set this, and users will not. pool_size : {int}, optional Size of the pooled entropy to store. Default is 4 to give a 128-bit entropy pool. 8 (for 256 bits) is another reasonable choice if working with larger PRNGs, but there is very little to be gained by selecting another value. n_children_spawned : {int}, optional The number of children already spawned. Only pass this if reconstructing a `SeedSequence` from a serialized form. Notes ----- Best practice for achieving reproducible bit streams is to use the default ``None`` for the initial entropy, and then use `SeedSequence.entropy` to log/pickle the `entropy` for reproducibility: >>> sq1 = np.random.SeedSequence() >>> sq1.entropy 243799254704924441050048792905230269161 # random >>> sq2 = np.random.SeedSequence(sq1.entropy) >>> np.all(sq1.generate_state(10) == sq2.generate_state(10)) True """ def __init__(self, entropy=None, *, spawn_key=(), pool_size=DEFAULT_POOL_SIZE, n_children_spawned=0): if pool_size < DEFAULT_POOL_SIZE: raise ValueError("The size of the entropy pool should be at least " f"{DEFAULT_POOL_SIZE}") if entropy is None: entropy = randbits(pool_size * 32) elif not isinstance(entropy, (int, np.integer, list, tuple, range, np.ndarray)): raise TypeError('SeedSequence expects int or sequence of ints for ' 'entropy not {}'.format(entropy)) self.entropy = entropy self.spawn_key = tuple(spawn_key) self.pool_size = pool_size self.n_children_spawned = n_children_spawned self.pool = np.zeros(pool_size, dtype=np.uint32) self.mix_entropy(self.pool, self.get_assembled_entropy()) def __repr__(self): lines = [ f'{type(self).__name__}(', f' entropy={self.entropy!r},', ] # Omit some entries if they are left as the defaults in order to # simplify things. if self.spawn_key: lines.append(f' spawn_key={self.spawn_key!r},') if self.pool_size != DEFAULT_POOL_SIZE: lines.append(f' pool_size={self.pool_size!r},') if self.n_children_spawned != 0: lines.append(f' n_children_spawned={self.n_children_spawned!r},') lines.append(')') text = '\n'.join(lines) return text @property def state(self): return {k:getattr(self, k) for k in ['entropy', 'spawn_key', 'pool_size', 'n_children_spawned'] if getattr(self, k) is not None} cdef mix_entropy(self, np.ndarray[np.npy_uint32, ndim=1] mixer, np.ndarray[np.npy_uint32, ndim=1] entropy_array): """ Mix in the given entropy to mixer. Parameters ---------- mixer : 1D uint32 array, modified in-place entropy_array : 1D uint32 array """ cdef uint32_t hash_const[1] hash_const[0] = INIT_A # Add in the entropy up to the pool size. for i in range(len(mixer)): if i < len(entropy_array): mixer[i] = hashmix(entropy_array[i], hash_const) else: # Our pool size is bigger than our entropy, so just keep # running the hash out. mixer[i] = hashmix(0, hash_const) # Mix all bits together so late bits can affect earlier bits. for i_src in range(len(mixer)): for i_dst in range(len(mixer)): if i_src != i_dst: mixer[i_dst] = mix(mixer[i_dst], hashmix(mixer[i_src], hash_const)) # Add any remaining entropy, mixing each new entropy word with each # pool word. for i_src in range(len(mixer), len(entropy_array)): for i_dst in range(len(mixer)): mixer[i_dst] = mix(mixer[i_dst], hashmix(entropy_array[i_src], hash_const)) cdef get_assembled_entropy(self): """ Convert and assemble all entropy sources into a uniform uint32 array. Returns ------- entropy_array : 1D uint32 array """ # Convert run-entropy and the spawn key into uint32 # arrays and concatenate them. # We MUST have at least some run-entropy. The others are optional. assert self.entropy is not None run_entropy = _coerce_to_uint32_array(self.entropy) spawn_entropy = _coerce_to_uint32_array(self.spawn_key) if len(spawn_entropy) > 0 and len(run_entropy) < self.pool_size: # Explicitly fill out the entropy with 0s to the pool size to avoid # conflict with spawn keys. We changed this in 1.19.0 to fix # gh-16539. In order to preserve stream-compatibility with # unspawned SeedSequences with small entropy inputs, we only do # this when a spawn_key is specified. diff = self.pool_size - len(run_entropy) run_entropy = np.concatenate( [run_entropy, np.zeros(diff, dtype=np.uint32)]) entropy_array = np.concatenate([run_entropy, spawn_entropy]) return entropy_array @np.errstate(over='ignore') def generate_state(self, n_words, dtype=np.uint32): """ generate_state(n_words, dtype=np.uint32) Return the requested number of words for PRNG seeding. A BitGenerator should call this method in its constructor with an appropriate `n_words` parameter to properly seed itself. Parameters ---------- n_words : int dtype : np.uint32 or np.uint64, optional The size of each word. This should only be either `uint32` or `uint64`. Strings (`'uint32'`, `'uint64'`) are fine. Note that requesting `uint64` will draw twice as many bits as `uint32` for the same `n_words`. This is a convenience for `BitGenerator`s that express their states as `uint64` arrays. Returns ------- state : uint32 or uint64 array, shape=(n_words,) """ cdef uint32_t hash_const = INIT_B cdef uint32_t data_val out_dtype = np.dtype(dtype) if out_dtype == np.dtype(np.uint32): pass elif out_dtype == np.dtype(np.uint64): n_words *= 2 else: raise ValueError("only support uint32 or uint64") state = np.zeros(n_words, dtype=np.uint32) src_cycle = cycle(self.pool) for i_dst in range(n_words): data_val = next(src_cycle) data_val ^= hash_const hash_const *= MULT_B data_val *= hash_const data_val ^= data_val >> XSHIFT state[i_dst] = data_val if out_dtype == np.dtype(np.uint64): # For consistency across different endiannesses, view first as # little-endian then convert the values to the native endianness. state = state.astype('0 if type(self) is BitGenerator: raise NotImplementedError('BitGenerator is a base class and cannot be instantized') self._ctypes = None self._cffi = None cdef const char *name = "BitGenerator" self.capsule = PyCapsule_New(&self._bitgen, name, NULL) if not isinstance(seed, ISeedSequence): seed = SeedSequence(seed) self._seed_seq = seed # Pickling support: def __getstate__(self): return self.state def __setstate__(self, state): self.state = state def __reduce__(self): from ._pickle import __bit_generator_ctor return __bit_generator_ctor, (self.state['bit_generator'],), self.state @property def state(self): """ Get or set the PRNG state The base BitGenerator.state must be overridden by a subclass Returns ------- state : dict Dictionary containing the information required to describe the state of the PRNG """ raise NotImplementedError('Not implemented in base BitGenerator') @state.setter def state(self, value): raise NotImplementedError('Not implemented in base BitGenerator') @property def seed_seq(self): """ Get the seed sequence used to initialize the bit generator. .. versionadded:: 1.25.0 Returns ------- seed_seq : ISeedSequence The SeedSequence object used to initialize the BitGenerator. This is normally a `np.random.SeedSequence` instance. """ return self._seed_seq def spawn(self, int n_children): """ spawn(n_children) Create new independent child bit generators. See :ref:`seedsequence-spawn` for additional notes on spawning children. Some bit generators also implement ``jumped`` as a different approach for creating independent streams. .. versionadded:: 1.25.0 Parameters ---------- n_children : int Returns ------- child_bit_generators : list of BitGenerators Raises ------ TypeError When the underlying SeedSequence does not implement spawning. See Also -------- random.Generator.spawn, random.SeedSequence.spawn : Equivalent method on the generator and seed sequence. """ if not isinstance(self._seed_seq, ISpawnableSeedSequence): raise TypeError( "The underlying SeedSequence does not implement spawning.") return [type(self)(seed=s) for s in self._seed_seq.spawn(n_children)] def random_raw(self, size=None, output=True): """ random_raw(self, size=None) Return randoms as generated by the underlying BitGenerator Parameters ---------- size : int or tuple of ints, optional Output shape. If the given shape is, e.g., ``(m, n, k)``, then ``m * n * k`` samples are drawn. Default is None, in which case a single value is returned. output : bool, optional Output values. Used for performance testing since the generated values are not returned. Returns ------- out : uint or ndarray Drawn samples. Notes ----- This method directly exposes the raw underlying pseudo-random number generator. All values are returned as unsigned 64-bit values irrespective of the number of bits produced by the PRNG. See the class docstring for the number of bits returned. """ return random_raw(&self._bitgen, self.lock, size, output) def _benchmark(self, Py_ssize_t cnt, method='uint64'): """Used in tests""" return benchmark(&self._bitgen, self.lock, cnt, method) @property def ctypes(self): """ ctypes interface Returns ------- interface : namedtuple Named tuple containing ctypes wrapper * state_address - Memory address of the state struct * state - pointer to the state struct * next_uint64 - function pointer to produce 64 bit integers * next_uint32 - function pointer to produce 32 bit integers * next_double - function pointer to produce doubles * bitgen - pointer to the bit generator struct """ if self._ctypes is None: self._ctypes = prepare_ctypes(&self._bitgen) return self._ctypes @property def cffi(self): """ CFFI interface Returns ------- interface : namedtuple Named tuple containing CFFI wrapper * state_address - Memory address of the state struct * state - pointer to the state struct * next_uint64 - function pointer to produce 64 bit integers * next_uint32 - function pointer to produce 32 bit integers * next_double - function pointer to produce doubles * bitgen - pointer to the bit generator struct """ if self._cffi is None: self._cffi = prepare_cffi(&self._bitgen) return self._cffi